use crate::api::icd::*;
use crate::api::types::*;
use crate::api::util::*;
use crate::core::context::*;
use crate::core::device::*;
use crate::core::format::*;
use crate::core::gl::*;
use crate::core::queue::*;
use crate::core::util::*;
use crate::impl_cl_type_trait;
use crate::impl_cl_type_trait_base;

use mesa_rust::pipe::context::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::ResourceType;
use mesa_rust::pipe::transfer::*;
use mesa_rust_gen::*;
use mesa_rust_util::math::*;
use mesa_rust_util::properties::Properties;
use rusticl_opencl_gen::*;

use std::cmp;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::TryInto;
use std::mem;
use std::mem::size_of;
use std::ops::Deref;
use std::os::raw::c_void;
use std::ptr;
use std::sync::Arc;
use std::sync::Mutex;

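/// Tracks one device's active transfer for a mapped memory object. `shadow` is the staging
/// resource backing the mapping when the real resource can't be mapped directly; `pending`
/// counts map operations that have been created but not yet reached in the queue (see
/// `mark_pending`/`unmark_pending` below).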
struct MappingTransfer {
    tx: PipeTransfer,
    shadow: Option<PipeResource>,
    pending: u32,
}

impl MappingTransfer {
    fn new(tx: PipeTransfer, shadow: Option<PipeResource>) -> Self {
        MappingTransfer {
            tx,
            shadow,
            pending: 1,
        }
    }
}

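/// Per-memory-object bookkeeping of all active mappings: `tx` holds one transfer per device,
/// `maps` the reference count for every pointer handed out to the application.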
struct Mappings {
    tx: HashMap<&'static Device, MappingTransfer>,
    maps: HashMap<usize, u32>,
}

impl Mappings {
    fn new() -> Mutex<Self> {
        Mutex::new(Mappings {
            tx: HashMap::new(),
            maps: HashMap::new(),
        })
    }

    fn contains_ptr(&self, ptr: *mut c_void) -> bool {
        let ptr = ptr as usize;
        self.maps.contains_key(&ptr)
    }

    fn mark_pending(&mut self, dev: &Device) {
        self.tx.get_mut(dev).unwrap().pending += 1;
    }

    fn unmark_pending(&mut self, dev: &Device) {
        if let Some(tx) = self.tx.get_mut(dev) {
            tx.pending -= 1;
        }
    }

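    // Returns true if there were no active mappings before this call, i.e. the caller just
    // created the first mapping and may have to sync the content (see `sync_shadow`).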
    fn increase_ref(&mut self, dev: &Device, ptr: *mut c_void) -> bool {
        let ptr = ptr as usize;
        let res = self.maps.is_empty();
        *self.maps.entry(ptr).or_default() += 1;
        self.unmark_pending(dev);
        res
    }

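    // Returns whether the last mapping was just removed and, if so, the device's shadow
    // resource that still has to be synced back to the real resource.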
    fn decrease_ref(&mut self, ptr: *mut c_void, dev: &Device) -> (bool, Option<&PipeResource>) {
        let ptr = ptr as usize;
        if let Some(r) = self.maps.get_mut(&ptr) {
            *r -= 1;

            if *r == 0 {
                self.maps.remove(&ptr);
            }

            if self.maps.is_empty() {
                let shadow = self.tx.get(dev).and_then(|tx| tx.shadow.as_ref());
                return (true, shadow);
            }
        }
        (false, None)
    }

    fn clean_up_tx(&mut self, dev: &Device, ctx: &PipeContext) {
        if self.maps.is_empty() {
            if let Some(tx) = self.tx.get(dev) {
                if tx.pending == 0 {
                    self.tx.remove(dev).unwrap().tx.with_ctx(ctx);
                }
            }
        }
    }
}

#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct ConstMemoryPtr {
    ptr: *const c_void,
}
unsafe impl Send for ConstMemoryPtr {}
unsafe impl Sync for ConstMemoryPtr {}

impl ConstMemoryPtr {
    pub fn as_ptr(&self) -> *const c_void {
        self.ptr
    }

    /// # Safety
    ///
    /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
    /// [Send] and [Sync].
    pub unsafe fn from_ptr(ptr: *const c_void) -> Self {
        Self { ptr }
    }
}

#[repr(transparent)]
#[derive(Clone, Copy)]
pub struct MutMemoryPtr {
    ptr: *mut c_void,
}
unsafe impl Send for MutMemoryPtr {}
unsafe impl Sync for MutMemoryPtr {}

impl MutMemoryPtr {
    pub fn as_ptr(&self) -> *mut c_void {
        self.ptr
    }

    /// # Safety
    ///
    /// Users need to ensure that `ptr` is only accessed in a thread-safe manner sufficient for
    /// [Send] and [Sync].
    pub unsafe fn from_ptr(ptr: *mut c_void) -> Self {
        Self { ptr }
    }
}

pub enum Mem {
    Buffer(Arc<Buffer>),
    Image(Arc<Image>),
}

impl Deref for Mem {
    type Target = MemBase;

    fn deref(&self) -> &Self::Target {
        match self {
            Self::Buffer(b) => &b.base,
            Self::Image(i) => &i.base,
        }
    }
}

impl Mem {
    pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        match self {
            Self::Buffer(b) => b.unmap(q, ctx, ptr),
            Self::Image(i) => i.unmap(q, ctx, ptr),
        }
    }
}

/// # Mapping memory
///
/// Maps the resource of the device associated with the queue.
///
/// Mapping resources would be quite straightforward if OpenCL didn't allow so-called
/// non-blocking maps. Non-blocking maps must return a valid pointer to the mapped region
/// immediately, but should not synchronize data (in case of shadow buffers) until the map
/// event is reached in the queue. This makes it impossible to simply use pipe_transfers, as
/// those can't be explicitly synced by the frontend.
///
/// In order to have a compliant implementation of the mapping API we have to consider the
/// following cases:
///   1. Mapping a cl_mem object with CL_MEM_USE_HOST_PTR: We simply return the host_ptr.
///      Synchronization of shadowed host ptrs is done in `sync_shadow` on demand.
///   2. Mapping linear resources on UMA systems: We simply create the pipe_transfer with
///      `PIPE_MAP_DIRECTLY` and `PIPE_MAP_UNSYNCHRONIZED` and return the attached pointer.
///   3. On non-UMA systems, or when 2. fails (e.g. due to the resource being tiled), we
///      - create a shadow pipe_resource with `PIPE_USAGE_STAGING`,
///        `PIPE_RESOURCE_FLAG_MAP_PERSISTENT` and `PIPE_RESOURCE_FLAG_MAP_COHERENT`,
///      - create a pipe_transfer with `PIPE_MAP_COHERENT`, `PIPE_MAP_PERSISTENT` and
///        `PIPE_MAP_UNSYNCHRONIZED`, and
///      - sync the shadow buffer like a host_ptr shadow buffer in 1.
///
/// Taking this approach we guarantee that we only copy when actually needed, while making sure
/// the content behind the returned pointer is valid until unmapped.
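///
/// A rough sketch of that decision in pseudo-code (not the actual control flow; the helper
/// names are illustrative only):
///
/// ```text
/// if has_user_shadow_buffer(dev) {
///     host_ptr                      // case 1: synced via sync_shadow on demand
/// } else if can_map_directly(dev, res) {
///     direct_unsynchronized_map()   // case 2
/// } else {
///     map_coherent_staging_shadow() // case 3: synced back on unmap
/// }
/// ```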
pub struct MemBase {
    pub base: CLObjectBase<CL_INVALID_MEM_OBJECT>,
    pub context: Arc<Context>,
    pub parent: Option<Mem>,
    pub mem_type: cl_mem_object_type,
    pub flags: cl_mem_flags,
    pub size: usize,
    // it's a bit hacky, but storing the pointer as `usize` gives us `Send` and `Sync`. The
    // application is required to ensure no data races exist on the memory anyway.
    pub host_ptr: usize,
    pub props: Vec<cl_mem_properties>,
    pub cbs: Mutex<Vec<MemCB>>,
    pub gl_obj: Option<GLObject>,
    res: Option<HashMap<&'static Device, Arc<PipeResource>>>,
    maps: Mutex<Mappings>,
}

pub struct Buffer {
    base: MemBase,
    pub offset: usize,
}

pub struct Image {
    base: MemBase,
    pub image_format: cl_image_format,
    pub pipe_format: pipe_format,
    pub image_desc: cl_image_desc,
    pub image_elem_size: u8,
}

impl Deref for Buffer {
    type Target = MemBase;

    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl Deref for Image {
    type Target = MemBase;

    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl_cl_type_trait_base!(cl_mem, MemBase, [Buffer, Image], CL_INVALID_MEM_OBJECT);
impl_cl_type_trait!(cl_mem, Buffer, CL_INVALID_MEM_OBJECT, base.base);
impl_cl_type_trait!(cl_mem, Image, CL_INVALID_MEM_OBJECT, base.base);

pub trait CLImageDescInfo {
    fn type_info(&self) -> (u8, bool);
    fn pixels(&self) -> usize;
    fn bx(&self) -> CLResult<pipe_box>;
    fn row_pitch(&self) -> CLResult<u32>;
    fn slice_pitch(&self) -> usize;
    fn width(&self) -> CLResult<u32>;
    fn height(&self) -> CLResult<u32>;
    fn size(&self) -> CLVec<usize>;

    fn dims(&self) -> u8 {
        self.type_info().0
    }

    fn dims_with_array(&self) -> u8 {
        let array: u8 = self.is_array().into();
        self.dims() + array
    }

    fn has_slice(&self) -> bool {
        self.dims() == 3 || self.is_array()
    }

    fn is_array(&self) -> bool {
        self.type_info().1
    }
}

impl CLImageDescInfo for cl_image_desc {
    fn type_info(&self) -> (u8, bool) {
        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D | CL_MEM_OBJECT_IMAGE1D_BUFFER => (1, false),
            CL_MEM_OBJECT_IMAGE1D_ARRAY => (1, true),
            CL_MEM_OBJECT_IMAGE2D => (2, false),
            CL_MEM_OBJECT_IMAGE2D_ARRAY => (2, true),
            CL_MEM_OBJECT_IMAGE3D => (3, false),
            _ => panic!("unknown image_type {:x}", self.image_type),
        }
    }

    fn pixels(&self) -> usize {
        let mut res = self.image_width;
        let dims = self.dims();

        if dims > 1 {
            res *= self.image_height;
        }

        if dims > 2 {
            res *= self.image_depth;
        }

        if self.is_array() {
            res *= self.image_array_size;
        }

        res
    }

    fn size(&self) -> CLVec<usize> {
        let mut height = cmp::max(self.image_height, 1);
        let mut depth = cmp::max(self.image_depth, 1);

        match self.image_type {
            CL_MEM_OBJECT_IMAGE1D_ARRAY => height = self.image_array_size,
            CL_MEM_OBJECT_IMAGE2D_ARRAY => depth = self.image_array_size,
            _ => {}
        }

        CLVec::new([self.image_width, height, depth])
    }

    fn bx(&self) -> CLResult<pipe_box> {
        create_pipe_box(CLVec::default(), self.size(), self.image_type)
    }

    fn row_pitch(&self) -> CLResult<u32> {
        self.image_row_pitch
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }

    fn slice_pitch(&self) -> usize {
        self.image_slice_pitch
    }

    fn width(&self) -> CLResult<u32> {
        self.image_width
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }

    fn height(&self) -> CLResult<u32> {
        self.image_height
            .try_into()
            .map_err(|_| CL_OUT_OF_HOST_MEMORY)
    }
}

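/// Software fallback for rectangular copies: for every line in `region`, the source and
/// destination byte offsets are computed as
/// `(origin + [0, y, z]) · [pixel_size, row_pitch, slice_pitch]`
/// and `region[0] * pixel_size` bytes are copied.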
fn sw_copy(
    src: *const c_void,
    dst: *mut c_void,
    region: &CLVec<usize>,
    src_origin: &CLVec<usize>,
    src_row_pitch: usize,
    src_slice_pitch: usize,
    dst_origin: &CLVec<usize>,
    dst_row_pitch: usize,
    dst_slice_pitch: usize,
    pixel_size: u8,
) {
    for z in 0..region[2] {
        for y in 0..region[1] {
            unsafe {
                ptr::copy_nonoverlapping(
                    src.add(
                        (*src_origin + [0, y, z])
                            * [pixel_size as usize, src_row_pitch, src_slice_pitch],
                    ),
                    dst.add(
                        (*dst_origin + [0, y, z])
                            * [pixel_size as usize, dst_row_pitch, dst_slice_pitch],
                    ),
                    region[0] * pixel_size as usize,
                )
            };
        }
    }
}

/// helper function to determine if we can just map the resource in question or if we have to go
/// through a shadow buffer to let the CPU access the resource's memory
fn can_map_directly(dev: &Device, res: &PipeResource) -> bool {
    // There are two parts to this check:
    //   1. is the resource located in system RAM
    //   2. does the resource have a linear memory layout
    // We do not want to map memory over the PCIe bus as this generally leads to bad performance.
    (dev.unified_memory() || res.is_staging() || res.is_user)
        && (res.is_buffer() || res.is_linear())
}

impl MemBase {
    pub fn new_buffer(
        context: Arc<Context>,
        flags: cl_mem_flags,
        size: usize,
        host_ptr: *mut c_void,
        props: Vec<cl_mem_properties>,
    ) -> CLResult<Arc<Buffer>> {
        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
            ResourceType::Staging
        } else {
            ResourceType::Normal
        };

        let buffer = context.create_buffer(
            size,
            host_ptr,
            bit_check(flags, CL_MEM_COPY_HOST_PTR),
            res_type,
        )?;

        let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
            host_ptr as usize
        } else {
            0
        };

        Ok(Arc::new(Buffer {
            base: Self {
                base: CLObjectBase::new(RusticlTypes::Buffer),
                context,
                parent: None,
                mem_type: CL_MEM_OBJECT_BUFFER,
                flags,
                size,
                host_ptr,
                props,
                gl_obj: None,
                cbs: Mutex::new(Vec::new()),
                res: Some(buffer),
                maps: Mappings::new(),
            },
            offset: 0,
        }))
    }

    pub fn new_sub_buffer(
        parent: Arc<Buffer>,
        flags: cl_mem_flags,
        offset: usize,
        size: usize,
    ) -> Arc<Buffer> {
        let host_ptr = if parent.host_ptr().is_null() {
            0
        } else {
            unsafe { parent.host_ptr().add(offset) as usize }
        };

        Arc::new(Buffer {
            base: Self {
                base: CLObjectBase::new(RusticlTypes::Buffer),
                context: parent.context.clone(),
                parent: Some(Mem::Buffer(parent)),
                mem_type: CL_MEM_OBJECT_BUFFER,
                flags,
                size,
                host_ptr,
                props: Vec::new(),
                gl_obj: None,
                cbs: Mutex::new(Vec::new()),
                res: None,
                maps: Mappings::new(),
            },
            offset,
        })
    }

    pub fn new_image(
        context: Arc<Context>,
        parent: Option<Mem>,
        mem_type: cl_mem_object_type,
        flags: cl_mem_flags,
        image_format: &cl_image_format,
        mut image_desc: cl_image_desc,
        image_elem_size: u8,
        host_ptr: *mut c_void,
        props: Vec<cl_mem_properties>,
    ) -> CLResult<Arc<Image>> {
        // we have to sanitize the image_desc a little for internal use
        let api_image_desc = image_desc;
        let dims = image_desc.dims();
        let is_array = image_desc.is_array();
        if dims < 3 {
            image_desc.image_depth = 1;
        }
        if dims < 2 {
            image_desc.image_height = 1;
        }
        if !is_array {
            image_desc.image_array_size = 1;
        }

        let res_type = if bit_check(flags, CL_MEM_ALLOC_HOST_PTR) {
            ResourceType::Staging
        } else {
            ResourceType::Normal
        };

        let texture = if parent.is_none() {
            let mut texture = context.create_texture(
                &image_desc,
                image_format,
                host_ptr,
                bit_check(flags, CL_MEM_COPY_HOST_PTR),
                res_type,
            );

            // If allocating a staging resource fails, just retry with a normal one, as
            // `CL_MEM_ALLOC_HOST_PTR` is merely a performance hint.
            if res_type == ResourceType::Staging && texture.is_err() {
                texture = context.create_texture(
                    &image_desc,
                    image_format,
                    host_ptr,
                    bit_check(flags, CL_MEM_COPY_HOST_PTR),
                    ResourceType::Normal,
                )
            }

            Some(texture?)
        } else {
            None
        };

        let host_ptr = if bit_check(flags, CL_MEM_USE_HOST_PTR) {
            host_ptr as usize
        } else {
            0
        };

        let pipe_format = image_format.to_pipe_format().unwrap();
        Ok(Arc::new(Image {
            base: Self {
                base: CLObjectBase::new(RusticlTypes::Image),
                context,
                parent,
                mem_type,
                flags,
                size: image_desc.pixels() * image_format.pixel_size().unwrap() as usize,
                host_ptr,
                props,
                gl_obj: None,
                cbs: Mutex::new(Vec::new()),
                res: texture,
                maps: Mappings::new(),
            },
            image_format: *image_format,
            pipe_format,
            image_desc: api_image_desc,
            image_elem_size,
        }))
    }

    pub fn arc_from_raw(ptr: cl_mem) -> CLResult<Mem> {
        let mem = Self::ref_from_raw(ptr)?;
        match mem.base.get_type()? {
            RusticlTypes::Buffer => Ok(Mem::Buffer(Buffer::arc_from_raw(ptr)?)),
            RusticlTypes::Image => Ok(Mem::Image(Image::arc_from_raw(ptr)?)),
            _ => Err(CL_INVALID_MEM_OBJECT),
        }
    }

    pub fn arcs_from_arr(objs: *const cl_mem, count: u32) -> CLResult<Vec<Mem>> {
        let count = count as usize;
        let mut res = Vec::with_capacity(count);
        for i in 0..count {
            res.push(Self::arc_from_raw(unsafe { *objs.add(i) })?);
        }
        Ok(res)
    }

    pub fn from_gl(
        context: Arc<Context>,
        flags: cl_mem_flags,
        gl_export_manager: &GLExportManager,
    ) -> CLResult<cl_mem> {
        let export_in = &gl_export_manager.export_in;
        let export_out = &gl_export_manager.export_out;

        let (mem_type, gl_object_type) = target_from_gl(export_in.target)?;
        let gl_mem_props = gl_export_manager.get_gl_mem_props()?;

        // Handle buffers
        let (image_format, pipe_format, rusticl_type) = if gl_export_manager.is_gl_buffer() {
            (
                cl_image_format::default(),
                pipe_format::PIPE_FORMAT_NONE,
                RusticlTypes::Buffer,
            )
        } else {
            let image_format =
                format_from_gl(export_out.internal_format).ok_or(CL_OUT_OF_HOST_MEMORY)?;
            (
                image_format,
                image_format.to_pipe_format().unwrap(),
                RusticlTypes::Image,
            )
        };

        let imported_gl_tex = context.import_gl_buffer(
            export_out.dmabuf_fd as u32,
            export_out.modifier,
            mem_type,
            export_in.target,
            pipe_format,
            gl_mem_props.clone(),
        )?;

        // Cube map faces are not linear in memory, so copy all contents of the desired face
        // into a 2D image and copy it back after the GL release.
        let (shadow_map, texture) = if is_cube_map_face(export_in.target) {
            let shadow = create_shadow_slice(&imported_gl_tex, image_format)?;

            let mut res_map = HashMap::new();
            for (k, v) in shadow.iter() {
                let gl_res = imported_gl_tex.get(k).unwrap().clone();
                res_map.insert(v.clone(), gl_res);
            }

            (Some(res_map), shadow)
        } else {
            (None, imported_gl_tex)
        };

        // This isn't really supported, but we want to know if anything actually hits it, as
        // it's certainly not covered by the CL CTS.
        if mem_type != CL_MEM_OBJECT_BUFFER {
            assert_eq!(gl_mem_props.offset, 0);
        }

        let base = Self {
            base: CLObjectBase::new(rusticl_type),
            context,
            parent: None,
            mem_type,
            flags,
            size: gl_mem_props.size(),
            host_ptr: 0,
            props: Vec::new(),
            gl_obj: Some(GLObject {
                gl_object_target: gl_export_manager.export_in.target,
                gl_object_type,
                gl_object_name: export_in.obj,
                shadow_map,
            }),
            cbs: Mutex::new(Vec::new()),
            res: Some(texture),
            maps: Mappings::new(),
        };

        Ok(if rusticl_type == RusticlTypes::Buffer {
            Arc::new(Buffer {
                base,
                offset: gl_mem_props.offset as usize,
            })
            .into_cl()
        } else {
            Arc::new(Image {
                base,
                image_format,
                pipe_format,
                image_desc: cl_image_desc {
                    image_type: mem_type,
                    image_width: gl_mem_props.width as usize,
                    image_height: gl_mem_props.height as usize,
                    image_depth: gl_mem_props.depth as usize,
                    image_array_size: gl_mem_props.array_size as usize,
                    image_row_pitch: 0,
                    image_slice_pitch: 0,
                    num_mip_levels: 1,
                    num_samples: 1,
                    ..Default::default()
                },
                image_elem_size: gl_mem_props.pixel_size,
            })
            .into_cl()
        })
    }

    pub fn is_buffer(&self) -> bool {
        self.mem_type == CL_MEM_OBJECT_BUFFER
    }

    pub fn has_same_parent(&self, other: &Self) -> bool {
        ptr::eq(self.get_parent(), other.get_parent())
    }

    // This is somewhat bogus, because it won't work with system SVM, but the spec requires us
    // to implement it.
    pub fn is_svm(&self) -> bool {
        let mem = self.get_parent();
        self.context.find_svm_alloc(mem.host_ptr).is_some()
            && bit_check(mem.flags, CL_MEM_USE_HOST_PTR)
    }

    pub fn get_res_of_dev(&self, dev: &Device) -> CLResult<&Arc<PipeResource>> {
        self.get_parent()
            .res
            .as_ref()
            .and_then(|resources| resources.get(dev))
            .ok_or(CL_OUT_OF_HOST_MEMORY)
    }

    fn get_parent(&self) -> &Self {
        if let Some(parent) = &self.parent {
            parent
        } else {
            self
        }
    }

    fn has_user_shadow_buffer(&self, d: &Device) -> CLResult<bool> {
        let r = self.get_res_of_dev(d)?;
        Ok(!r.is_user && bit_check(self.flags, CL_MEM_USE_HOST_PTR))
    }

    pub fn host_ptr(&self) -> *mut c_void {
        self.host_ptr as *mut c_void
    }

    pub fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
        self.maps.lock().unwrap().contains_ptr(ptr)
    }
}

impl Drop for MemBase {
    fn drop(&mut self) {
        let cbs = mem::take(self.cbs.get_mut().unwrap());
        for cb in cbs.into_iter().rev() {
            cb.call(self);
        }

        for (d, tx) in self.maps.get_mut().unwrap().tx.drain() {
            d.helper_ctx().unmap(tx.tx);
        }
    }
}

impl Buffer {
    fn apply_offset(&self, offset: usize) -> CLResult<usize> {
        self.offset.checked_add(offset).ok_or(CL_OUT_OF_HOST_MEMORY)
    }

    pub fn copy_rect(
        &self,
        dst: &Self,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let (offset, size) =
            CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
        let tx_src = self.tx(q, ctx, offset, size, RWFlags::RD)?;

        let (offset, size) =
            CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
        let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;

        // TODO check to use hw accelerated paths (e.g. resource_copy_region or blits)
        sw_copy(
            tx_src.ptr(),
            tx_dst.ptr(),
            region,
            &CLVec::default(),
            src_row_pitch,
            src_slice_pitch,
            &CLVec::default(),
            dst_row_pitch,
            dst_slice_pitch,
            1,
        );

        Ok(())
    }

    pub fn copy_to_buffer(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Buffer,
        src_offset: usize,
        dst_offset: usize,
        size: usize,
    ) -> CLResult<()> {
        let src_offset = self.apply_offset(src_offset)?;
        let dst_offset = dst.apply_offset(dst_offset)?;
        let src_res = self.get_res_of_dev(q.device)?;
        let dst_res = dst.get_res_of_dev(q.device)?;

        let bx = create_pipe_box(
            [src_offset, 0, 0].into(),
            [size, 1, 1].into(),
            CL_MEM_OBJECT_BUFFER,
        )?;
        let dst_origin: [u32; 3] = [
            dst_offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            0,
            0,
        ];

        ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
        Ok(())
    }

    pub fn copy_to_image(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Image,
        src_offset: usize,
        dst_origin: CLVec<usize>,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let src_offset = self.apply_offset(src_offset)?;
        let bpp = dst.image_format.pixel_size().unwrap().into();
        let src_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];
        let size = CLVec::calc_size(region, src_pitch);
        let tx_src = self.tx(q, ctx, src_offset, size, RWFlags::RD)?;

        // If the image was created from a buffer, use the image's row and slice pitch instead.
        let tx_dst;
        let dst_pitch;
        if let Some(Mem::Buffer(buffer)) = &dst.parent {
            dst_pitch = [
                bpp,
                dst.image_desc.row_pitch()? as usize,
                dst.image_desc.slice_pitch(),
            ];

            let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
            tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
        } else {
            tx_dst = dst.tx_image(
                q,
                ctx,
                &create_pipe_box(dst_origin, *region, dst.mem_type)?,
                RWFlags::WR,
            )?;

            dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
        }

        // None of the pitch values may be 0.
        debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
        debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);

        sw_copy(
            tx_src.ptr(),
            tx_dst.ptr(),
            region,
            &CLVec::default(),
            src_pitch[1],
            src_pitch[2],
            &CLVec::default(),
            dst_pitch[1],
            dst_pitch[2],
            bpp as u8,
        );
        Ok(())
    }

    pub fn fill(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        pattern: &[u8],
        offset: usize,
        size: usize,
    ) -> CLResult<()> {
        let offset = self.apply_offset(offset)?;
        let res = self.get_res_of_dev(q.device)?;
        ctx.clear_buffer(
            res,
            pattern,
            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        );
        Ok(())
    }

    pub fn map(&self, dev: &'static Device, offset: usize) -> CLResult<MutMemoryPtr> {
        let ptr = if self.has_user_shadow_buffer(dev)? {
            self.host_ptr()
        } else {
            let mut lock = self.maps.lock().unwrap();

            if let Entry::Vacant(e) = lock.tx.entry(dev) {
                let (tx, res) = self.tx_raw_async(dev, RWFlags::RW)?;
                e.insert(MappingTransfer::new(tx, res));
            } else {
                lock.mark_pending(dev);
            }

            lock.tx.get(dev).unwrap().tx.ptr()
        };

        let ptr = unsafe { ptr.add(offset) };
        // SAFETY: it's required that applications do not cause data races
        Ok(unsafe { MutMemoryPtr::from_ptr(ptr) })
    }

    pub fn read(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        offset: usize,
        ptr: MutMemoryPtr,
        size: usize,
    ) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;

        unsafe {
            ptr::copy_nonoverlapping(tx.ptr(), ptr, size);
        }

        Ok(())
    }

    pub fn read_rect(
        &self,
        dst: MutMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let dst = dst.as_ptr();
        let (offset, size) =
            CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]);
        let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?;

        sw_copy(
            tx.ptr(),
            dst,
            region,
            &CLVec::default(),
            src_row_pitch,
            src_slice_pitch,
            dst_origin,
            dst_row_pitch,
            dst_slice_pitch,
            1,
        );

        Ok(())
    }

    // TODO: only sync on map when the memory is not mapped with discard
    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.increase_ref(q.device, ptr) {
            return Ok(());
        }

        if self.has_user_shadow_buffer(q.device)? {
            self.read(
                q,
                ctx,
                0,
                // SAFETY: it's required that applications do not cause data races
                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
                self.size,
            )
        } else {
            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
                let res = self.get_res_of_dev(q.device)?;
                let bx = create_pipe_box(
                    [self.offset, 0, 0].into(),
                    [self.size, 1, 1].into(),
                    CL_MEM_OBJECT_BUFFER,
                )?;
                ctx.resource_copy_region(res, shadow, &[0; 3], &bx);
            }
            Ok(())
        }
    }

    fn tx<'a>(
        &self,
        q: &Queue,
        ctx: &'a PipeContext,
        offset: usize,
        size: usize,
        rw: RWFlags,
    ) -> CLResult<GuardedPipeTransfer<'a>> {
        let offset = self.apply_offset(offset)?;
        let r = self.get_res_of_dev(q.device)?;

        Ok(ctx
            .buffer_map(
                r,
                offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                rw,
                ResourceMapType::Normal,
            )
            .ok_or(CL_OUT_OF_RESOURCES)?
            .with_ctx(ctx))
    }

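    // Tries a direct mapping first (see `can_map_directly`); if that's not possible, allocates
    // a coherent staging shadow buffer and maps that instead, returning it alongside the
    // transfer so callers can sync it with the real resource.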
    fn tx_raw_async(
        &self,
        dev: &Device,
        rw: RWFlags,
    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
        let r = self.get_res_of_dev(dev)?;
        let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
        let size = self.size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
        let ctx = dev.helper_ctx();

        let tx = if can_map_directly(dev, r) {
            ctx.buffer_map_directly(r, offset, size, rw)
        } else {
            None
        };

        if let Some(tx) = tx {
            Ok((tx, None))
        } else {
            let shadow = dev
                .screen()
                .resource_create_buffer(size as u32, ResourceType::Staging, 0)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            let tx = ctx
                .buffer_map_coherent(&shadow, 0, size, rw)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            Ok((tx, Some(shadow)))
        }
    }

    // TODO: only sync on unmap when the memory is not mapped for writing
    pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.contains_ptr(ptr) {
            return Ok(());
        }

        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
        if needs_sync {
            if let Some(shadow) = shadow {
                let res = self.get_res_of_dev(q.device)?;
                let offset = self.offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?;
                let bx = create_pipe_box(
                    CLVec::default(),
                    [self.size, 1, 1].into(),
                    CL_MEM_OBJECT_BUFFER,
                )?;

                ctx.resource_copy_region(shadow, res, &[offset, 0, 0], &bx);
            } else if self.has_user_shadow_buffer(q.device)? {
                self.write(
                    q,
                    ctx,
                    0,
                    // SAFETY: it's required that applications do not cause data races
                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
                    self.size,
                )?;
            }
        }

        lock.clean_up_tx(q.device, ctx);

        Ok(())
    }

    pub fn write(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        offset: usize,
        ptr: ConstMemoryPtr,
        size: usize,
    ) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let offset = self.apply_offset(offset)?;
        let r = self.get_res_of_dev(q.device)?;
        ctx.buffer_subdata(
            r,
            offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
            ptr,
            size.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
        );
        Ok(())
    }

    pub fn write_rect(
        &self,
        src: ConstMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        src_row_pitch: usize,
        src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let src = src.as_ptr();
        let (offset, size) =
            CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]);
        let tx = self.tx(q, ctx, offset, size, RWFlags::WR)?;

        sw_copy(
            src,
            tx.ptr(),
            region,
            src_origin,
            src_row_pitch,
            src_slice_pitch,
            &CLVec::default(),
            dst_row_pitch,
            dst_slice_pitch,
            1,
        );

        Ok(())
    }
}

impl Image {
    pub fn copy_to_buffer(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Buffer,
        src_origin: CLVec<usize>,
        dst_offset: usize,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let dst_offset = dst.apply_offset(dst_offset)?;
        let bpp = self.image_format.pixel_size().unwrap().into();

        // If the image was created from a buffer, use the image's row and slice pitch instead.
        let src_pitch;
        let tx_src;
        if let Some(Mem::Buffer(buffer)) = &self.parent {
            src_pitch = [
                bpp,
                self.image_desc.row_pitch()? as usize,
                self.image_desc.slice_pitch(),
            ];
            let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
            tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
        } else {
            tx_src = self.tx_image(
                q,
                ctx,
                &create_pipe_box(src_origin, *region, self.mem_type)?,
                RWFlags::RD,
            )?;
            src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
        }

        let dst_pitch = [bpp, bpp * region[0], bpp * region[0] * region[1]];

        let dst_origin: CLVec<usize> = [dst_offset, 0, 0].into();
        let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
        let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?;

        // None of the pitch values may be 0.
        debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
        debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);

        sw_copy(
            tx_src.ptr(),
            tx_dst.ptr(),
            region,
            &CLVec::default(),
            src_pitch[1],
            src_pitch[2],
            &CLVec::default(),
            dst_pitch[1],
            dst_pitch[2],
            bpp as u8,
        );
        Ok(())
    }

    pub fn copy_to_image(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        dst: &Image,
        src_origin: CLVec<usize>,
        dst_origin: CLVec<usize>,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let src_parent = self.get_parent();
        let dst_parent = dst.get_parent();
        let src_res = src_parent.get_res_of_dev(q.device)?;
        let dst_res = dst_parent.get_res_of_dev(q.device)?;

        // Only use sw_copy when the mem objects have different types or when the copy can
        // involve custom strides (e.g. an image2d created from a buffer or another image).
        if src_parent.is_buffer() || dst_parent.is_buffer() {
            let bpp = self.image_format.pixel_size().unwrap().into();

            let tx_src;
            let tx_dst;
            let dst_pitch;
            let src_pitch;
            if let Some(Mem::Buffer(buffer)) = &self.parent {
                src_pitch = [
                    bpp,
                    self.image_desc.row_pitch()? as usize,
                    self.image_desc.slice_pitch(),
                ];

                let (offset, size) = CLVec::calc_offset_size(src_origin, region, src_pitch);
                tx_src = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
            } else {
                tx_src = self.tx_image(
                    q,
                    ctx,
                    &create_pipe_box(src_origin, *region, src_parent.mem_type)?,
                    RWFlags::RD,
                )?;

                src_pitch = [1, tx_src.row_pitch() as usize, tx_src.slice_pitch()];
            }

            if let Some(Mem::Buffer(buffer)) = &dst.parent {
                // If the image was created from a buffer, use the image's row and slice pitch
                // instead.
                dst_pitch = [
                    bpp,
                    dst.image_desc.row_pitch()? as usize,
                    dst.image_desc.slice_pitch(),
                ];

                let (offset, size) = CLVec::calc_offset_size(dst_origin, region, dst_pitch);
                tx_dst = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;
            } else {
                tx_dst = dst.tx_image(
                    q,
                    ctx,
                    &create_pipe_box(dst_origin, *region, dst_parent.mem_type)?,
                    RWFlags::WR,
                )?;

                dst_pitch = [1, tx_dst.row_pitch() as usize, tx_dst.slice_pitch()];
            }

            // None of the pitch values may be 0.
            debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0);
            debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0);

            sw_copy(
                tx_src.ptr(),
                tx_dst.ptr(),
                region,
                &CLVec::default(),
                src_pitch[1],
                src_pitch[2],
                &CLVec::default(),
                dst_pitch[1],
                dst_pitch[2],
                bpp as u8,
            )
        } else {
            let bx = create_pipe_box(src_origin, *region, src_parent.mem_type)?;
            let mut dst_origin: [u32; 3] = dst_origin.try_into()?;

            if src_parent.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                (dst_origin[1], dst_origin[2]) = (dst_origin[2], dst_origin[1]);
            }

            ctx.resource_copy_region(src_res, dst_res, &dst_origin, &bx);
        }
        Ok(())
    }

    pub fn fill(
        &self,
        q: &Queue,
        ctx: &PipeContext,
        pattern: &[u32],
        origin: &CLVec<usize>,
        region: &CLVec<usize>,
    ) -> CLResult<()> {
        let res = self.get_res_of_dev(q.device)?;

        // make sure we allocate multiples of 4 bytes so drivers don't read out of bounds or
        // unaligned.
        // TODO: use div_ceil once it's available
        let pixel_size = align(
            self.image_format.pixel_size().unwrap().into(),
            size_of::<u32>(),
        );
        let mut new_pattern: Vec<u32> = vec![0; pixel_size / size_of::<u32>()];

        // we don't support CL_DEPTH for now
        assert!(pattern.len() == 4);

        // SAFETY: pointers have to be valid for reads/writes of exactly one pixel of their
        // respective format.
        // `new_pattern` has the correct size due to the `pixel_size` above.
        // `pattern` is validated through the CL API; not following the CL API rules results in
        // undefined behavior. It's expected to be a 4 component array of 32 bit values, except
        // for CL_DEPTH where it's just one value.
        unsafe {
            util_format_pack_rgba(
                self.pipe_format,
                new_pattern.as_mut_ptr().cast(),
                pattern.as_ptr().cast(),
                1,
            );
        }

        // If the image was created from a buffer, use clear_image_buffer instead.
        if self.is_parent_buffer() {
            let strides = (
                self.image_desc.row_pitch()? as usize,
                self.image_desc.slice_pitch(),
            );
            ctx.clear_image_buffer(res, &new_pattern, origin, region, strides, pixel_size);
        } else {
            let bx = create_pipe_box(*origin, *region, self.mem_type)?;
            ctx.clear_texture(res, &new_pattern, &bx);
        }

        Ok(())
    }

    pub fn is_parent_buffer(&self) -> bool {
        matches!(self.parent, Some(Mem::Buffer(_)))
    }

    pub fn map(
        &self,
        dev: &'static Device,
        origin: &CLVec<usize>,
        row_pitch: &mut usize,
        slice_pitch: &mut usize,
    ) -> CLResult<*mut c_void> {
        // we might have a host_ptr shadow buffer or an image created from a buffer
        let ptr = if self.has_user_shadow_buffer(dev)? {
            *row_pitch = self.image_desc.image_row_pitch;
            *slice_pitch = self.image_desc.image_slice_pitch;
            self.host_ptr()
        } else if let Some(Mem::Buffer(buffer)) = &self.parent {
            *row_pitch = self.image_desc.image_row_pitch;
            *slice_pitch = self.image_desc.image_slice_pitch;
            buffer.map(dev, 0)?.as_ptr()
        } else {
            let mut lock = self.maps.lock().unwrap();

            if let Entry::Vacant(e) = lock.tx.entry(dev) {
                let bx = self.image_desc.bx()?;
                let (tx, res) = self.tx_raw_async(dev, &bx, RWFlags::RW)?;
                e.insert(MappingTransfer::new(tx, res));
            } else {
                lock.mark_pending(dev);
            }

            let tx = &lock.tx.get(dev).unwrap().tx;

            if self.image_desc.dims() > 1 {
                *row_pitch = tx.row_pitch() as usize;
            }
            if self.image_desc.dims() > 2 || self.image_desc.is_array() {
                *slice_pitch = tx.slice_pitch();
            }

            tx.ptr()
        };

        let ptr = unsafe {
            ptr.add(
                *origin
                    * [
                        self.image_format.pixel_size().unwrap().into(),
                        *row_pitch,
                        *slice_pitch,
                    ],
            )
        };

        Ok(ptr)
    }

    pub fn pipe_image_host_access(&self) -> u16 {
        // those flags are all mutually exclusive
        (if bit_check(self.flags, CL_MEM_HOST_READ_ONLY) {
            PIPE_IMAGE_ACCESS_READ
        } else if bit_check(self.flags, CL_MEM_HOST_WRITE_ONLY) {
            PIPE_IMAGE_ACCESS_WRITE
        } else if bit_check(self.flags, CL_MEM_HOST_NO_ACCESS) {
            0
        } else {
            PIPE_IMAGE_ACCESS_READ_WRITE
        }) as u16
    }

    pub fn read(
        &self,
        dst: MutMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_origin: &CLVec<usize>,
        dst_row_pitch: usize,
        dst_slice_pitch: usize,
    ) -> CLResult<()> {
        let dst = dst.as_ptr();
        let pixel_size = self.image_format.pixel_size().unwrap();

        let tx;
        let src_row_pitch;
        let src_slice_pitch;
        if let Some(Mem::Buffer(buffer)) = &self.parent {
            src_row_pitch = self.image_desc.image_row_pitch;
            src_slice_pitch = self.image_desc.image_slice_pitch;

            let (offset, size) = CLVec::calc_offset_size(
                src_origin,
                region,
                [pixel_size.into(), src_row_pitch, src_slice_pitch],
            );

            tx = buffer.tx(q, ctx, offset, size, RWFlags::RD)?;
        } else {
            let bx = create_pipe_box(*src_origin, *region, self.mem_type)?;
            tx = self.tx_image(q, ctx, &bx, RWFlags::RD)?;
            src_row_pitch = tx.row_pitch() as usize;
            src_slice_pitch = tx.slice_pitch();
        };

        sw_copy(
            tx.ptr(),
            dst,
            region,
            &CLVec::default(),
            src_row_pitch,
            src_slice_pitch,
            &CLVec::default(),
            dst_row_pitch,
            dst_slice_pitch,
            pixel_size,
        );

        Ok(())
    }

1471     // TODO: only sync on map when the memory is not mapped with discard
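    // Makes the mapping at ptr visible to the host. Only the first active
    // mapping triggers a sync: the image content is either read back into the
    // user-provided host pointer, or copied from the device resource into the
    // staging shadow texture.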
    pub fn sync_shadow(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.increase_ref(q.device, ptr) {
            return Ok(());
        }

        if self.has_user_shadow_buffer(q.device)? {
            self.read(
                // SAFETY: it's required that applications do not cause data races
                unsafe { MutMemoryPtr::from_ptr(self.host_ptr()) },
                q,
                ctx,
                &self.image_desc.size(),
                &CLVec::default(),
                self.image_desc.image_row_pitch,
                self.image_desc.image_slice_pitch,
            )
        } else {
            if let Some(shadow) = lock.tx.get(q.device).and_then(|tx| tx.shadow.as_ref()) {
                let res = self.get_res_of_dev(q.device)?;
                let bx = self.image_desc.bx()?;
                ctx.resource_copy_region(res, shadow, &[0, 0, 0], &bx);
            }
            Ok(())
        }
    }

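    // Maps the given box of this image's device resource for CPU access. The
    // returned guard keeps the transfer bound to ctx so it gets cleaned up
    // when dropped.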
    fn tx_image<'a>(
        &self,
        q: &Queue,
        ctx: &'a PipeContext,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> CLResult<GuardedPipeTransfer<'a>> {
        let r = self.get_res_of_dev(q.device)?;
        Ok(ctx
            .texture_map(r, bx, rw, ResourceMapType::Normal)
            .ok_or(CL_OUT_OF_RESOURCES)?
            .with_ctx(ctx))
    }

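    // Maps the box through the device's helper context. If the resource can
    // be mapped directly no staging copy is needed; otherwise a coherently
    // mapped staging texture is created and handed back as the shadow
    // resource.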
    fn tx_raw_async(
        &self,
        dev: &Device,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> CLResult<(PipeTransfer, Option<PipeResource>)> {
        let r = self.get_res_of_dev(dev)?;
        let ctx = dev.helper_ctx();

        let tx = if can_map_directly(dev, r) {
            ctx.texture_map_directly(r, bx, rw)
        } else {
            None
        };

        if let Some(tx) = tx {
            Ok((tx, None))
        } else {
            let shadow = dev
                .screen()
                .resource_create_texture(
                    r.width(),
                    r.height(),
                    r.depth(),
                    r.array_size(),
                    cl_mem_type_to_texture_target(self.image_desc.image_type),
                    self.pipe_format,
                    ResourceType::Staging,
                    false,
                )
                .ok_or(CL_OUT_OF_RESOURCES)?;
            let tx = ctx
                .texture_map_coherent(&shadow, bx, rw)
                .ok_or(CL_OUT_OF_RESOURCES)?;
            Ok((tx, Some(shadow)))
        }
    }

    // TODO: only sync on unmap when the memory is not mapped for writing
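    // Drops one reference to the mapping at ptr. Once the last mapping goes
    // away, pending writes are flushed back to the device resource, either
    // from the staging shadow texture or from the user's host pointer.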
    pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
        let ptr = ptr.as_ptr();
        let mut lock = self.maps.lock().unwrap();
        if !lock.contains_ptr(ptr) {
            return Ok(());
        }

        let (needs_sync, shadow) = lock.decrease_ref(ptr, q.device);
        if needs_sync {
            if let Some(shadow) = shadow {
                let res = self.get_res_of_dev(q.device)?;
                let bx = self.image_desc.bx()?;
                ctx.resource_copy_region(shadow, res, &[0, 0, 0], &bx);
            } else if self.has_user_shadow_buffer(q.device)? {
                self.write(
                    // SAFETY: it's required that applications do not cause data races
                    unsafe { ConstMemoryPtr::from_ptr(self.host_ptr()) },
                    q,
                    ctx,
                    &self.image_desc.size(),
                    self.image_desc.image_row_pitch,
                    self.image_desc.image_slice_pitch,
                    &CLVec::default(),
                )?;
            }
        }

        lock.clean_up_tx(q.device, ctx);

        Ok(())
    }

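    // Writes the host buffer src into the region of this image starting at
    // dst_origin. Buffer-backed images go through a software copy into a
    // mapped buffer transfer; plain images are uploaded via texture_subdata.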
    pub fn write(
        &self,
        src: ConstMemoryPtr,
        q: &Queue,
        ctx: &PipeContext,
        region: &CLVec<usize>,
        src_row_pitch: usize,
        mut src_slice_pitch: usize,
        dst_origin: &CLVec<usize>,
    ) -> CLResult<()> {
        let src = src.as_ptr();
        let dst_row_pitch = self.image_desc.image_row_pitch;
        let dst_slice_pitch = self.image_desc.image_slice_pitch;

        if let Some(Mem::Buffer(buffer)) = &self.parent {
            let pixel_size = self.image_format.pixel_size().unwrap();
            let (offset, size) = CLVec::calc_offset_size(
                dst_origin,
                region,
                [pixel_size.into(), dst_row_pitch, dst_slice_pitch],
            );
            let tx = buffer.tx(q, ctx, offset, size, RWFlags::WR)?;

            sw_copy(
                src,
                tx.ptr(),
                region,
                &CLVec::default(),
                src_row_pitch,
                src_slice_pitch,
                &CLVec::default(),
                dst_row_pitch,
                dst_slice_pitch,
                pixel_size,
            );
        } else {
            let res = self.get_res_of_dev(q.device)?;
            let bx = create_pipe_box(*dst_origin, *region, self.mem_type)?;

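            // Gallium addresses array layers through the slice pitch, so for
            // 1D image arrays the row pitch serves as the stride between
            // layers.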
            if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
                src_slice_pitch = src_row_pitch;
            }

            ctx.texture_subdata(
                res,
                &bx,
                src,
                src_row_pitch
                    .try_into()
                    .map_err(|_| CL_OUT_OF_HOST_MEMORY)?,
                src_slice_pitch,
            );
        }
        Ok(())
    }
}

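// A CL sampler object: holds the CL-level state needed to build the matching
// gallium pipe_sampler_state. A hypothetical usage sketch, assuming a
// `ctx: Arc<Context>` is at hand:
//   let s = Sampler::new(ctx, true, CL_ADDRESS_CLAMP, CL_FILTER_NEAREST, None);
//   let pipe_state = s.pipe();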
pub struct Sampler {
    pub base: CLObjectBase<CL_INVALID_SAMPLER>,
    pub context: Arc<Context>,
    pub normalized_coords: bool,
    pub addressing_mode: cl_addressing_mode,
    pub filter_mode: cl_filter_mode,
    pub props: Option<Properties<cl_sampler_properties>>,
}

impl_cl_type_trait!(cl_sampler, Sampler, CL_INVALID_SAMPLER);

impl Sampler {
    pub fn new(
        context: Arc<Context>,
        normalized_coords: bool,
        addressing_mode: cl_addressing_mode,
        filter_mode: cl_filter_mode,
        props: Option<Properties<cl_sampler_properties>>,
    ) -> Arc<Sampler> {
        Arc::new(Self {
            base: CLObjectBase::new(RusticlTypes::Sampler),
            context,
            normalized_coords,
            addressing_mode,
            filter_mode,
            props,
        })
    }

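    // Converts the sampler constants as encoded by NIR
    // (cl_sampler_addressing_mode/cl_sampler_filter_mode) back into the CL
    // API values.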
    pub fn nir_to_cl(
        addressing_mode: u32,
        filter_mode: u32,
        normalized_coords: u32,
    ) -> (cl_addressing_mode, cl_filter_mode, bool) {
        let addr_mode = match addressing_mode {
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_NONE => CL_ADDRESS_NONE,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE => {
                CL_ADDRESS_CLAMP_TO_EDGE
            }
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_CLAMP => CL_ADDRESS_CLAMP,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT => CL_ADDRESS_REPEAT,
            cl_sampler_addressing_mode::SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED => {
                CL_ADDRESS_MIRRORED_REPEAT
            }
            _ => panic!("unknown addressing_mode"),
        };

        let filter = match filter_mode {
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_NEAREST => CL_FILTER_NEAREST,
            cl_sampler_filter_mode::SAMPLER_FILTER_MODE_LINEAR => CL_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),
        };

        (addr_mode, filter, normalized_coords != 0)
    }

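    // Builds a gallium pipe_sampler_state from a CL sampler state tuple: the
    // wrap mode applies to all three coordinates and the filter to both
    // minification and magnification. E.g. (hypothetical values):
    //   let state = Sampler::cl_to_pipe((CL_ADDRESS_REPEAT, CL_FILTER_LINEAR, true));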
    pub fn cl_to_pipe(
        (addressing_mode, filter_mode, normalized_coords): (
            cl_addressing_mode,
            cl_filter_mode,
            bool,
        ),
    ) -> pipe_sampler_state {
        let mut res = pipe_sampler_state::default();

        let wrap = match addressing_mode {
            CL_ADDRESS_CLAMP_TO_EDGE => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
            CL_ADDRESS_CLAMP => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_BORDER,
            CL_ADDRESS_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_REPEAT,
            CL_ADDRESS_MIRRORED_REPEAT => pipe_tex_wrap::PIPE_TEX_WRAP_MIRROR_REPEAT,
            // TODO: what's a reasonable default?
            _ => pipe_tex_wrap::PIPE_TEX_WRAP_CLAMP_TO_EDGE,
        };

        let img_filter = match filter_mode {
            CL_FILTER_NEAREST => pipe_tex_filter::PIPE_TEX_FILTER_NEAREST,
            CL_FILTER_LINEAR => pipe_tex_filter::PIPE_TEX_FILTER_LINEAR,
            _ => panic!("unknown filter_mode"),
        };

        res.set_min_img_filter(img_filter);
        res.set_mag_img_filter(img_filter);
        res.set_unnormalized_coords((!normalized_coords).into());
        res.set_wrap_r(wrap);
        res.set_wrap_s(wrap);
        res.set_wrap_t(wrap);

        res
    }

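    // Convenience wrapper: converts this sampler's own state via cl_to_pipe.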
    pub fn pipe(&self) -> pipe_sampler_state {
        Self::cl_to_pipe((
            self.addressing_mode,
            self.filter_mode,
            self.normalized_coords,
        ))
    }
}