use crate::api::icd::*;
use crate::api::util::*;
use crate::core::format::*;
use crate::core::platform::*;
use crate::core::util::*;
use crate::core::version::*;
use crate::impl_cl_type_trait_base;

use mesa_rust::compiler::clc::*;
use mesa_rust::compiler::nir::*;
use mesa_rust::pipe::context::*;
use mesa_rust::pipe::device::load_screens;
use mesa_rust::pipe::fence::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::*;
use mesa_rust::pipe::transfer::PipeTransfer;
use mesa_rust_gen::*;
use mesa_rust_util::math::SetBitIndices;
use mesa_rust_util::static_assert;
use rusticl_opencl_gen::*;

use std::cmp::max;
use std::cmp::min;
use std::collections::HashMap;
use std::convert::TryInto;
use std::env;
use std::ffi::CStr;
use std::mem::transmute;
use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

pub struct Device {
    pub base: CLObjectBase<CL_INVALID_DEVICE>,
    pub screen: Arc<PipeScreen>,
    pub cl_version: CLVersion,
    pub clc_version: CLVersion,
    pub clc_versions: Vec<cl_name_version>,
    pub device_type: u32,
    pub embedded: bool,
    pub extension_string: String,
    pub extensions: Vec<cl_name_version>,
    pub spirv_extensions: Vec<&'static CStr>,
    pub clc_features: Vec<cl_name_version>,
    pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
    pub lib_clc: NirShader,
    pub caps: DeviceCaps,
    helper_ctx: Mutex<PipeContext>,
    reusable_ctx: Mutex<Vec<PipeContext>>,
}

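/// Device limits and feature flags queried once from the Gallium screen in
/// [`DeviceCaps::new`]. Image-related fields are left zeroed when image
/// support can't be advertised.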
#[derive(Default)]
pub struct DeviceCaps {
    pub has_3d_image_writes: bool,
    pub has_depth_images: bool,
    pub has_images: bool,
    pub has_rw_images: bool,
    pub has_timestamp: bool,
    pub image_2d_size: u32,
    pub max_read_images: u32,
    pub max_write_images: u32,
    pub timer_resolution: u32,
}

impl DeviceCaps {
    fn new(screen: &PipeScreen) -> Self {
        let cap_timestamp = screen.caps().query_timestamp;
        let timer_resolution = screen.caps().timer_resolution;

        let max_write_images =
            Self::shader_param(screen, pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) as u32;
        let max_read_images =
            Self::shader_param(screen, pipe_shader_cap::PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS) as u32;
        let image_2d_size = screen.caps().max_texture_2d_size;

        let has_images = screen.caps().texture_sampler_independent &&
            screen.caps().image_store_formatted &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            max_read_images >= 8 &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            max_write_images >= 8 &&
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            image_2d_size >= 2048;

        Self {
            has_images,
            has_timestamp: cap_timestamp && timer_resolution > 0,
            image_2d_size: has_images.then_some(image_2d_size).unwrap_or_default(),
            max_read_images: has_images.then_some(max_read_images).unwrap_or_default(),
            max_write_images: has_images.then_some(max_write_images).unwrap_or_default(),
            timer_resolution,
            ..Default::default()
        }
    }

    fn shader_param(screen: &PipeScreen, cap: pipe_shader_cap) -> i32 {
        screen.shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }
}

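/// Interface to the device's internal helper context. The implementation below
/// wraps a mutex-guarded `PipeContext`, so helper operations are serialized.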
pub trait HelperContextWrapper {
    #[must_use]
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext);

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
    fn delete_compute_state(&self, cso: *mut c_void);
    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32;

    fn map_buffer_unsynchronized(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn map_texture_unsynchronized(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn is_create_fence_fd_supported(&self) -> bool;
    fn import_fence(&self, fence_fd: &FenceFd) -> PipeFence;
}

pub struct HelperContext<'a> {
    lock: MutexGuard<'a, PipeContext>,
}

impl HelperContext<'_> {
    pub fn buffer_subdata(
        &self,
        res: &PipeResource,
        offset: c_uint,
        data: *const c_void,
        size: c_uint,
    ) {
        self.lock.buffer_subdata(res, offset, data, size)
    }

    pub fn texture_subdata(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        data: *const c_void,
        stride: u32,
        layer_stride: usize,
    ) {
        self.lock
            .texture_subdata(res, bx, data, stride, layer_stride)
    }
}

impl HelperContextWrapper for HelperContext<'_> {
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext),
    {
        func(self);
        self.lock.flush()
    }

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
        self.lock.create_compute_state(nir, static_local_mem)
    }

    fn delete_compute_state(&self, cso: *mut c_void) {
        self.lock.delete_compute_state(cso)
    }

    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info {
        self.lock.compute_state_info(state)
    }

    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32 {
        self.lock.compute_state_subgroup_size(state, block)
    }

    fn map_buffer_unsynchronized(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock.buffer_map_flags(
            res,
            offset,
            size,
            pipe_map_flags::PIPE_MAP_UNSYNCHRONIZED | rw.into(),
        )
    }

    fn map_texture_unsynchronized(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock
            .texture_map_flags(res, bx, pipe_map_flags::PIPE_MAP_UNSYNCHRONIZED | rw.into())
    }

    fn is_create_fence_fd_supported(&self) -> bool {
        self.lock.is_create_fence_fd_supported()
    }

    fn import_fence(&self, fd: &FenceFd) -> PipeFence {
        self.lock.import_fence(fd)
    }
}

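// Wires up the common CL object plumbing for `Device`, i.e. validating and
// converting `cl_device_id` handles, with CL_INVALID_DEVICE as the error code.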
impl_cl_type_trait_base!(cl_device_id, Device, [Device], CL_INVALID_DEVICE);

impl Device {
    fn new(screen: PipeScreen) -> Option<Device> {
        if !Self::check_valid(&screen) {
            return None;
        }

        let screen = Arc::new(screen);
        // Create before loading libclc as llvmpipe only creates the shader cache with the first
        // context being created.
        let helper_ctx = screen.create_context()?;
        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
        if lib_clc.is_none() {
            eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv");
        }

        let mut d = Self {
            caps: DeviceCaps::new(&screen),
            base: CLObjectBase::new(RusticlTypes::Device),
            helper_ctx: Mutex::new(helper_ctx),
            screen,
            cl_version: CLVersion::Cl3_0,
            clc_version: CLVersion::Cl3_0,
            clc_versions: Vec::new(),
            device_type: 0,
            embedded: false,
            extension_string: String::from(""),
            extensions: Vec::new(),
            spirv_extensions: Vec::new(),
            clc_features: Vec::new(),
            formats: HashMap::new(),
            lib_clc: lib_clc?,
            reusable_ctx: Mutex::new(Vec::new()),
        };

        // check if we are embedded or full profile first
        d.embedded = d.check_embedded_profile();

        d.set_device_type();

        d.fill_format_tables();

        // query supported extensions
        d.fill_extensions();

        // now figure out what version we are
        d.check_version();

        Some(d)
    }

    /// Converts a temporary reference to a static if and only if this device lives inside static
    /// memory.
    pub fn to_static(&self) -> Option<&'static Self> {
        devs().iter().find(|&dev| self == dev)
    }

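    // The format table maps each CL image format to the set of allowed memory
    // flags per image type, i.e. formats[cl_image_format][cl_mem_object_type]
    // yields a cl_mem_flags mask (0 meaning unsupported).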
    fn fill_format_tables(&mut self) {
        // no need to do this if we don't support images
        if !self.caps.has_images {
            return;
        }

        for f in FORMATS {
            let mut fs = HashMap::new();
            for t in CL_IMAGE_TYPES {
                // depth images are only valid for 2D and 2DArray
                if [CL_DEPTH, CL_DEPTH_STENCIL].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY].contains(&t)
                {
                    continue;
                }

                // the CTS doesn't test them, so let's not advertise them by accident if they are
                // broken
                if t == CL_MEM_OBJECT_IMAGE1D_BUFFER
                    && [CL_RGB, CL_RGBx].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_UNORM_SHORT_565, CL_UNORM_SHORT_555]
                        .contains(&f.cl_image_format.image_channel_data_type)
                {
                    continue;
                }

                let mut flags: cl_uint = 0;
                if self.screen.is_format_supported(
                    f.pipe,
                    cl_mem_type_to_texture_target(t),
                    PIPE_BIND_SAMPLER_VIEW,
                ) {
                    flags |= CL_MEM_READ_ONLY;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_WRITE_ONLY | CL_MEM_KERNEL_READ_AND_WRITE;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_READ_WRITE;
                }

                fs.insert(t, flags as cl_mem_flags);
            }

            // Restrict supported formats with 1DBuffer images. This is an OpenCL CTS workaround.
            // See https://github.com/KhronosGroup/OpenCL-CTS/issues/1889
            let image1d_mask = fs.get(&CL_MEM_OBJECT_IMAGE1D).copied().unwrap_or_default();
            if let Some(entry) = fs.get_mut(&CL_MEM_OBJECT_IMAGE1D_BUFFER) {
                *entry &= image1d_mask;
            }

            self.formats.insert(f.cl_image_format, fs);
        }

        // now enable some caps based on advertised formats
        self.caps.has_3d_image_writes = !FORMATS
            .iter()
            .filter(|f| {
                if self.embedded {
                    f.req_for_embeded_read_or_write
                } else {
                    f.req_for_full_read_or_write
                }
            })
            .map(|f| self.formats[&f.cl_image_format][&CL_MEM_OBJECT_IMAGE3D])
            .any(|f| f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);

        self.caps.has_depth_images = self
            .formats
            .iter()
            .filter_map(|(k, v)| (k.image_channel_order == CL_DEPTH).then_some(v.values()))
            .flatten()
            .any(|mask| *mask != 0);

        // if we can't advertise the 3d image write extension, we have to disable them all
        if !self.caps.has_3d_image_writes {
            for f in &mut self.formats.values_mut() {
                *f.get_mut(&CL_MEM_OBJECT_IMAGE3D).unwrap() &= !cl_mem_flags::from(
                    CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE,
                );
            }
        }

        // we require formatted loads
        if self.screen.caps().image_load_formatted {
            // "For embedded profiles devices that support reading from and writing to the same
            // image object from the same kernel instance (see CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS)
            // there is no required minimum list of supported image formats."
            self.caps.has_rw_images = if self.embedded {
                FORMATS
                    .iter()
                    .flat_map(|f| self.formats[&f.cl_image_format].values())
                    .any(|f| f & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) != 0)
            } else {
                !FORMATS
                    .iter()
                    .filter(|f| f.req_for_full_read_and_write)
                    .flat_map(|f| &self.formats[&f.cl_image_format])
                    // maybe? things being all optional is kind of a mess
                    .filter(|(target, _)| **target != CL_MEM_OBJECT_IMAGE3D)
                    .any(|(_, mask)| mask & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) == 0)
            }
        }

        // if we can't advertise read_write images, disable them all
        if !self.caps.has_rw_images {
            self.formats
                .values_mut()
                .flat_map(|f| f.values_mut())
                .for_each(|f| *f &= !cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE));
        }
    }

    fn check_valid(screen: &PipeScreen) -> bool {
        if !screen.caps().compute
            || screen.shader_param(
                pipe_shader_type::PIPE_SHADER_COMPUTE,
                pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS,
            ) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR as i32))
                == 0
        {
            return false;
        }

        // CL_DEVICE_MAX_PARAMETER_SIZE
        // For this minimum value, only a maximum of 128 arguments can be passed to a kernel
        if (screen.shader_param(
            pipe_shader_type::PIPE_SHADER_COMPUTE,
            pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE,
        ) as u32)
            < 128
        {
            return false;
        }
        true
    }

    fn check_custom(&self) -> bool {
        // Max size of memory object allocation in bytes. The minimum value is
        // max(min(1024 × 1024 × 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), 32 × 1024 × 1024)
        // for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        let mut limit = min(1024 * 1024 * 1024, self.global_mem_size() / 4);
        limit = max(limit, 32 * 1024 * 1024);
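        // For example, with 8 GiB of global memory: min(1 GiB, 2 GiB) = 1 GiB,
        // then max(1 GiB, 32 MiB) = 1 GiB. With 64 MiB: min(1 GiB, 16 MiB) =
        // 16 MiB, then max(16 MiB, 32 MiB) = 32 MiB.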
        if self.max_mem_alloc() < limit {
            return true;
        }

        // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
        // The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        if self.max_grid_dimensions() < 3 {
            return true;
        }

        if self.embedded {
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 bytes for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 4 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 1024 {
                return true;
            }
        } else {
            // CL 1.0 spec:
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 64 * 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL 1.0 spec:
            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 16 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 16 * 1024 {
                return true;
            }
        }

        false
    }

    fn check_embedded_profile(&self) -> bool {
        if self.caps.has_images {
            // The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.max_samplers() < 16 ||
            // The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.caps.max_read_images < 128 ||
            // The minimum value is 64 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.caps.max_write_images < 64 ||
            // The minimum value is 16384 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.caps.image_2d_size < 16384 ||
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_array_size() < 2048 ||
            // The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_buffer_max_size_pixels() < 65536
            {
                return true;
            }

            // TODO check req formats
        }
        !self.int64_supported()
    }

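    /// Parses a `CLVersion` override from the `RUSTICL_CL_VERSION` environment
    /// variable, e.g. `RUSTICL_CL_VERSION=3.0`.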
    fn parse_env_version() -> Option<CLVersion> {
        let val = env::var("RUSTICL_CL_VERSION").ok()?;
        let (major, minor) = val.split_once('.')?;
        let major = major.parse().ok()?;
        let minor = minor.parse().ok()?;
        mk_cl_version(major, minor, 0).try_into().ok()
    }

    // TODO add CLC checks
    fn check_version(&mut self) {
        let exts: Vec<&str> = self.extension_string.split(' ').collect();
        let mut res = CLVersion::Cl3_0;

        if self.embedded {
            if self.caps.has_images {
                let supports_array_writes = !FORMATS
                    .iter()
                    .filter(|f| f.req_for_embeded_read_or_write)
                    .map(|f| self.formats.get(&f.cl_image_format).unwrap())
                    .map(|f| f.get(&CL_MEM_OBJECT_IMAGE2D_ARRAY).unwrap())
                    .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);
                if self.image_3d_size() < 2048 || !supports_array_writes {
                    res = CLVersion::Cl1_2;
                }
            }
        }

        // TODO: check image 1D, 1Dbuffer, 1Darray and 2Darray support explicitly
        if self.caps.has_images {
            // The minimum value is 256 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.image_array_size() < 256 ||
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_buffer_max_size_pixels() < 2048
            {
                res = CLVersion::Cl1_1;
            }
        }

        if self.embedded {
            // The minimum value for the EMBEDDED profile is 1 KB.
            if self.printf_buffer_size() < 1024 {
                res = CLVersion::Cl1_1;
            }
        } else {
            // The minimum value for the FULL profile is 1 MB.
            if self.printf_buffer_size() < 1024 * 1024 {
                res = CLVersion::Cl1_1;
            }
        }

        if !exts.contains(&"cl_khr_byte_addressable_store")
         || !exts.contains(&"cl_khr_global_int32_base_atomics")
         || !exts.contains(&"cl_khr_global_int32_extended_atomics")
         || !exts.contains(&"cl_khr_local_int32_base_atomics")
         || !exts.contains(&"cl_khr_local_int32_extended_atomics")
         // The following modifications are made to the OpenCL 1.1 platform layer and runtime (sections 4 and 5):
         // The minimum FULL_PROFILE value for CL_DEVICE_MAX_PARAMETER_SIZE increased from 256 to 1024 bytes
         || self.param_max_size() < 1024
         // The minimum FULL_PROFILE value for CL_DEVICE_LOCAL_MEM_SIZE increased from 16 KB to 32 KB.
         || self.local_mem_size() < 32 * 1024
        {
            res = CLVersion::Cl1_0;
        }

        if let Some(val) = Self::parse_env_version() {
            res = val;
        }

        if res >= CLVersion::Cl3_0 {
            self.clc_versions
                .push(mk_cl_version_ext(3, 0, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_2 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 2, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_1 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 1, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_0 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 0, 0, "OpenCL C"));
        }

        self.cl_version = res;
        self.clc_version = min(CLVersion::Cl1_2, res);
    }

    fn fill_extensions(&mut self) {
        let mut exts_str: Vec<String> = Vec::new();
        let mut exts = PLATFORM_EXTENSIONS.to_vec();
        let mut feats = Vec::new();
        let mut spirv_exts = Vec::new();
        let mut add_ext = |major, minor, patch, ext: &str| {
            exts.push(mk_cl_version_ext(major, minor, patch, ext));
            exts_str.push(ext.to_owned());
        };
        let mut add_feat = |major, minor, patch, feat: &str| {
            feats.push(mk_cl_version_ext(major, minor, patch, feat));
        };
        let mut add_spirv = |ext| {
            spirv_exts.push(ext);
        };

        // add extensions all drivers support for now
        add_ext(1, 0, 0, "cl_khr_global_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_global_int32_extended_atomics");
        add_ext(2, 0, 0, "cl_khr_integer_dot_product");
        add_feat(
            2,
            0,
            0,
            "__opencl_c_integer_dot_product_input_4x8bit_packed",
        );
        add_feat(2, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit");
        add_ext(1, 0, 0, "cl_khr_local_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_local_int32_extended_atomics");

        add_spirv(c"SPV_KHR_expect_assume");
        add_spirv(c"SPV_KHR_float_controls");
        add_spirv(c"SPV_KHR_integer_dot_product");
        add_spirv(c"SPV_KHR_no_integer_wrap_decoration");

        if self.fp16_supported() {
            add_ext(1, 0, 0, "cl_khr_fp16");
        }

        if self.fp64_supported() {
            add_ext(1, 0, 0, "cl_khr_fp64");
            add_feat(1, 0, 0, "__opencl_c_fp64");
        }

        if self.is_gl_sharing_supported() {
            add_ext(1, 0, 0, "cl_khr_gl_sharing");
        }

        if self.int64_supported() {
            if self.embedded {
                add_ext(1, 0, 0, "cles_khr_int64");
            };

            add_feat(1, 0, 0, "__opencl_c_int64");
        }

        if self.caps.has_images {
            add_feat(1, 0, 0, "__opencl_c_images");

            if self.image2d_from_buffer_supported() {
                add_ext(1, 0, 0, "cl_khr_image2d_from_buffer");
            }

            if self.caps.has_rw_images {
                add_feat(1, 0, 0, "__opencl_c_read_write_images");
            }

            if self.caps.has_3d_image_writes {
                add_ext(1, 0, 0, "cl_khr_3d_image_writes");
                add_feat(1, 0, 0, "__opencl_c_3d_image_writes");
            }

            if self.caps.has_depth_images {
                add_ext(1, 0, 0, "cl_khr_depth_images");
            }
        }

        if self.pci_info().is_some() {
            add_ext(1, 0, 0, "cl_khr_pci_bus_info");
        }

        if self.screen().device_uuid().is_some() && self.screen().driver_uuid().is_some() {
            static_assert!(PIPE_UUID_SIZE == CL_UUID_SIZE_KHR);
            static_assert!(PIPE_LUID_SIZE == CL_LUID_SIZE_KHR);

            add_ext(1, 0, 0, "cl_khr_device_uuid");
        }

        if self.subgroups_supported() {
            // requires CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
            //add_ext(1, 0, 0, "cl_khr_subgroups");
            add_feat(1, 0, 0, "__opencl_c_subgroups");

            // we have lowering in `nir_lower_subgroups`, drivers can just use that
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle");
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle_relative");
        }

        if self.svm_supported() {
            add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
        }

        self.extensions = exts;
        self.clc_features = feats;
        self.extension_string = format!("{} {}", PLATFORM_EXTENSION_STR, exts_str.join(" "));
        self.spirv_extensions = spirv_exts;
    }

    fn shader_param(&self, cap: pipe_shader_cap) -> i32 {
        self.screen
            .shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }

    pub fn all() -> Vec<Device> {
        let mut devs: Vec<_> = load_screens().filter_map(Device::new).collect();

        // Pick a default device. One must be the default one no matter what. And custom devices can
        // only be that one if they are the only devices available.
        //
        // The entry with the highest value will be the default device.
        let default = devs.iter_mut().max_by_key(|dev| {
            let mut val = if dev.device_type == CL_DEVICE_TYPE_CUSTOM {
                // needs to be small enough so it's always going to be the smallest value
                -100
            } else if dev.device_type == CL_DEVICE_TYPE_CPU {
                0
            } else if dev.unified_memory() {
                // we give unified memory devices max priority, because we don't want to spin up the
                // discrete GPU on laptops by default.
                100
            } else {
                10
            };

            // we deprioritize zink for now.
            if dev.screen.driver_name() == c"zink" {
                val -= 1;
            }

            val
        });

        if let Some(default) = default {
            default.device_type |= CL_DEVICE_TYPE_DEFAULT;
        }

        devs
    }

    pub fn address_bits(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_ADDRESS_BITS)
    }

    pub fn const_max_size(&self) -> cl_ulong {
        min(
            // Needed to fix the `api min_max_constant_buffer_size` CL CTS test as it can't really
            // handle arbitrary values here. We might want to reconsider later and figure out how to
            // advertise higher values without tripping up the test.
            // Should be at least 1 << 16 (the native UBO size on NVidia); we advertise more just in
            // case it benefits other hardware.
            1 << 26,
            min(
                self.max_mem_alloc(),
                self.screen.caps().max_shader_buffer_size as u64,
            ),
        )
    }

    pub fn const_max_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFERS) as cl_uint
    }

    fn set_device_type(&mut self) {
        let env = env::var("RUSTICL_DEVICE_TYPE").ok().and_then(|env| {
            Some(match &*env.to_ascii_lowercase() {
                "accelerator" => CL_DEVICE_TYPE_ACCELERATOR,
                "cpu" => CL_DEVICE_TYPE_CPU,
                "custom" => CL_DEVICE_TYPE_CUSTOM,
                "gpu" => CL_DEVICE_TYPE_GPU,
                // if no valid string is set, we treat it as if no value was set at all
                _ => return None,
            })
        });

        self.device_type = if let Some(env) = env {
            env
        } else if self.check_custom() {
            CL_DEVICE_TYPE_CUSTOM
        } else {
            match self.screen.device_type() {
                pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE => CL_DEVICE_TYPE_CPU,
                pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI => CL_DEVICE_TYPE_GPU,
                pipe_loader_device_type::PIPE_LOADER_DEVICE_PLATFORM => CL_DEVICE_TYPE_GPU,
                pipe_loader_device_type::NUM_PIPE_LOADER_DEVICE_TYPES => CL_DEVICE_TYPE_CUSTOM,
            }
        };
    }

    pub fn fp16_supported(&self) -> bool {
        if !Platform::features().fp16 {
            return false;
        }

        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_FP16) != 0
    }

    pub fn fp64_supported(&self) -> bool {
        if !Platform::features().fp64 {
            return false;
        }

        self.screen.caps().doubles
    }

    pub fn is_gl_sharing_supported(&self) -> bool {
        self.screen.caps().cl_gl_sharing
            && self.screen.caps().dmabuf != 0
            && !self.is_device_software()
            && self.screen.is_res_handle_supported()
            && self.screen.device_uuid().is_some()
            && self.helper_ctx().is_create_fence_fd_supported()
    }

    pub fn is_device_software(&self) -> bool {
        self.screen.device_type() == pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE
    }

    pub fn get_nir_options(&self) -> nir_shader_compiler_options {
        unsafe {
            *self
                .screen
                .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
        }
    }

    pub fn sdot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8
    }

    pub fn udot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8
    }

    pub fn sudot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8
    }

    pub fn pack_32_4x8_supported(&self) -> bool {
        self.get_nir_options().has_pack_32_4x8
    }

    pub fn sdot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8_sat
    }

    pub fn udot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8_sat
    }

    pub fn sudot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8_sat
    }

    pub fn fp64_is_softfp(&self) -> bool {
        bit_check(
            self.get_nir_options().lower_doubles_options as u32,
            nir_lower_doubles_options::nir_lower_fp64_full_software as u32,
        )
    }

    pub fn int64_supported(&self) -> bool {
        self.screen.caps().int64
    }

    pub fn global_mem_size(&self) -> cl_ulong {
        if let Some(memory_info) = self.screen().query_memory_info() {
            let memory: cl_ulong = if memory_info.total_device_memory != 0 {
                memory_info.total_device_memory.into()
            } else {
                memory_info.total_staging_memory.into()
            };
            memory * 1024
        } else {
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)
        }
    }

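    // Note: max_texture_3d_levels is a mip level count, so the largest
    // supported dimension is 1 << (levels - 1), e.g. 12 levels => 2048.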
    pub fn image_3d_size(&self) -> usize {
        if self.caps.has_images {
            1 << (self.screen.caps().max_texture_3d_levels - 1)
        } else {
            0
        }
    }

    pub fn image_3d_supported(&self) -> bool {
        self.caps.has_images && self.screen.caps().max_texture_3d_levels != 0
    }

    pub fn image_array_size(&self) -> usize {
        if self.caps.has_images {
            self.screen.caps().max_texture_array_layers as usize
        } else {
            0
        }
    }

    pub fn image_pitch_alignment(&self) -> cl_uint {
        if self.caps.has_images {
            self.screen.caps().linear_image_pitch_alignment
        } else {
            0
        }
    }

    pub fn image_base_address_alignment(&self) -> cl_uint {
        if self.caps.has_images {
            self.screen.caps().linear_image_base_address_alignment
        } else {
            0
        }
    }

    pub fn image_buffer_max_size_pixels(&self) -> usize {
        if self.caps.has_images {
            min(
                // The CTS requires it to not exceed `CL_MAX_MEM_ALLOC_SIZE`, also we need to divide
                // by the max pixel size, because this cap is in pixels, not bytes.
                //
                // The CTS also casts this to int in a couple of places,
                // see: https://github.com/KhronosGroup/OpenCL-CTS/issues/2056
                min(
                    self.max_mem_alloc() / MAX_PIXEL_SIZE_BYTES,
                    c_int::MAX as cl_ulong,
                ),
                self.screen.caps().max_texel_buffer_elements as cl_ulong,
            ) as usize
        } else {
            0
        }
    }

    pub fn image2d_from_buffer_supported(&self) -> bool {
        self.image_pitch_alignment() != 0 && self.image_base_address_alignment() != 0
    }

    pub fn little_endian(&self) -> bool {
        let endianness = self.screen.caps().endianness;
        endianness == pipe_endian::PIPE_ENDIAN_LITTLE
    }

    pub fn local_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)
    }

    pub fn max_block_sizes(&self) -> Vec<usize> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
        v.into_iter().map(|v| v as usize).collect()
    }

    pub fn max_grid_size(&self) -> Vec<u64> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GRID_SIZE);

        v.into_iter()
            .map(|a| min(a, Platform::dbg().max_grid_size))
            .collect()
    }

    pub fn max_clock_freq(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)
    }

    pub fn max_compute_units(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)
    }

    pub fn max_grid_dimensions(&self) -> cl_uint {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_GRID_DIMENSION,
        ) as cl_uint
    }

    pub fn max_mem_alloc(&self) -> cl_ulong {
        // TODO: at the moment gallium doesn't support bigger buffers
        min(
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE),
            0x80000000,
        )
    }

    pub fn max_samplers(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS) as cl_uint
    }

    pub fn max_threads_per_block(&self) -> usize {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
        ) as usize
    }

    pub fn param_max_size(&self) -> usize {
        min(
            self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE) as u32,
            4 * 1024,
        ) as usize
    }

    pub fn printf_buffer_size(&self) -> usize {
        1024 * 1024
    }

    pub fn pci_info(&self) -> Option<cl_device_pci_bus_info_khr> {
        if self.screen.device_type() != pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI {
            return None;
        }

        let pci_domain = self.screen.caps().pci_group as cl_uint;
        let pci_bus = self.screen.caps().pci_bus as cl_uint;
        let pci_device = self.screen.caps().pci_device as cl_uint;
        let pci_function = self.screen.caps().pci_function as cl_uint;

        Some(cl_device_pci_bus_info_khr {
            pci_domain,
            pci_bus,
            pci_device,
            pci_function,
        })
    }

    fn reusable_ctx(&self) -> MutexGuard<Vec<PipeContext>> {
        self.reusable_ctx.lock().unwrap()
    }

    pub fn screen(&self) -> &Arc<PipeScreen> {
        &self.screen
    }

    pub fn create_context(&self) -> Option<PipeContext> {
        self.reusable_ctx()
            .pop()
            .or_else(|| self.screen.create_context())
    }

    pub fn recycle_context(&self, ctx: PipeContext) {
        if Platform::dbg().reuse_context {
            self.reusable_ctx().push(ctx);
        }
    }

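    // PIPE_COMPUTE_CAP_SUBGROUP_SIZES is a bitmask of supported power-of-two
    // subgroup sizes; e.g. a mask with bits 5 and 6 set yields [64, 32],
    // iterating from the most significant bit down.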
    pub fn subgroup_sizes(&self) -> Vec<usize> {
        let subgroup_size = ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
        );

        SetBitIndices::from_msb(subgroup_size)
            .map(|bit| 1 << bit)
            .collect()
    }

    pub fn max_subgroups(&self) -> u32 {
        ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
        )
    }

    pub fn subgroups_supported(&self) -> bool {
        let subgroup_sizes = self.subgroup_sizes().len();

        // If multiple subgroup sizes are supported, we need to be able to query a CSO for its
        // subgroup size; doing that without shareable shaders isn't practical.
        self.max_subgroups() > 0
            && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
    }

    pub fn svm_supported(&self) -> bool {
        self.screen.caps().system_svm
    }

    pub fn unified_memory(&self) -> bool {
        self.screen.caps().uma
    }

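    // Drivers seem to report 0xFFFFFFFF when the vendor id is unknown; report
    // 0 in that case rather than passing the sentinel through.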
    pub fn vendor_id(&self) -> cl_uint {
        let id = self.screen.caps().vendor_id;
        if id == 0xFFFFFFFF {
            return 0;
        }
        id
    }

    pub fn prefers_real_buffer_in_cb0(&self) -> bool {
        self.screen.caps().prefer_real_buffer_in_constbuf0
    }

    pub fn shareable_shaders(&self) -> bool {
        self.screen.caps().shareable_shaders
    }

    pub fn images_as_deref(&self) -> bool {
        self.screen.caps().nir_images_as_deref
    }

    pub fn samplers_as_deref(&self) -> bool {
        self.screen.caps().nir_samplers_as_deref
    }

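    // Takes the helper context lock; the mutex guard lives inside the returned
    // wrapper, so it is held until the wrapper is dropped.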
    pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
        HelperContext {
            lock: self.helper_ctx.lock().unwrap(),
        }
    }

    pub fn cl_features(&self) -> clc_optional_features {
        let subgroups_supported = self.subgroups_supported();
        clc_optional_features {
            fp16: self.fp16_supported(),
            fp64: self.fp64_supported(),
            int64: self.int64_supported(),
            images: self.caps.has_images,
            images_depth: self.caps.has_depth_images,
            images_read_write: self.caps.has_rw_images,
            images_write_3d: self.caps.has_3d_image_writes,
            integer_dot_product: true,
            subgroups: subgroups_supported,
            subgroups_shuffle: subgroups_supported,
            subgroups_shuffle_relative: subgroups_supported,
            ..Default::default()
        }
    }
}

pub fn devs() -> &'static Vec<Device> {
    &Platform::get().devs
}

pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    devs()
        .iter()
        .filter(|d| device_type & d.device_type as cl_device_type != 0)
        .collect()
}

pub fn get_dev_for_uuid(uuid: [c_char; UUID_SIZE]) -> Option<&'static Device> {
    devs().iter().find(|d| {
        let uuid: [c_uchar; UUID_SIZE] = unsafe { transmute(uuid) };
        uuid == d.screen().device_uuid().unwrap()
    })
}