use crate::api::icd::*;
use crate::api::util::*;
use crate::core::format::*;
use crate::core::platform::*;
use crate::core::util::*;
use crate::core::version::*;
use crate::impl_cl_type_trait_base;

use mesa_rust::compiler::clc::*;
use mesa_rust::compiler::nir::*;
use mesa_rust::pipe::context::*;
use mesa_rust::pipe::device::load_screens;
use mesa_rust::pipe::fence::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::*;
use mesa_rust::pipe::transfer::PipeTransfer;
use mesa_rust_gen::*;
use mesa_rust_util::math::SetBitIndices;
use mesa_rust_util::static_assert;
use rusticl_opencl_gen::*;

use std::cmp::max;
use std::cmp::min;
use std::collections::HashMap;
use std::convert::TryInto;
use std::env;
use std::ffi::CStr;
use std::mem::transmute;
use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

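/// A CL device backed by a gallium `PipeScreen`. All capabilities, supported
/// versions, extensions and image formats are queried once at creation.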
pub struct Device {
    pub base: CLObjectBase<CL_INVALID_DEVICE>,
    pub screen: Arc<PipeScreen>,
    pub cl_version: CLVersion,
    pub clc_version: CLVersion,
    pub clc_versions: Vec<cl_name_version>,
    pub device_type: u32,
    pub embedded: bool,
    pub extension_string: String,
    pub extensions: Vec<cl_name_version>,
    pub spirv_extensions: Vec<&'static CStr>,
    pub clc_features: Vec<cl_name_version>,
    pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
    pub lib_clc: NirShader,
    pub caps: DeviceCaps,
    helper_ctx: Mutex<PipeContext>,
    reusable_ctx: Mutex<Vec<PipeContext>>,
}

#[derive(Default)]
pub struct DeviceCaps {
    pub has_3d_image_writes: bool,
    pub has_depth_images: bool,
    pub has_images: bool,
    pub has_rw_images: bool,
    pub has_timestamp: bool,
    pub image_2d_size: u32,
    pub max_read_images: u32,
    pub max_write_images: u32,
    pub timer_resolution: u32,
}

impl DeviceCaps {
    fn new(screen: &PipeScreen) -> Self {
        let cap_timestamp = screen.caps().query_timestamp;
        let timer_resolution = screen.caps().timer_resolution;

        let max_write_images =
            Self::shader_param(screen, pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) as u32;
        let max_read_images =
            Self::shader_param(screen, pipe_shader_cap::PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS) as u32;
        let image_2d_size = screen.caps().max_texture_2d_size;

        let has_images = screen.caps().texture_sampler_independent &&
            screen.caps().image_store_formatted &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            max_read_images >= 8 &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            max_write_images >= 8 &&
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            image_2d_size >= 2048;

        Self {
            has_images,
            has_timestamp: cap_timestamp && timer_resolution > 0,
            image_2d_size: has_images.then_some(image_2d_size).unwrap_or_default(),
            max_read_images: has_images.then_some(max_read_images).unwrap_or_default(),
            max_write_images: has_images.then_some(max_write_images).unwrap_or_default(),
            timer_resolution,
            ..Default::default()
        }
    }

    fn shader_param(screen: &PipeScreen, cap: pipe_shader_cap) -> i32 {
        screen.shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }
}

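/// Interface for work done on the device's internal helper context, which is
/// shared behind a mutex rather than tied to a specific CL command queue.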
pub trait HelperContextWrapper {
    #[must_use]
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext);

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
    fn delete_compute_state(&self, cso: *mut c_void);
    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32;

    fn map_buffer_unsynchronized(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn map_texture_unsynchronized(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn is_create_fence_fd_supported(&self) -> bool;
    fn import_fence(&self, fence_fd: &FenceFd) -> PipeFence;
}

pub struct HelperContext<'a> {
    lock: MutexGuard<'a, PipeContext>,
}

impl HelperContext<'_> {
    pub fn buffer_subdata(
        &self,
        res: &PipeResource,
        offset: c_uint,
        data: *const c_void,
        size: c_uint,
    ) {
        self.lock.buffer_subdata(res, offset, data, size)
    }

    pub fn texture_subdata(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        data: *const c_void,
        stride: u32,
        layer_stride: usize,
    ) {
        self.lock
            .texture_subdata(res, bx, data, stride, layer_stride)
    }
}

impl HelperContextWrapper for HelperContext<'_> {
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext),
    {
        func(self);
        self.lock.flush()
    }

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
        self.lock.create_compute_state(nir, static_local_mem)
    }

    fn delete_compute_state(&self, cso: *mut c_void) {
        self.lock.delete_compute_state(cso)
    }

    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info {
        self.lock.compute_state_info(state)
    }

    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32 {
        self.lock.compute_state_subgroup_size(state, block)
    }

    fn map_buffer_unsynchronized(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock.buffer_map_flags(
            res,
            offset,
            size,
            pipe_map_flags::PIPE_MAP_UNSYNCHRONIZED | rw.into(),
        )
    }

    fn map_texture_unsynchronized(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock
            .texture_map_flags(res, bx, pipe_map_flags::PIPE_MAP_UNSYNCHRONIZED | rw.into())
    }

    fn is_create_fence_fd_supported(&self) -> bool {
        self.lock.is_create_fence_fd_supported()
    }

    fn import_fence(&self, fd: &FenceFd) -> PipeFence {
        self.lock.import_fence(fd)
    }
}

impl_cl_type_trait_base!(cl_device_id, Device, [Device], CL_INVALID_DEVICE);

impl Device {
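    /// Creates a `Device` from a `PipeScreen`, returning `None` if the driver
    /// lacks the required compute support or libclc fails to load.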
    fn new(screen: PipeScreen) -> Option<Device> {
        if !Self::check_valid(&screen) {
            return None;
        }

        let screen = Arc::new(screen);
        // Create before loading libclc as llvmpipe only creates the shader cache with the first
        // context being created.
        let helper_ctx = screen.create_context()?;
        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
        if lib_clc.is_none() {
            eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv");
        }

        let mut d = Self {
            caps: DeviceCaps::new(&screen),
            base: CLObjectBase::new(RusticlTypes::Device),
            helper_ctx: Mutex::new(helper_ctx),
            screen,
            cl_version: CLVersion::Cl3_0,
            clc_version: CLVersion::Cl3_0,
            clc_versions: Vec::new(),
            device_type: 0,
            embedded: false,
            extension_string: String::from(""),
            extensions: Vec::new(),
            spirv_extensions: Vec::new(),
            clc_features: Vec::new(),
            formats: HashMap::new(),
            lib_clc: lib_clc?,
            reusable_ctx: Mutex::new(Vec::new()),
        };

        // check if we are embedded or full profile first
        d.embedded = d.check_embedded_profile();

        d.set_device_type();

        d.fill_format_tables();

        // query supported extensions
        d.fill_extensions();

        // now figure out what version we are
        d.check_version();

        Some(d)
    }

    /// Converts a temporary reference to a static if and only if this device lives inside static
    /// memory.
    pub fn to_static(&self) -> Option<&'static Self> {
        devs().iter().find(|&dev| self == dev)
    }

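    /// Probes the screen for every CL image format and type combination and
    /// fills `formats` with the supported access flags, then derives the
    /// image write capabilities from the resulting table.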
    fn fill_format_tables(&mut self) {
        // no need to do this if we don't support images
        if !self.caps.has_images {
            return;
        }

        for f in FORMATS {
            let mut fs = HashMap::new();
            for t in CL_IMAGE_TYPES {
                // depth images are only valid for 2D and 2DArray
                if [CL_DEPTH, CL_DEPTH_STENCIL].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY].contains(&t)
                {
                    continue;
                }

                // the CTS doesn't test them, so let's not advertise them by accident if they are
                // broken
                if t == CL_MEM_OBJECT_IMAGE1D_BUFFER
                    && [CL_RGB, CL_RGBx].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_UNORM_SHORT_565, CL_UNORM_SHORT_555]
                        .contains(&f.cl_image_format.image_channel_data_type)
                {
                    continue;
                }

                let mut flags: cl_uint = 0;
                if self.screen.is_format_supported(
                    f.pipe,
                    cl_mem_type_to_texture_target(t),
                    PIPE_BIND_SAMPLER_VIEW,
                ) {
                    flags |= CL_MEM_READ_ONLY;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_WRITE_ONLY | CL_MEM_KERNEL_READ_AND_WRITE;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_READ_WRITE;
                }

                fs.insert(t, flags as cl_mem_flags);
            }

            // Restrict supported formats with 1DBuffer images. This is an OpenCL CTS workaround.
            // See https://github.com/KhronosGroup/OpenCL-CTS/issues/1889
            let image1d_mask = fs.get(&CL_MEM_OBJECT_IMAGE1D).copied().unwrap_or_default();
            if let Some(entry) = fs.get_mut(&CL_MEM_OBJECT_IMAGE1D_BUFFER) {
                *entry &= image1d_mask;
            }

            self.formats.insert(f.cl_image_format, fs);
        }

        // now enable some caps based on advertised formats
        self.caps.has_3d_image_writes = !FORMATS
            .iter()
            .filter(|f| {
                if self.embedded {
                    f.req_for_embeded_read_or_write
                } else {
                    f.req_for_full_read_or_write
                }
            })
            .map(|f| self.formats[&f.cl_image_format][&CL_MEM_OBJECT_IMAGE3D])
            .any(|f| f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);

        self.caps.has_depth_images = self
            .formats
            .iter()
            .filter_map(|(k, v)| (k.image_channel_order == CL_DEPTH).then_some(v.values()))
            .flatten()
            .any(|mask| *mask != 0);

        // if we can't advertise the 3d image write ext, we have to disable them all
        if !self.caps.has_3d_image_writes {
            for f in self.formats.values_mut() {
                *f.get_mut(&CL_MEM_OBJECT_IMAGE3D).unwrap() &= !cl_mem_flags::from(
                    CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE | CL_MEM_KERNEL_READ_AND_WRITE,
                );
            }
        }

        // we require formatted loads
        if self.screen.caps().image_load_formatted {
            // "For embedded profiles devices that support reading from and writing to the same
            // image object from the same kernel instance (see CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS)
            // there is no required minimum list of supported image formats."
            self.caps.has_rw_images = if self.embedded {
                FORMATS
                    .iter()
                    .flat_map(|f| self.formats[&f.cl_image_format].values())
                    .any(|f| f & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) != 0)
            } else {
                !FORMATS
                    .iter()
                    .filter(|f| f.req_for_full_read_and_write)
                    .flat_map(|f| &self.formats[&f.cl_image_format])
                    // maybe? things being all optional is kinda a mess
                    .filter(|(target, _)| **target != CL_MEM_OBJECT_IMAGE3D)
                    .any(|(_, mask)| mask & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) == 0)
            }
        }

        // if we can't advertise read_write images, disable them all
        if !self.caps.has_rw_images {
            self.formats
                .values_mut()
                .flat_map(|f| f.values_mut())
                .for_each(|f| *f &= !cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE));
        }
    }

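    /// Checks the hard requirements for exposing a screen as a CL device at
    /// all: NIR compute shader support and a minimal kernel input size.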
    fn check_valid(screen: &PipeScreen) -> bool {
        if !screen.caps().compute
            || screen.shader_param(
                pipe_shader_type::PIPE_SHADER_COMPUTE,
                pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS,
            ) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR as i32))
                == 0
        {
            return false;
        }

        // CL_DEVICE_MAX_PARAMETER_SIZE
        // For this minimum value, only a maximum of 128 arguments can be passed to a kernel
        if (screen.shader_param(
            pipe_shader_type::PIPE_SHADER_COMPUTE,
            pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE,
        ) as u32)
            < 128
        {
            return false;
        }
        true
    }

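    /// Returns true if the device misses any minimum requirement that applies
    /// to devices not of type CL_DEVICE_TYPE_CUSTOM, in which case it can only
    /// be exposed as a custom device.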
    fn check_custom(&self) -> bool {
        // Max size of memory object allocation in bytes. The minimum value is
        // max(min(1024 × 1024 × 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), 32 × 1024 × 1024)
        // for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        let mut limit = min(1024 * 1024 * 1024, self.global_mem_size() / 4);
        limit = max(limit, 32 * 1024 * 1024);
        if self.max_mem_alloc() < limit {
            return true;
        }

        // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
        // The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        if self.max_grid_dimensions() < 3 {
            return true;
        }

        if self.embedded {
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 bytes for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 4 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 1024 {
                return true;
            }
        } else {
            // CL 1.0 spec:
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 64 * 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL 1.0 spec:
            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 16 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 16 * 1024 {
                return true;
            }
        }

        false
    }

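    /// Returns true if the device only meets the EMBEDDED profile limits, e.g.
    /// when 64 bit integers or the FULL profile image minimums are missing.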
    fn check_embedded_profile(&self) -> bool {
        if self.caps.has_images {
            // The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.max_samplers() < 16 ||
            // The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.caps.max_read_images < 128 ||
            // The minimum value is 64 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.caps.max_write_images < 64 ||
            // The minimum value is 16384 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.caps.image_2d_size < 16384 ||
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_array_size() < 2048 ||
            // The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_buffer_max_size_pixels() < 65536
            {
                return true;
            }

            // TODO check req formats
        }
        !self.int64_supported()
    }

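    /// Parses the `RUSTICL_CL_VERSION` environment variable as a
    /// `major.minor` version override, e.g. `RUSTICL_CL_VERSION=1.2`.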
    fn parse_env_version() -> Option<CLVersion> {
        let val = env::var("RUSTICL_CL_VERSION").ok()?;
        let (major, minor) = val.split_once('.')?;
        let major = major.parse().ok()?;
        let minor = minor.parse().ok()?;
        mk_cl_version(major, minor, 0).try_into().ok()
    }

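    /// Downgrades the advertised CL version from 3.0 until all mandatory
    /// limits of that version are met, then fills in the supported OpenCL C
    /// versions accordingly.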
    // TODO add CLC checks
    fn check_version(&mut self) {
        let exts: Vec<&str> = self.extension_string.split(' ').collect();
        let mut res = CLVersion::Cl3_0;

        if self.embedded {
            if self.caps.has_images {
                let supports_array_writes = !FORMATS
                    .iter()
                    .filter(|f| f.req_for_embeded_read_or_write)
                    .map(|f| self.formats.get(&f.cl_image_format).unwrap())
                    .map(|f| f.get(&CL_MEM_OBJECT_IMAGE2D_ARRAY).unwrap())
                    .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);
                if self.image_3d_size() < 2048 || !supports_array_writes {
                    res = CLVersion::Cl1_2;
                }
            }
        }

        // TODO: check image 1D, 1Dbuffer, 1Darray and 2Darray support explicitly
        if self.caps.has_images {
            // The minimum value is 256 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.image_array_size() < 256 ||
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_buffer_max_size_pixels() < 2048
            {
                res = CLVersion::Cl1_1;
            }
        }

        if self.embedded {
            // The minimum value for the EMBEDDED profile is 1 KB.
            if self.printf_buffer_size() < 1024 {
                res = CLVersion::Cl1_1;
            }
        } else {
            // The minimum value for the FULL profile is 1 MB.
            if self.printf_buffer_size() < 1024 * 1024 {
                res = CLVersion::Cl1_1;
            }
        }

        if !exts.contains(&"cl_khr_byte_addressable_store")
         || !exts.contains(&"cl_khr_global_int32_base_atomics")
         || !exts.contains(&"cl_khr_global_int32_extended_atomics")
         || !exts.contains(&"cl_khr_local_int32_base_atomics")
         || !exts.contains(&"cl_khr_local_int32_extended_atomics")
         // The following modifications are made to the OpenCL 1.1 platform layer and runtime (sections 4 and 5):
         // The minimum FULL_PROFILE value for CL_DEVICE_MAX_PARAMETER_SIZE increased from 256 to 1024 bytes
         || self.param_max_size() < 1024
         // The minimum FULL_PROFILE value for CL_DEVICE_LOCAL_MEM_SIZE increased from 16 KB to 32 KB.
         || self.local_mem_size() < 32 * 1024
        {
            res = CLVersion::Cl1_0;
        }

        if let Some(val) = Self::parse_env_version() {
            res = val;
        }

        if res >= CLVersion::Cl3_0 {
            self.clc_versions
                .push(mk_cl_version_ext(3, 0, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_2 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 2, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_1 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 1, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_0 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 0, 0, "OpenCL C"));
        }

        self.cl_version = res;
        self.clc_version = min(CLVersion::Cl1_2, res);
    }

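    /// Assembles the lists of CL extensions, OpenCL C features and SPIR-V
    /// extensions based on what the driver supports.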
    fn fill_extensions(&mut self) {
        let mut exts_str: Vec<String> = Vec::new();
        let mut exts = Vec::new();
        let mut feats = Vec::new();
        let mut spirv_exts = Vec::new();
        let mut add_ext = |major, minor, patch, ext: &str| {
            exts.push(mk_cl_version_ext(major, minor, patch, ext));
            exts_str.push(ext.to_owned());
        };
        let mut add_feat = |major, minor, patch, feat: &str| {
            feats.push(mk_cl_version_ext(major, minor, patch, feat));
        };
        let mut add_spirv = |ext| {
            spirv_exts.push(ext);
        };

        // add extensions all drivers support for now
        add_ext(1, 0, 0, "cl_khr_byte_addressable_store");
        add_ext(1, 0, 0, "cl_khr_create_command_queue");
        add_ext(1, 0, 0, "cl_khr_expect_assume");
        add_ext(1, 0, 0, "cl_khr_extended_versioning");
        add_ext(1, 0, 0, "cl_khr_global_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_global_int32_extended_atomics");
        add_ext(1, 0, 0, "cl_khr_il_program");
        add_ext(1, 0, 0, "cl_khr_local_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_local_int32_extended_atomics");
        add_ext(2, 0, 0, "cl_khr_integer_dot_product");
        add_ext(1, 0, 0, "cl_khr_spirv_no_integer_wrap_decoration");
        add_ext(1, 0, 0, "cl_khr_suggested_local_work_size");

        add_feat(2, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit");
        add_feat(
            2,
            0,
            0,
            "__opencl_c_integer_dot_product_input_4x8bit_packed",
        );

        add_spirv(c"SPV_KHR_expect_assume");
        add_spirv(c"SPV_KHR_float_controls");
        add_spirv(c"SPV_KHR_integer_dot_product");
        add_spirv(c"SPV_KHR_no_integer_wrap_decoration");

        if self.fp16_supported() {
            add_ext(1, 0, 0, "cl_khr_fp16");
        }

        if self.fp64_supported() {
            add_ext(1, 0, 0, "cl_khr_fp64");
            add_feat(1, 0, 0, "__opencl_c_fp64");
        }

        if self.is_gl_sharing_supported() {
            add_ext(1, 0, 0, "cl_khr_gl_sharing");
        }

        if self.int64_supported() {
            if self.embedded {
                add_ext(1, 0, 0, "cles_khr_int64");
            }

            add_feat(1, 0, 0, "__opencl_c_int64");
        }

        if self.caps.has_images {
            add_feat(1, 0, 0, "__opencl_c_images");

            if self.image2d_from_buffer_supported() {
                add_ext(1, 0, 0, "cl_khr_image2d_from_buffer");
            }

            if self.caps.has_rw_images {
                add_feat(1, 0, 0, "__opencl_c_read_write_images");
            }

            if self.caps.has_3d_image_writes {
                add_ext(1, 0, 0, "cl_khr_3d_image_writes");
                add_feat(1, 0, 0, "__opencl_c_3d_image_writes");
            }

            if self.caps.has_depth_images {
                add_ext(1, 0, 0, "cl_khr_depth_images");
            }
        }

        if self.pci_info().is_some() {
            add_ext(1, 0, 0, "cl_khr_pci_bus_info");
        }

        if self.screen().device_uuid().is_some() && self.screen().driver_uuid().is_some() {
            static_assert!(PIPE_UUID_SIZE == CL_UUID_SIZE_KHR);
            static_assert!(PIPE_LUID_SIZE == CL_LUID_SIZE_KHR);

            add_ext(1, 0, 0, "cl_khr_device_uuid");
        }

        if self.subgroups_supported() {
            // requires CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
            //add_ext(1, 0, 0, "cl_khr_subgroups");
            add_feat(1, 0, 0, "__opencl_c_subgroups");

            // we have lowering in `nir_lower_subgroups`, drivers can just use that
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle");
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle_relative");
        }

        if self.svm_supported() {
            add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
        }

        self.extensions = exts;
        self.clc_features = feats;
        self.extension_string = exts_str.join(" ");
        self.spirv_extensions = spirv_exts;
    }

    fn shader_param(&self, cap: pipe_shader_cap) -> i32 {
        self.screen
            .shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }

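    /// Creates a device for every usable pipe screen and marks exactly one of
    /// them as CL_DEVICE_TYPE_DEFAULT.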
    pub fn all() -> Vec<Device> {
        let mut devs: Vec<_> = load_screens().filter_map(Device::new).collect();

        // Pick a default device. One device must be the default no matter what, and custom
        // devices can only be it if they are the only devices available.
        //
        // The entry with the highest value will be the default device.
        let default = devs.iter_mut().max_by_key(|dev| {
            let mut val = if dev.device_type == CL_DEVICE_TYPE_CUSTOM {
                // needs to be small enough so it's always going to be the smallest value
                -100
            } else if dev.device_type == CL_DEVICE_TYPE_CPU {
                0
            } else if dev.unified_memory() {
                // we give unified memory devices max priority, because we don't want to spin up
                // the discrete GPU on laptops by default.
                100
            } else {
                10
            };

            // we deprioritize zink for now.
            if dev.screen.driver_name() == c"zink" {
                val -= 1;
            }

            val
        });

        if let Some(default) = default {
            default.device_type |= CL_DEVICE_TYPE_DEFAULT;
        }

        devs
    }

    pub fn address_bits(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_ADDRESS_BITS)
    }

    pub fn const_max_size(&self) -> cl_ulong {
        min(
            // Needed to fix the `api min_max_constant_buffer_size` CL CTS test as it can't really
            // handle arbitrary values here. We might want to reconsider later and figure out how
            // to advertise higher values without tripping up the test.
            // should be at least 1 << 16 (native UBO size on NVidia)
            // advertising more just in case it benefits other hardware
            1 << 26,
            min(
                self.max_mem_alloc(),
                self.screen.caps().max_shader_buffer_size as u64,
            ),
        )
    }

    pub fn const_max_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFERS) as cl_uint
    }

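    /// Derives the CL device type from the pipe loader device type, honoring a
    /// `RUSTICL_DEVICE_TYPE` environment variable override.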
    fn set_device_type(&mut self) {
        let env = env::var("RUSTICL_DEVICE_TYPE").ok().and_then(|env| {
            Some(match &*env.to_ascii_lowercase() {
                "accelerator" => CL_DEVICE_TYPE_ACCELERATOR,
                "cpu" => CL_DEVICE_TYPE_CPU,
                "custom" => CL_DEVICE_TYPE_CUSTOM,
                "gpu" => CL_DEVICE_TYPE_GPU,
                // if no valid string is set we treat it as if no value was set
                _ => return None,
            })
        });

        self.device_type = if let Some(env) = env {
            env
        } else if self.check_custom() {
            CL_DEVICE_TYPE_CUSTOM
        } else {
            match self.screen.device_type() {
                pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE => CL_DEVICE_TYPE_CPU,
                pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI => CL_DEVICE_TYPE_GPU,
                pipe_loader_device_type::PIPE_LOADER_DEVICE_PLATFORM => CL_DEVICE_TYPE_GPU,
                pipe_loader_device_type::NUM_PIPE_LOADER_DEVICE_TYPES => CL_DEVICE_TYPE_CUSTOM,
            }
        };
    }

    pub fn fp16_supported(&self) -> bool {
        if !Platform::features().fp16 {
            return false;
        }

        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_FP16) != 0
    }

    pub fn fp64_supported(&self) -> bool {
        if !Platform::features().fp64 {
            return false;
        }

        self.screen.caps().doubles
    }

    pub fn is_gl_sharing_supported(&self) -> bool {
        self.screen.caps().cl_gl_sharing
            && self.screen.caps().dmabuf != 0
            && !self.is_device_software()
            && self.screen.is_res_handle_supported()
            && self.screen.device_uuid().is_some()
            && self.helper_ctx().is_create_fence_fd_supported()
    }

    pub fn is_device_software(&self) -> bool {
        self.screen.device_type() == pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE
    }

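    /// Returns a copy of the driver's compute-stage NIR compiler options; the
    /// `*dot_4x8*` and `pack_32_4x8` helpers below all read from it.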
    pub fn get_nir_options(&self) -> nir_shader_compiler_options {
        unsafe {
            *self
                .screen
                .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
        }
    }

    pub fn sdot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8
    }

    pub fn udot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8
    }

    pub fn sudot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8
    }

    pub fn pack_32_4x8_supported(&self) -> bool {
        self.get_nir_options().has_pack_32_4x8
    }

    pub fn sdot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8_sat
    }

    pub fn udot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8_sat
    }

    pub fn sudot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8_sat
    }

    pub fn fp64_is_softfp(&self) -> bool {
        bit_check(
            self.get_nir_options().lower_doubles_options as u32,
            nir_lower_doubles_options::nir_lower_fp64_full_software as u32,
        )
    }

    pub fn int64_supported(&self) -> bool {
        self.screen.caps().int64
    }

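    /// Global memory size in bytes, preferring the screen's memory info (which
    /// is reported in kilobytes) over the compute cap.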
    pub fn global_mem_size(&self) -> cl_ulong {
        if let Some(memory_info) = self.screen().query_memory_info() {
            let memory: cl_ulong = if memory_info.total_device_memory != 0 {
                memory_info.total_device_memory.into()
            } else {
                memory_info.total_staging_memory.into()
            };
            memory * 1024
        } else {
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)
        }
    }

    pub fn image_3d_size(&self) -> usize {
        if self.caps.has_images {
            1 << (self.screen.caps().max_texture_3d_levels - 1)
        } else {
            0
        }
    }

    pub fn image_3d_supported(&self) -> bool {
        self.caps.has_images && self.screen.caps().max_texture_3d_levels != 0
    }

    pub fn image_array_size(&self) -> usize {
        if self.caps.has_images {
            self.screen.caps().max_texture_array_layers as usize
        } else {
            0
        }
    }

    pub fn image_pitch_alignment(&self) -> cl_uint {
        if self.caps.has_images {
            self.screen.caps().linear_image_pitch_alignment
        } else {
            0
        }
    }

    pub fn image_base_address_alignment(&self) -> cl_uint {
        if self.caps.has_images {
            self.screen.caps().linear_image_base_address_alignment
        } else {
            0
        }
    }

    pub fn image_buffer_max_size_pixels(&self) -> usize {
        if self.caps.has_images {
            min(
                // The CTS requires it to not exceed `CL_MAX_MEM_ALLOC_SIZE`, also we need to
                // divide by the max pixel size, because this cap is in pixels, not bytes.
                //
                // The CTS also casts this to int in a couple of places,
                // see: https://github.com/KhronosGroup/OpenCL-CTS/issues/2056
                min(
                    self.max_mem_alloc() / MAX_PIXEL_SIZE_BYTES,
                    c_int::MAX as cl_ulong,
                ),
                self.screen.caps().max_texel_buffer_elements as cl_ulong,
            ) as usize
        } else {
            0
        }
    }

    pub fn image2d_from_buffer_supported(&self) -> bool {
        self.image_pitch_alignment() != 0 && self.image_base_address_alignment() != 0
    }

    pub fn little_endian(&self) -> bool {
        let endianness = self.screen.caps().endianness;
        endianness == pipe_endian::PIPE_ENDIAN_LITTLE
    }

    pub fn local_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)
    }

    pub fn max_block_sizes(&self) -> Vec<usize> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
        v.into_iter().map(|v| v as usize).collect()
    }

    pub fn max_grid_size(&self) -> Vec<u64> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GRID_SIZE);

        v.into_iter()
            .map(|a| min(a, Platform::dbg().max_grid_size))
            .collect()
    }

    pub fn max_clock_freq(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)
    }

    pub fn max_compute_units(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)
    }

    pub fn max_grid_dimensions(&self) -> cl_uint {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_GRID_DIMENSION,
        ) as cl_uint
    }

    pub fn max_mem_alloc(&self) -> cl_ulong {
        // TODO: at the moment gallium doesn't support bigger buffers
        min(
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE),
            0x80000000,
        )
    }

    pub fn max_samplers(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS) as cl_uint
    }

    pub fn max_threads_per_block(&self) -> usize {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
        ) as usize
    }

    pub fn param_max_size(&self) -> usize {
        min(
            self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE) as u32,
            4 * 1024,
        ) as usize
    }

    pub fn printf_buffer_size(&self) -> usize {
        1024 * 1024
    }

    pub fn pci_info(&self) -> Option<cl_device_pci_bus_info_khr> {
        if self.screen.device_type() != pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI {
            return None;
        }

        let pci_domain = self.screen.caps().pci_group as cl_uint;
        let pci_bus = self.screen.caps().pci_bus as cl_uint;
        let pci_device = self.screen.caps().pci_device as cl_uint;
        let pci_function = self.screen.caps().pci_function as cl_uint;

        Some(cl_device_pci_bus_info_khr {
            pci_domain,
            pci_bus,
            pci_device,
            pci_function,
        })
    }

    fn reusable_ctx(&self) -> MutexGuard<Vec<PipeContext>> {
        self.reusable_ctx.lock().unwrap()
    }

    pub fn screen(&self) -> &Arc<PipeScreen> {
        &self.screen
    }

    pub fn create_context(&self) -> Option<PipeContext> {
        self.reusable_ctx()
            .pop()
            .or_else(|| self.screen.create_context())
    }

    pub fn recycle_context(&self, ctx: PipeContext) {
        if Platform::dbg().reuse_context {
            self.reusable_ctx().push(ctx);
        }
    }

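    /// Expands the driver's subgroup size bitmask into the list of supported
    /// subgroup sizes.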
    pub fn subgroup_sizes(&self) -> Vec<usize> {
        let subgroup_size = ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
        );

        SetBitIndices::from_msb(subgroup_size)
            .map(|bit| 1 << bit)
            .collect()
    }

    pub fn max_subgroups(&self) -> u32 {
        ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
        )
    }

    pub fn subgroups_supported(&self) -> bool {
        let subgroup_sizes = self.subgroup_sizes().len();

        // we need to be able to query a CSO for subgroup sizes if multiple subgroup sizes are
        // supported; doing it without shareable shaders isn't practical
        self.max_subgroups() > 0
            && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
    }

    pub fn svm_supported(&self) -> bool {
        self.screen.caps().system_svm
    }

    pub fn unified_memory(&self) -> bool {
        self.screen.caps().uma
    }

    pub fn vendor_id(&self) -> cl_uint {
        let id = self.screen.caps().vendor_id;
        if id == 0xFFFFFFFF {
            return 0;
        }
        id
    }

    pub fn prefers_real_buffer_in_cb0(&self) -> bool {
        self.screen.caps().prefer_real_buffer_in_constbuf0
    }

    pub fn shareable_shaders(&self) -> bool {
        self.screen.caps().shareable_shaders
    }

    pub fn images_as_deref(&self) -> bool {
        self.screen.caps().nir_images_as_deref
    }

    pub fn samplers_as_deref(&self) -> bool {
        self.screen.caps().nir_samplers_as_deref
    }

    pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
        HelperContext {
            lock: self.helper_ctx.lock().unwrap(),
        }
    }

    pub fn cl_features(&self) -> clc_optional_features {
        let subgroups_supported = self.subgroups_supported();
        clc_optional_features {
            fp16: self.fp16_supported(),
            fp64: self.fp64_supported(),
            int64: self.int64_supported(),
            images: self.caps.has_images,
            images_depth: self.caps.has_depth_images,
            images_read_write: self.caps.has_rw_images,
            images_write_3d: self.caps.has_3d_image_writes,
            integer_dot_product: true,
            subgroups: subgroups_supported,
            subgroups_shuffle: subgroups_supported,
            subgroups_shuffle_relative: subgroups_supported,
            ..Default::default()
        }
    }
}

pub fn devs() -> &'static Vec<Device> {
    &Platform::get().devs
}

pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    devs()
        .iter()
        .filter(|d| device_type & d.device_type as cl_device_type != 0)
        .collect()
}

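/// Finds the device whose screen reports the given device UUID.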
pub fn get_dev_for_uuid(uuid: [c_char; UUID_SIZE]) -> Option<&'static Device> {
    devs().iter().find(|d| {
        let uuid: [c_uchar; UUID_SIZE] = unsafe { transmute(uuid) };
        uuid == d.screen().device_uuid().unwrap()
    })
}