use crate::api::icd::*;
use crate::api::util::*;
use crate::core::format::*;
use crate::core::platform::*;
use crate::core::util::*;
use crate::core::version::*;
use crate::impl_cl_type_trait_base;

use mesa_rust::compiler::clc::*;
use mesa_rust::compiler::nir::*;
use mesa_rust::pipe::context::*;
use mesa_rust::pipe::device::load_screens;
use mesa_rust::pipe::fence::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::*;
use mesa_rust::pipe::transfer::*;
use mesa_rust_gen::*;
use mesa_rust_util::math::SetBitIndices;
use mesa_rust_util::static_assert;
use rusticl_opencl_gen::*;

use std::cmp::max;
use std::cmp::min;
use std::collections::HashMap;
use std::convert::TryInto;
use std::env;
use std::ffi::CString;
use std::mem::transmute;
use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

pub struct Device {
    pub base: CLObjectBase<CL_INVALID_DEVICE>,
    pub screen: Arc<PipeScreen>,
    pub cl_version: CLVersion,
    pub clc_version: CLVersion,
    pub clc_versions: Vec<cl_name_version>,
    pub custom: bool,
    pub embedded: bool,
    pub has_timestamp: bool, // Cached to keep API fast
    pub extension_string: String,
    pub extensions: Vec<cl_name_version>,
    pub spirv_extensions: Vec<CString>,
    pub clc_features: Vec<cl_name_version>,
    pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
    pub lib_clc: NirShader,
    helper_ctx: Mutex<PipeContext>,
}

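/// Wrapper trait around the device's internal helper [`PipeContext`]. It exposes
/// one-off operations on that context: flushing work, mapping resources, compute
/// state (CSO) handling and fence import.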
pub trait HelperContextWrapper {
    #[must_use]
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext);

    fn buffer_map_directly(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn buffer_map_coherent(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn texture_map_directly(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn texture_map_coherent(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
    fn delete_compute_state(&self, cso: *mut c_void);
    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32;

    fn unmap(&self, tx: PipeTransfer);

    fn is_create_fence_fd_supported(&self) -> bool;
    fn import_fence(&self, fence_fd: &FenceFd) -> PipeFence;
}

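/// Holds the [`MutexGuard`] protecting the device's helper context, so the context
/// stays exclusively owned for as long as this wrapper is alive.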
pub struct HelperContext<'a> {
    lock: MutexGuard<'a, PipeContext>,
}

impl<'a> HelperContext<'a> {
    pub fn resource_copy_region(
        &self,
        src: &PipeResource,
        dst: &PipeResource,
        dst_offset: &[u32; 3],
        bx: &pipe_box,
    ) {
        self.lock.resource_copy_region(src, dst, dst_offset, bx);
    }

    pub fn buffer_subdata(
        &self,
        res: &PipeResource,
        offset: c_uint,
        data: *const c_void,
        size: c_uint,
    ) {
        self.lock.buffer_subdata(res, offset, data, size)
    }

    pub fn texture_subdata(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        data: *const c_void,
        stride: u32,
        layer_stride: usize,
    ) {
        self.lock
            .texture_subdata(res, bx, data, stride, layer_stride)
    }
}

impl<'a> HelperContextWrapper for HelperContext<'a> {
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext),
    {
        func(self);
        self.lock.flush()
    }

    fn buffer_map_directly(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock.buffer_map_directly(res, offset, size, rw)
    }

    fn buffer_map_coherent(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock
            .buffer_map(res, offset, size, rw, ResourceMapType::Coherent)
    }

    fn texture_map_directly(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock.texture_map_directly(res, bx, rw)
    }

    fn texture_map_coherent(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock
            .texture_map(res, bx, rw, ResourceMapType::Coherent)
    }

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
        self.lock.create_compute_state(nir, static_local_mem)
    }

    fn delete_compute_state(&self, cso: *mut c_void) {
        self.lock.delete_compute_state(cso)
    }

    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info {
        self.lock.compute_state_info(state)
    }

    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32 {
        self.lock.compute_state_subgroup_size(state, block)
    }

    fn unmap(&self, tx: PipeTransfer) {
        tx.with_ctx(&self.lock);
    }

    fn is_create_fence_fd_supported(&self) -> bool {
        self.lock.is_create_fence_fd_supported()
    }

    fn import_fence(&self, fd: &FenceFd) -> PipeFence {
        self.lock.import_fence(fd)
    }
}

impl_cl_type_trait_base!(cl_device_id, Device, [Device], CL_INVALID_DEVICE);

impl Device {
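    /// Creates a new [`Device`] for a [`PipeScreen`]. Returns `None` if the screen
    /// can't back a valid OpenCL device (see [`Self::check_valid`]), if no helper
    /// context can be created, or if libclc fails to load.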
    fn new(screen: PipeScreen) -> Option<Device> {
        if !Self::check_valid(&screen) {
            return None;
        }

        let screen = Arc::new(screen);
        // Create the helper context before loading libclc, as llvmpipe only sets up
        // its shader cache when the first context is created.
        let helper_ctx = screen.create_context()?;
        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
        if lib_clc.is_none() {
            eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv");
        }

        let mut d = Self {
            base: CLObjectBase::new(RusticlTypes::Device),
            helper_ctx: Mutex::new(helper_ctx),
            screen,
            cl_version: CLVersion::Cl3_0,
            clc_version: CLVersion::Cl3_0,
            clc_versions: Vec::new(),
            custom: false,
            embedded: false,
            has_timestamp: false,
            extension_string: String::from(""),
            extensions: Vec::new(),
            spirv_extensions: Vec::new(),
            clc_features: Vec::new(),
            formats: HashMap::new(),
            lib_clc: lib_clc?,
        };

        d.fill_format_tables();

        // check if we are embedded or full profile first
        d.embedded = d.check_embedded_profile();

        // check if we have to report it as a custom device
        d.custom = d.check_custom();

        let cap_timestamp = d.screen.param(pipe_cap::PIPE_CAP_QUERY_TIMESTAMP);
        let cap_timestamp_res = d.timer_resolution();
        d.has_timestamp = cap_timestamp != 0 && cap_timestamp_res > 0;

        // query supported extensions
        d.fill_extensions();

        // now figure out what version we are
        d.check_version();

        Some(d)
    }

    /// Converts a temporary reference to a `'static` one if and only if this device lives inside
    /// static memory.
    pub fn to_static(&self) -> Option<&'static Self> {
        devs().iter().find(|&dev| self == dev)
    }

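    /// Builds `self.formats`: for every known CL image format and image type it
    /// queries the screen and records the `cl_mem_flags` the combination supports.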
    fn fill_format_tables(&mut self) {
        for f in FORMATS {
            let mut fs = HashMap::new();
            for t in CL_IMAGE_TYPES {
                // the CTS doesn't test these, so let's not advertise them by accident if they
                // are broken
                if t == CL_MEM_OBJECT_IMAGE1D_BUFFER
                    && [CL_RGB, CL_RGBx].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_UNORM_SHORT_565, CL_UNORM_SHORT_555]
                        .contains(&f.cl_image_format.image_channel_data_type)
                {
                    continue;
                }

                let mut flags: cl_uint = 0;
                if self.screen.is_format_supported(
                    f.pipe,
                    cl_mem_type_to_texture_target(t),
                    PIPE_BIND_SAMPLER_VIEW,
                ) {
                    flags |= CL_MEM_READ_ONLY;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_WRITE_ONLY;
                    // TODO: enable once we support it
                    // flags |= CL_MEM_KERNEL_READ_AND_WRITE;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_READ_WRITE;
                }

                fs.insert(t, flags as cl_mem_flags);
            }
            self.formats.insert(f.cl_image_format, fs);
        }
    }

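    /// A screen is only usable as a CL device if it supports compute shaders in NIR
    /// form and can hold at least the minimum amount of kernel input data in const
    /// buffer 0.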
    fn check_valid(screen: &PipeScreen) -> bool {
        if screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0
            || screen.shader_param(
                pipe_shader_type::PIPE_SHADER_COMPUTE,
                pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS,
            ) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR as i32))
                == 0
        {
            return false;
        }

        // CL_DEVICE_MAX_PARAMETER_SIZE
        // With this minimum value, at most 128 arguments can be passed to a kernel
        if (screen.shader_param(
            pipe_shader_type::PIPE_SHADER_COMPUTE,
            pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE,
        ) as u32)
            < 128
        {
            return false;
        }
        true
    }

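    /// Checks the minimum limits the spec demands from devices that are not of type
    /// `CL_DEVICE_TYPE_CUSTOM`; returns `true` if any of them is missed and the
    /// device therefore has to be advertised as a custom device.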
    fn check_custom(&self) -> bool {
        // Max size of memory object allocation in bytes. The minimum value is
        // max(min(1024 × 1024 × 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), 32 × 1024 × 1024)
        // for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        let mut limit = min(1024 * 1024 * 1024, self.global_mem_size() / 4);
        limit = max(limit, 32 * 1024 * 1024);
        if self.max_mem_alloc() < limit {
            return true;
        }

        // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
        // The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        if self.max_grid_dimensions() < 3 {
            return true;
        }

        if self.embedded {
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 bytes for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 4 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 1024 {
                return true;
            }
        } else {
            // CL 1.0 spec:
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 64 * 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL 1.0 spec:
            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 16 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 16 * 1024 {
                return true;
            }
        }

        false
    }

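    /// Returns `true` if the device misses one of the FULL profile minimum limits
    /// and can only be reported as an EMBEDDED profile device.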
    fn check_embedded_profile(&self) -> bool {
        if self.image_supported() {
            // The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.max_samplers() < 16 ||
            // The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_read_count() < 128 ||
            // The minimum value is 64 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_write_count() < 64 ||
            // The minimum value is 16384 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_2d_size() < 16384 ||
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_array_size() < 2048 ||
            // The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_buffer_size() < 65536
            {
                return true;
            }

            // TODO check req formats
        }
        !self.int64_supported()
    }

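    /// Reads the `RUSTICL_DEVICE_TYPE` environment variable (case-insensitive),
    /// e.g. `RUSTICL_DEVICE_TYPE=gpu`; unknown values are ignored.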
    fn parse_env_device_type() -> Option<cl_device_type> {
        let mut val = env::var("RUSTICL_DEVICE_TYPE").ok()?;
        val.make_ascii_lowercase();
        Some(
            match &*val {
                "accelerator" => CL_DEVICE_TYPE_ACCELERATOR,
                "cpu" => CL_DEVICE_TYPE_CPU,
                "custom" => CL_DEVICE_TYPE_CUSTOM,
                "gpu" => CL_DEVICE_TYPE_GPU,
                _ => return None,
            }
            .into(),
        )
    }

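    /// Reads the `RUSTICL_CL_VERSION` environment variable, expected as
    /// `major.minor`, e.g. `RUSTICL_CL_VERSION=1.2`.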
    fn parse_env_version() -> Option<CLVersion> {
        let val = env::var("RUSTICL_CL_VERSION").ok()?;
        let (major, minor) = val.split_once('.')?;
        let major = major.parse().ok()?;
        let minor = minor.parse().ok()?;
        mk_cl_version(major, minor, 0).try_into().ok()
    }

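    /// Figures out the CL and CLC versions to report: starts at 3.0 and drops down
    /// whenever a limit or extension required by a version is missing, then fills
    /// `clc_versions` with every OpenCL C version up to the result.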
    // TODO add CLC checks
    fn check_version(&mut self) {
        let exts: Vec<&str> = self.extension_string.split(' ').collect();
        let mut res = CLVersion::Cl3_0;

        if self.embedded {
            if self.image_supported() {
                let supports_array_writes = !FORMATS
                    .iter()
                    .filter(|f| f.req_for_embeded_read_or_write)
                    .map(|f| self.formats.get(&f.cl_image_format).unwrap())
                    .map(|f| f.get(&CL_MEM_OBJECT_IMAGE2D_ARRAY).unwrap())
                    .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);
                if self.image_3d_size() < 2048 || !supports_array_writes {
                    res = CLVersion::Cl1_2;
                }
            }
        }

        // TODO: check image 1D, 1Dbuffer, 1Darray and 2Darray support explicitly
        if self.image_supported() {
            // The minimum value is 256 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.image_array_size() < 256 ||
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_buffer_size() < 2048
            {
                res = CLVersion::Cl1_1;
            }
        }

        if self.embedded {
            // The minimum value for the EMBEDDED profile is 1 KB.
            if self.printf_buffer_size() < 1024 {
                res = CLVersion::Cl1_1;
            }
        } else {
            // The minimum value for the FULL profile is 1 MB.
            if self.printf_buffer_size() < 1024 * 1024 {
                res = CLVersion::Cl1_1;
            }
        }

        if !exts.contains(&"cl_khr_byte_addressable_store")
         || !exts.contains(&"cl_khr_global_int32_base_atomics")
         || !exts.contains(&"cl_khr_global_int32_extended_atomics")
         || !exts.contains(&"cl_khr_local_int32_base_atomics")
         || !exts.contains(&"cl_khr_local_int32_extended_atomics")
         // The following modifications are made to the OpenCL 1.1 platform layer and runtime (sections 4 and 5):
         // The minimum FULL_PROFILE value for CL_DEVICE_MAX_PARAMETER_SIZE increased from 256 to 1024 bytes
         || self.param_max_size() < 1024
         // The minimum FULL_PROFILE value for CL_DEVICE_LOCAL_MEM_SIZE increased from 16 KB to 32 KB.
         || self.local_mem_size() < 32 * 1024
        {
            res = CLVersion::Cl1_0;
        }

        if let Some(val) = Self::parse_env_version() {
            res = val;
        }

        if res >= CLVersion::Cl3_0 {
            self.clc_versions
                .push(mk_cl_version_ext(3, 0, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_2 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 2, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_1 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 1, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_0 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 0, 0, "OpenCL C"));
        }

        self.cl_version = res;
        self.clc_version = min(CLVersion::Cl1_2, res);
    }

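    /// Collects the device extensions, OpenCL C features and SPIR-V extensions to
    /// advertise, based on what the screen and compiler options support.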
    fn fill_extensions(&mut self) {
        let mut exts_str: Vec<String> = Vec::new();
        let mut exts = PLATFORM_EXTENSIONS.to_vec();
        let mut feats = Vec::new();
        let mut spirv_exts = Vec::new();
        let mut add_ext = |major, minor, patch, ext: &str| {
            exts.push(mk_cl_version_ext(major, minor, patch, ext));
            exts_str.push(ext.to_owned());
        };
        let mut add_feat = |major, minor, patch, feat: &str| {
            feats.push(mk_cl_version_ext(major, minor, patch, feat));
        };
        let mut add_spirv = |ext: &str| {
            spirv_exts.push(CString::new(ext).unwrap());
        };

        // add extensions all drivers support for now
        add_ext(1, 0, 0, "cl_khr_global_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_global_int32_extended_atomics");
        add_ext(2, 0, 0, "cl_khr_integer_dot_product");
        add_feat(
            2,
            0,
            0,
            "__opencl_c_integer_dot_product_input_4x8bit_packed",
        );
        add_feat(2, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit");
        add_ext(1, 0, 0, "cl_khr_local_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_local_int32_extended_atomics");

        add_spirv("SPV_KHR_expect_assume");
        add_spirv("SPV_KHR_float_controls");
        add_spirv("SPV_KHR_integer_dot_product");
        add_spirv("SPV_KHR_no_integer_wrap_decoration");

        if self.fp16_supported() {
            add_ext(1, 0, 0, "cl_khr_fp16");
        }

        if self.fp64_supported() {
            add_ext(1, 0, 0, "cl_khr_fp64");
            add_feat(1, 0, 0, "__opencl_c_fp64");
        }

        if self.is_gl_sharing_supported() {
            add_ext(1, 0, 0, "cl_khr_gl_sharing");
        }

        if self.int64_supported() {
            if self.embedded {
                add_ext(1, 0, 0, "cles_khr_int64");
            };

            add_feat(1, 0, 0, "__opencl_c_int64");
        }

        if self.image_supported() {
            add_feat(1, 0, 0, "__opencl_c_images");

            if self.image2d_from_buffer_supported() {
                add_ext(1, 0, 0, "cl_khr_image2d_from_buffer");
            }

            if self.image_read_write_supported() {
                add_feat(1, 0, 0, "__opencl_c_read_write_images");
            }

            if self.image_3d_write_supported() {
                add_ext(1, 0, 0, "cl_khr_3d_image_writes");
                add_feat(1, 0, 0, "__opencl_c_3d_image_writes");
            }
        }

        if self.pci_info().is_some() {
            add_ext(1, 0, 0, "cl_khr_pci_bus_info");
        }

        if self.screen().device_uuid().is_some() && self.screen().driver_uuid().is_some() {
            static_assert!(PIPE_UUID_SIZE == CL_UUID_SIZE_KHR);
            static_assert!(PIPE_LUID_SIZE == CL_LUID_SIZE_KHR);

            add_ext(1, 0, 0, "cl_khr_device_uuid");
        }

        if self.subgroups_supported() {
            // requires CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
            // add_ext(1, 0, 0, "cl_khr_subgroups");
            add_feat(1, 0, 0, "__opencl_c_subgroups");

            // we have lowering in `nir_lower_subgroups`, drivers can just use that
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle");
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle_relative");
        }

        if self.svm_supported() {
            add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
        }

        self.extensions = exts;
        self.clc_features = feats;
        self.extension_string = format!("{} {}", PLATFORM_EXTENSION_STR, exts_str.join(" "));
        self.spirv_extensions = spirv_exts;
    }

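    /// Convenience wrapper for compute shader [`pipe_shader_cap`] queries.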
    fn shader_param(&self, cap: pipe_shader_cap) -> i32 {
        self.screen
            .shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }

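    /// Probes all available pipe screens and turns every suitable one into a [`Device`].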
    pub fn all() -> impl Iterator<Item = Device> {
        load_screens().filter_map(Device::new)
    }

    pub fn address_bits(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_ADDRESS_BITS)
    }

    pub fn const_max_size(&self) -> cl_ulong {
        min(
            // Needed to fix the `api min_max_constant_buffer_size` CL CTS test as it can't really
            // handle arbitrary values here. We might want to reconsider later and figure out how
            // to advertise higher values without tripping up the test.
            // Should be at least 1 << 16 (the native UBO size on NVidia); we advertise more just
            // in case it benefits other hardware.
            1 << 26,
            min(
                self.max_mem_alloc(),
                self.screen
                    .param(pipe_cap::PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT) as u64,
            ),
        )
    }

    pub fn const_max_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFERS) as cl_uint
    }

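    /// Maps the pipe loader device type onto a CL device type, after honoring the
    /// `RUSTICL_DEVICE_TYPE` override and the custom-device check. With `internal`
    /// set, GPU devices (except zink) are additionally marked `CL_DEVICE_TYPE_DEFAULT`.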
    pub fn device_type(&self, internal: bool) -> cl_device_type {
        if let Some(env) = Self::parse_env_device_type() {
            return env;
        }

        if self.custom {
            return CL_DEVICE_TYPE_CUSTOM as cl_device_type;
        }
        let mut res = match self.screen.device_type() {
            pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE => CL_DEVICE_TYPE_CPU,
            pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI => CL_DEVICE_TYPE_GPU,
            pipe_loader_device_type::PIPE_LOADER_DEVICE_PLATFORM => CL_DEVICE_TYPE_GPU,
            pipe_loader_device_type::NUM_PIPE_LOADER_DEVICE_TYPES => CL_DEVICE_TYPE_CUSTOM,
        };

        if internal && res == CL_DEVICE_TYPE_GPU && self.screen.driver_name() != "zink" {
            res |= CL_DEVICE_TYPE_DEFAULT;
        }

        res as cl_device_type
    }

    pub fn fp16_supported(&self) -> bool {
        if !Platform::features().fp16 {
            return false;
        }

        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_FP16) != 0
    }

    pub fn fp64_supported(&self) -> bool {
        if !Platform::features().fp64 {
            return false;
        }

        self.screen.param(pipe_cap::PIPE_CAP_DOUBLES) == 1
    }

    pub fn is_gl_sharing_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_CL_GL_SHARING) != 0
            && self.screen.param(pipe_cap::PIPE_CAP_DMABUF) != 0
            && !self.is_device_software()
            && self.screen.is_res_handle_supported()
            && self.screen.device_uuid().is_some()
            && self.helper_ctx().is_create_fence_fd_supported()
    }

    pub fn is_device_software(&self) -> bool {
        self.screen.device_type() == pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE
    }

    pub fn get_nir_options(&self) -> nir_shader_compiler_options {
        unsafe {
            *self
                .screen
                .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
        }
    }

    pub fn sdot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8
    }

    pub fn udot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8
    }

    pub fn sudot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8
    }

    pub fn pack_32_4x8_supported(&self) -> bool {
        self.get_nir_options().has_pack_32_4x8
    }

    pub fn sdot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8_sat
    }

    pub fn udot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8_sat
    }

    pub fn sudot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8_sat
    }

    pub fn fp64_is_softfp(&self) -> bool {
        bit_check(
            self.get_nir_options().lower_doubles_options as u32,
            nir_lower_doubles_options::nir_lower_fp64_full_software as u32,
        )
    }

    pub fn int64_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_INT64) == 1
    }

    pub fn global_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)
    }

    pub fn image_2d_size(&self) -> usize {
        self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_2D_SIZE) as usize
    }

    pub fn image_3d_size(&self) -> usize {
        1 << (self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_3D_LEVELS) - 1)
    }

    pub fn image_3d_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 0
    }

    pub fn image_array_size(&self) -> usize {
        self.screen
            .param(pipe_cap::PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS) as usize
    }

    pub fn image_pitch_alignment(&self) -> cl_uint {
        self.screen
            .param(pipe_cap::PIPE_CAP_LINEAR_IMAGE_PITCH_ALIGNMENT) as u32
    }

    pub fn image_base_address_alignment(&self) -> cl_uint {
        self.screen
            .param(pipe_cap::PIPE_CAP_LINEAR_IMAGE_BASE_ADDRESS_ALIGNMENT) as u32
    }

    pub fn image_buffer_size(&self) -> usize {
        min(
            // the CTS requires it to not exceed `CL_DEVICE_MAX_MEM_ALLOC_SIZE`
            self.max_mem_alloc(),
            self.screen
                .param(pipe_cap::PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT) as cl_ulong,
        ) as usize
    }

    pub fn image_read_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS) as cl_uint
    }

    pub fn image2d_from_buffer_supported(&self) -> bool {
        self.image_pitch_alignment() != 0 && self.image_base_address_alignment() != 0
    }

    pub fn image_supported(&self) -> bool {
        // TODO check CL_DEVICE_IMAGE_SUPPORT reqs
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) != 0 &&
      // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
      self.image_read_count() >= 8 &&
      // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
      self.image_write_count() >= 8 &&
      // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
      self.image_2d_size() >= 2048
    }

    pub fn image_read_write_supported(&self) -> bool {
        !FORMATS
            .iter()
            .filter(|f| f.req_for_full_read_and_write)
            .map(|f| self.formats.get(&f.cl_image_format).unwrap())
            .map(|f| f.get(&CL_MEM_OBJECT_IMAGE3D).unwrap())
            .any(|f| *f & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) == 0)
    }

    pub fn image_3d_write_supported(&self) -> bool {
        !FORMATS
            .iter()
            .filter(|f| f.req_for_full_read_or_write)
            .map(|f| self.formats.get(&f.cl_image_format).unwrap())
            .map(|f| f.get(&CL_MEM_OBJECT_IMAGE3D).unwrap())
            .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0)
    }

    pub fn image_write_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) as cl_uint
    }

    pub fn little_endian(&self) -> bool {
        let endianness = self.screen.param(pipe_cap::PIPE_CAP_ENDIANNESS);
        endianness == (pipe_endian::PIPE_ENDIAN_LITTLE as i32)
    }

    pub fn local_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)
    }

    pub fn max_block_sizes(&self) -> Vec<usize> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
        v.into_iter().map(|v| v as usize).collect()
    }

    pub fn max_clock_freq(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)
    }

    pub fn max_compute_units(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)
    }

    pub fn max_grid_dimensions(&self) -> cl_uint {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_GRID_DIMENSION,
        ) as cl_uint
    }

    pub fn max_mem_alloc(&self) -> cl_ulong {
        // TODO: at the moment gallium doesn't support bigger buffers
        min(
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE),
            0x80000000,
        )
    }

    pub fn max_samplers(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS) as cl_uint
    }

    pub fn max_threads_per_block(&self) -> usize {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
        ) as usize
    }

    pub fn param_max_size(&self) -> usize {
        min(
            self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE) as u32,
            4 * 1024,
        ) as usize
    }

    pub fn printf_buffer_size(&self) -> usize {
        1024 * 1024
    }

    pub fn pci_info(&self) -> Option<cl_device_pci_bus_info_khr> {
        if self.screen.device_type() != pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI {
            return None;
        }

        let pci_domain = self.screen.param(pipe_cap::PIPE_CAP_PCI_GROUP) as cl_uint;
        let pci_bus = self.screen.param(pipe_cap::PIPE_CAP_PCI_BUS) as cl_uint;
        let pci_device = self.screen.param(pipe_cap::PIPE_CAP_PCI_DEVICE) as cl_uint;
        let pci_function = self.screen.param(pipe_cap::PIPE_CAP_PCI_FUNCTION) as cl_uint;

        Some(cl_device_pci_bus_info_khr {
            pci_domain,
            pci_bus,
            pci_device,
            pci_function,
        })
    }

    pub fn screen(&self) -> &Arc<PipeScreen> {
        &self.screen
    }

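    /// `PIPE_COMPUTE_CAP_SUBGROUP_SIZES` is a bitmask of supported sizes: each set
    /// bit `i` corresponds to a subgroup size of `1 << i`.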
    pub fn subgroup_sizes(&self) -> Vec<usize> {
        let subgroup_size = ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
        );

        SetBitIndices::from_msb(subgroup_size)
            .map(|bit| 1 << bit)
            .collect()
    }

    pub fn max_subgroups(&self) -> u32 {
        ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
        )
    }

    pub fn subgroups_supported(&self) -> bool {
        let subgroup_sizes = self.subgroup_sizes().len();

        // If multiple subgroup sizes are supported we need to be able to query a CSO for its
        // subgroup size; doing that without shareable shaders isn't practical.
        self.max_subgroups() > 0
            && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
    }

    pub fn svm_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_SYSTEM_SVM) == 1
    }

    pub fn timer_resolution(&self) -> usize {
        self.screen.param(pipe_cap::PIPE_CAP_TIMER_RESOLUTION) as usize
    }

    pub fn unified_memory(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_UMA) == 1
    }

    pub fn vendor_id(&self) -> cl_uint {
        let id = self.screen.param(pipe_cap::PIPE_CAP_VENDOR_ID);
        if id == -1 {
            return 0;
        }
        id as u32
    }

    pub fn prefers_real_buffer_in_cb0(&self) -> bool {
        self.screen
            .param(pipe_cap::PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0)
            == 1
    }

    pub fn shareable_shaders(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_SHAREABLE_SHADERS) == 1
    }

    pub fn images_as_deref(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_NIR_IMAGES_AS_DEREF) == 1
    }

    pub fn samplers_as_deref(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_NIR_SAMPLERS_AS_DEREF) == 1
    }

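    /// Locks the device's internal helper context and returns it behind the
    /// [`HelperContextWrapper`] interface.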
    pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
        HelperContext {
            lock: self.helper_ctx.lock().unwrap(),
        }
    }

    pub fn cl_features(&self) -> clc_optional_features {
        let subgroups_supported = self.subgroups_supported();
        clc_optional_features {
            fp16: self.fp16_supported(),
            fp64: self.fp64_supported(),
            int64: self.int64_supported(),
            images: self.image_supported(),
            images_read_write: self.image_read_write_supported(),
            images_write_3d: self.image_3d_write_supported(),
            integer_dot_product: true,
            subgroups: subgroups_supported,
            subgroups_shuffle: subgroups_supported,
            subgroups_shuffle_relative: subgroups_supported,
            ..Default::default()
        }
    }
}

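/// Returns all devices of the global [`Platform`].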
pub fn devs() -> &'static Vec<Device> {
    &Platform::get().devs
}

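/// Returns every device matching the given `cl_device_type` bitmask.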
pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    devs()
        .iter()
        .filter(|d| device_type & d.device_type(true) != 0)
        .collect()
}

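/// Looks up a device by the UUID its screen reports.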
pub fn get_dev_for_uuid(uuid: [c_char; UUID_SIZE]) -> Option<&'static Device> {
    devs().iter().find(|d| {
        let uuid: [c_uchar; UUID_SIZE] = unsafe { transmute(uuid) };
        uuid == d.screen().device_uuid().unwrap()
    })
}