use crate::api::icd::*;
use crate::api::util::*;
use crate::core::format::*;
use crate::core::platform::*;
use crate::core::util::*;
use crate::core::version::*;
use crate::impl_cl_type_trait_base;

use mesa_rust::compiler::clc::*;
use mesa_rust::compiler::nir::*;
use mesa_rust::pipe::context::*;
use mesa_rust::pipe::device::load_screens;
use mesa_rust::pipe::fence::*;
use mesa_rust::pipe::resource::*;
use mesa_rust::pipe::screen::*;
use mesa_rust::pipe::transfer::*;
use mesa_rust_gen::*;
use mesa_rust_util::math::SetBitIndices;
use mesa_rust_util::static_assert;
use rusticl_opencl_gen::*;

use std::cmp::max;
use std::cmp::min;
use std::collections::HashMap;
use std::convert::TryInto;
use std::env;
use std::ffi::CString;
use std::mem::transmute;
use std::os::raw::*;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::MutexGuard;

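/// A single OpenCL device, backed by a gallium `PipeScreen`.
///
/// Most of the device state is queried once at creation time and cached in
/// these fields so that device info queries stay cheap.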
pub struct Device {
    pub base: CLObjectBase<CL_INVALID_DEVICE>,
    pub screen: Arc<PipeScreen>,
    pub cl_version: CLVersion,
    pub clc_version: CLVersion,
    pub clc_versions: Vec<cl_name_version>,
    pub custom: bool,
    pub embedded: bool,
    pub has_timestamp: bool, // Cached to keep API fast
    pub extension_string: String,
    pub extensions: Vec<cl_name_version>,
    pub spirv_extensions: Vec<CString>,
    pub clc_features: Vec<cl_name_version>,
    pub formats: HashMap<cl_image_format, HashMap<cl_mem_object_type, cl_mem_flags>>,
    pub lib_clc: NirShader,
    helper_ctx: Mutex<PipeContext>,
}

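/// Abstraction over the device's internal helper context.
///
/// Work that needs a `PipeContext` independent of any queue (resource maps,
/// compute state queries, fence imports, ...) goes through this trait, so
/// callers don't have to care about the locking done by [`HelperContext`].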
pub trait HelperContextWrapper {
    #[must_use]
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext);

    fn buffer_map_directly(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn buffer_map_coherent(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn texture_map_directly(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn texture_map_coherent(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer>;

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void;
    fn delete_compute_state(&self, cso: *mut c_void);
    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info;
    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32;

    fn unmap(&self, tx: PipeTransfer);

    fn is_create_fence_fd_supported(&self) -> bool;
    fn import_fence(&self, fence_fd: &FenceFd) -> PipeFence;
}

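/// Guard type giving exclusive access to the device's helper `PipeContext`
/// for as long as it is held.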
pub struct HelperContext<'a> {
    lock: MutexGuard<'a, PipeContext>,
}

impl<'a> HelperContext<'a> {
    pub fn resource_copy_region(
        &self,
        src: &PipeResource,
        dst: &PipeResource,
        dst_offset: &[u32; 3],
        bx: &pipe_box,
    ) {
        self.lock.resource_copy_region(src, dst, dst_offset, bx);
    }

    pub fn buffer_subdata(
        &self,
        res: &PipeResource,
        offset: c_uint,
        data: *const c_void,
        size: c_uint,
    ) {
        self.lock.buffer_subdata(res, offset, data, size)
    }

    pub fn texture_subdata(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        data: *const c_void,
        stride: u32,
        layer_stride: usize,
    ) {
        self.lock
            .texture_subdata(res, bx, data, stride, layer_stride)
    }
}

impl<'a> HelperContextWrapper for HelperContext<'a> {
    fn exec<F>(&self, func: F) -> PipeFence
    where
        F: Fn(&HelperContext),
    {
        func(self);
        self.lock.flush()
    }

    fn buffer_map_directly(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock.buffer_map_directly(res, offset, size, rw)
    }

    fn buffer_map_coherent(
        &self,
        res: &PipeResource,
        offset: i32,
        size: i32,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock
            .buffer_map(res, offset, size, rw, ResourceMapType::Coherent)
    }

    fn texture_map_directly(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock.texture_map_directly(res, bx, rw)
    }

    fn texture_map_coherent(
        &self,
        res: &PipeResource,
        bx: &pipe_box,
        rw: RWFlags,
    ) -> Option<PipeTransfer> {
        self.lock
            .texture_map(res, bx, rw, ResourceMapType::Coherent)
    }

    fn create_compute_state(&self, nir: &NirShader, static_local_mem: u32) -> *mut c_void {
        self.lock.create_compute_state(nir, static_local_mem)
    }

    fn delete_compute_state(&self, cso: *mut c_void) {
        self.lock.delete_compute_state(cso)
    }

    fn compute_state_info(&self, state: *mut c_void) -> pipe_compute_state_object_info {
        self.lock.compute_state_info(state)
    }

    fn compute_state_subgroup_size(&self, state: *mut c_void, block: &[u32; 3]) -> u32 {
        self.lock.compute_state_subgroup_size(state, block)
    }

    fn unmap(&self, tx: PipeTransfer) {
        tx.with_ctx(&self.lock);
    }

    fn is_create_fence_fd_supported(&self) -> bool {
        self.lock.is_create_fence_fd_supported()
    }

    fn import_fence(&self, fd: &FenceFd) -> PipeFence {
        self.lock.import_fence(fd)
    }
}

impl_cl_type_trait_base!(cl_device_id, Device, [Device], CL_INVALID_DEVICE);

impl Device {
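    /// Wraps `screen` in a [`Device`], returning `None` if the screen doesn't
    /// meet the minimum requirements (compute support, NIR, libclc).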
    fn new(screen: PipeScreen) -> Option<Device> {
        if !Self::check_valid(&screen) {
            return None;
        }

        let screen = Arc::new(screen);
        // Create before loading libclc as llvmpipe only creates the shader cache with the first
        // context being created.
        let helper_ctx = screen.create_context()?;
        let lib_clc = spirv::SPIRVBin::get_lib_clc(&screen);
        if lib_clc.is_none() {
            eprintln!("Libclc failed to load. Please make sure it is installed and provides spirv-mesa3d-.spv and/or spirv64-mesa3d-.spv");
        }

        let mut d = Self {
            base: CLObjectBase::new(RusticlTypes::Device),
            helper_ctx: Mutex::new(helper_ctx),
            screen,
            cl_version: CLVersion::Cl3_0,
            clc_version: CLVersion::Cl3_0,
            clc_versions: Vec::new(),
            custom: false,
            embedded: false,
            has_timestamp: false,
            extension_string: String::new(),
            extensions: Vec::new(),
            spirv_extensions: Vec::new(),
            clc_features: Vec::new(),
            formats: HashMap::new(),
            lib_clc: lib_clc?,
        };

        d.fill_format_tables();

        // check if we are embedded or full profile first
        d.embedded = d.check_embedded_profile();

        // check if we have to report it as a custom device
        d.custom = d.check_custom();

        let cap_timestamp = d.screen.param(pipe_cap::PIPE_CAP_QUERY_TIMESTAMP);
        let cap_timestamp_res = d.timer_resolution();
        d.has_timestamp = cap_timestamp != 0 && cap_timestamp_res > 0;

        // query supported extensions
        d.fill_extensions();

        // now figure out what version we are
        d.check_version();

        Some(d)
    }

    /// Converts a temporary reference to a static if and only if this device lives inside static
    /// memory.
    pub fn to_static(&self) -> Option<&'static Self> {
        devs().iter().find(|&dev| self == dev)
    }

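    /// Builds the per-format table of supported `cl_mem_object_type`s and the
    /// `cl_mem_flags` each combination may be used with, based on what the
    /// screen reports through `is_format_supported`.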
    fn fill_format_tables(&mut self) {
        for f in FORMATS {
            let mut fs = HashMap::new();
            for t in CL_IMAGE_TYPES {
                // the CTS doesn't test them, so let's not advertise them by accident if they are
                // broken
                if t == CL_MEM_OBJECT_IMAGE1D_BUFFER
                    && [CL_RGB, CL_RGBx].contains(&f.cl_image_format.image_channel_order)
                    && ![CL_UNORM_SHORT_565, CL_UNORM_SHORT_555]
                        .contains(&f.cl_image_format.image_channel_data_type)
                {
                    continue;
                }

                let mut flags: cl_uint = 0;
                if self.screen.is_format_supported(
                    f.pipe,
                    cl_mem_type_to_texture_target(t),
                    PIPE_BIND_SAMPLER_VIEW,
                ) {
                    flags |= CL_MEM_READ_ONLY;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_WRITE_ONLY;
                    // TODO: enable once we support it
                    // flags |= CL_MEM_KERNEL_READ_AND_WRITE;
                }

                // TODO: cl_khr_srgb_image_writes
                if !f.is_srgb
                    && self.screen.is_format_supported(
                        f.pipe,
                        cl_mem_type_to_texture_target(t),
                        PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE,
                    )
                {
                    flags |= CL_MEM_READ_WRITE;
                }

                fs.insert(t, flags as cl_mem_flags);
            }
            self.formats.insert(f.cl_image_format, fs);
        }
    }

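    /// Checks whether the screen is usable as an OpenCL device at all: it has
    /// to support compute shaders consuming NIR and a const buffer 0 large
    /// enough for kernel arguments.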
    fn check_valid(screen: &PipeScreen) -> bool {
        if screen.param(pipe_cap::PIPE_CAP_COMPUTE) == 0
            || screen.shader_param(
                pipe_shader_type::PIPE_SHADER_COMPUTE,
                pipe_shader_cap::PIPE_SHADER_CAP_SUPPORTED_IRS,
            ) & (1 << (pipe_shader_ir::PIPE_SHADER_IR_NIR as i32))
                == 0
        {
            return false;
        }

        // CL_DEVICE_MAX_PARAMETER_SIZE
        // For this minimum value, only a maximum of 128 arguments can be passed to a kernel
        if (screen.shader_param(
            pipe_shader_type::PIPE_SHADER_COMPUTE,
            pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE,
        ) as u32)
            < 128
        {
            return false;
        }
        true
    }

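    /// Returns `true` if the device misses any of the minimum limits the spec
    /// requires for non-`CL_DEVICE_TYPE_CUSTOM` devices, in which case we have
    /// to report it as a custom device.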
    fn check_custom(&self) -> bool {
        // Max size of memory object allocation in bytes. The minimum value is
        // max(min(1024 × 1024 × 1024, 1/4th of CL_DEVICE_GLOBAL_MEM_SIZE), 32 × 1024 × 1024)
        // for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        let mut limit = min(1024 * 1024 * 1024, self.global_mem_size() / 4);
        limit = max(limit, 32 * 1024 * 1024);
        if self.max_mem_alloc() < limit {
            return true;
        }

        // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
        // The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
        if self.max_grid_dimensions() < 3 {
            return true;
        }

        if self.embedded {
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 bytes for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 4 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 1 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 1024 {
                return true;
            }
        } else {
            // CL 1.0 spec:
            // CL_DEVICE_MAX_PARAMETER_SIZE
            // The minimum value is 256 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.param_max_size() < 256 {
                return true;
            }

            // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
            // The minimum value is 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.const_max_size() < 64 * 1024 {
                return true;
            }

            // TODO
            // CL_DEVICE_MAX_CONSTANT_ARGS
            // The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.

            // CL 1.0 spec:
            // CL_DEVICE_LOCAL_MEM_SIZE
            // The minimum value is 16 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
            if self.local_mem_size() < 16 * 1024 {
                return true;
            }
        }

        false
    }

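    /// Returns `true` if the device only qualifies for the EMBEDDED profile,
    /// i.e. it misses FULL profile requirements like 64 bit integers or the
    /// image related minimums checked below.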
    fn check_embedded_profile(&self) -> bool {
        if self.image_supported() {
            // The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.max_samplers() < 16 ||
                // The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_read_count() < 128 ||
                // The minimum value is 64 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_write_count() < 64 ||
                // The minimum value is 16384 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_2d_size() < 16384 ||
                // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_array_size() < 2048 ||
                // The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_buffer_size() < 65536
            {
                return true;
            }

            // TODO check req formats
        }
        !self.int64_supported()
    }

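    /// Parses the `RUSTICL_DEVICE_TYPE` environment variable, which can be
    /// used to override the reported device type.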
    fn parse_env_device_type() -> Option<cl_device_type> {
        let mut val = env::var("RUSTICL_DEVICE_TYPE").ok()?;
        val.make_ascii_lowercase();
        Some(
            match &*val {
                "accelerator" => CL_DEVICE_TYPE_ACCELERATOR,
                "cpu" => CL_DEVICE_TYPE_CPU,
                "custom" => CL_DEVICE_TYPE_CUSTOM,
                "gpu" => CL_DEVICE_TYPE_GPU,
                _ => return None,
            }
            .into(),
        )
    }

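    /// Parses the `RUSTICL_CL_VERSION` environment variable, expected in
    /// `major.minor` form, which can be used to override the reported CL
    /// version.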
    fn parse_env_version() -> Option<CLVersion> {
        let val = env::var("RUSTICL_CL_VERSION").ok()?;
        let (major, minor) = val.split_once('.')?;
        let major = major.parse().ok()?;
        let minor = minor.parse().ok()?;
        mk_cl_version(major, minor, 0).try_into().ok()
    }

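    /// Figures out the highest CL version the device can support, starting at
    /// 3.0 and downgrading whenever a limit or extension required by a version
    /// is missing, then fills in the matching OpenCL C versions.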
    // TODO add CLC checks
    fn check_version(&mut self) {
        let exts: Vec<&str> = self.extension_string.split(' ').collect();
        let mut res = CLVersion::Cl3_0;

        if self.embedded {
            if self.image_supported() {
                let supports_array_writes = !FORMATS
                    .iter()
                    .filter(|f| f.req_for_embeded_read_or_write)
                    .map(|f| self.formats.get(&f.cl_image_format).unwrap())
                    .map(|f| f.get(&CL_MEM_OBJECT_IMAGE2D_ARRAY).unwrap())
                    .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0);
                if self.image_3d_size() < 2048 || !supports_array_writes {
                    res = CLVersion::Cl1_2;
                }
            }
        }

        // TODO: check image 1D, 1Dbuffer, 1Darray and 2Darray support explicitly
        if self.image_supported() {
            // The minimum value is 256 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            if self.image_array_size() < 256 ||
                // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
                self.image_buffer_size() < 2048
            {
                res = CLVersion::Cl1_1;
            }
        }

        if self.embedded {
            // The minimum value for the EMBEDDED profile is 1 KB.
            if self.printf_buffer_size() < 1024 {
                res = CLVersion::Cl1_1;
            }
        } else {
            // The minimum value for the FULL profile is 1 MB.
            if self.printf_buffer_size() < 1024 * 1024 {
                res = CLVersion::Cl1_1;
            }
        }

        if !exts.contains(&"cl_khr_byte_addressable_store")
            || !exts.contains(&"cl_khr_global_int32_base_atomics")
            || !exts.contains(&"cl_khr_global_int32_extended_atomics")
            || !exts.contains(&"cl_khr_local_int32_base_atomics")
            || !exts.contains(&"cl_khr_local_int32_extended_atomics")
            // The following modifications are made to the OpenCL 1.1 platform layer and runtime (sections 4 and 5):
            // The minimum FULL_PROFILE value for CL_DEVICE_MAX_PARAMETER_SIZE increased from 256 to 1024 bytes
            || self.param_max_size() < 1024
            // The minimum FULL_PROFILE value for CL_DEVICE_LOCAL_MEM_SIZE increased from 16 KB to 32 KB.
            || self.local_mem_size() < 32 * 1024
        {
            res = CLVersion::Cl1_0;
        }

        if let Some(val) = Self::parse_env_version() {
            res = val;
        }

        if res >= CLVersion::Cl3_0 {
            self.clc_versions
                .push(mk_cl_version_ext(3, 0, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_2 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 2, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_1 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 1, 0, "OpenCL C"));
        }

        if res >= CLVersion::Cl1_0 {
            self.clc_versions
                .push(mk_cl_version_ext(1, 0, 0, "OpenCL C"));
        }

        self.cl_version = res;
        self.clc_version = min(CLVersion::Cl1_2, res);
    }

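    /// Collects the device extensions, OpenCL C features and SPIR-V extensions
    /// to advertise, based on the features the driver exposes.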
    fn fill_extensions(&mut self) {
        let mut exts_str: Vec<String> = Vec::new();
        let mut exts = PLATFORM_EXTENSIONS.to_vec();
        let mut feats = Vec::new();
        let mut spirv_exts = Vec::new();
        let mut add_ext = |major, minor, patch, ext: &str| {
            exts.push(mk_cl_version_ext(major, minor, patch, ext));
            exts_str.push(ext.to_owned());
        };
        let mut add_feat = |major, minor, patch, feat: &str| {
            feats.push(mk_cl_version_ext(major, minor, patch, feat));
        };
        let mut add_spirv = |ext: &str| {
            spirv_exts.push(CString::new(ext).unwrap());
        };

        // add extensions all drivers support for now
        add_ext(1, 0, 0, "cl_khr_global_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_global_int32_extended_atomics");
        add_ext(2, 0, 0, "cl_khr_integer_dot_product");
        add_feat(
            2,
            0,
            0,
            "__opencl_c_integer_dot_product_input_4x8bit_packed",
        );
        add_feat(2, 0, 0, "__opencl_c_integer_dot_product_input_4x8bit");
        add_ext(1, 0, 0, "cl_khr_local_int32_base_atomics");
        add_ext(1, 0, 0, "cl_khr_local_int32_extended_atomics");

        add_spirv("SPV_KHR_expect_assume");
        add_spirv("SPV_KHR_float_controls");
        add_spirv("SPV_KHR_integer_dot_product");
        add_spirv("SPV_KHR_no_integer_wrap_decoration");

        if self.fp16_supported() {
            add_ext(1, 0, 0, "cl_khr_fp16");
        }

        if self.fp64_supported() {
            add_ext(1, 0, 0, "cl_khr_fp64");
            add_feat(1, 0, 0, "__opencl_c_fp64");
        }

        if self.is_gl_sharing_supported() {
            add_ext(1, 0, 0, "cl_khr_gl_sharing");
        }

        if self.int64_supported() {
            if self.embedded {
                add_ext(1, 0, 0, "cles_khr_int64");
            }

            add_feat(1, 0, 0, "__opencl_c_int64");
        }

        if self.image_supported() {
            add_feat(1, 0, 0, "__opencl_c_images");

            if self.image2d_from_buffer_supported() {
                add_ext(1, 0, 0, "cl_khr_image2d_from_buffer");
            }

            if self.image_read_write_supported() {
                add_feat(1, 0, 0, "__opencl_c_read_write_images");
            }

            if self.image_3d_write_supported() {
                add_ext(1, 0, 0, "cl_khr_3d_image_writes");
                add_feat(1, 0, 0, "__opencl_c_3d_image_writes");
            }
        }

        if self.pci_info().is_some() {
            add_ext(1, 0, 0, "cl_khr_pci_bus_info");
        }

        if self.screen().device_uuid().is_some() && self.screen().driver_uuid().is_some() {
            static_assert!(PIPE_UUID_SIZE == CL_UUID_SIZE_KHR);
            static_assert!(PIPE_LUID_SIZE == CL_LUID_SIZE_KHR);

            add_ext(1, 0, 0, "cl_khr_device_uuid");
        }

        if self.subgroups_supported() {
            // requires CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS
            //add_ext(1, 0, 0, "cl_khr_subgroups");
            add_feat(1, 0, 0, "__opencl_c_subgroups");

            // we have lowering in `nir_lower_subgroups`, drivers can just use that
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle");
            add_ext(1, 0, 0, "cl_khr_subgroup_shuffle_relative");
        }

        if self.svm_supported() {
            add_ext(1, 0, 0, "cl_arm_shared_virtual_memory");
        }

        self.extensions = exts;
        self.clc_features = feats;
        self.extension_string = format!("{} {}", PLATFORM_EXTENSION_STR, exts_str.join(" "));
        self.spirv_extensions = spirv_exts;
    }

    fn shader_param(&self, cap: pipe_shader_cap) -> i32 {
        self.screen
            .shader_param(pipe_shader_type::PIPE_SHADER_COMPUTE, cap)
    }

    pub fn all() -> impl Iterator<Item = Device> {
        load_screens().filter_map(Device::new)
    }

    pub fn address_bits(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_ADDRESS_BITS)
    }

    pub fn const_max_size(&self) -> cl_ulong {
        min(
            // Needed to fix the `api min_max_constant_buffer_size` CL CTS test as it can't really
            // handle arbitrary values here. We might want to reconsider later and figure out how
            // to advertise higher values without tripping up the test.
            // should be at least 1 << 16 (native UBO size on NVidia)
            // advertising more just in case it benefits other hardware
            1 << 26,
            min(
                self.max_mem_alloc(),
                self.screen
                    .param(pipe_cap::PIPE_CAP_MAX_SHADER_BUFFER_SIZE_UINT) as u64,
            ),
        )
    }

    pub fn const_max_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFERS) as cl_uint
    }

    pub fn device_type(&self, internal: bool) -> cl_device_type {
        if let Some(env) = Self::parse_env_device_type() {
            return env;
        }

        if self.custom {
            return CL_DEVICE_TYPE_CUSTOM as cl_device_type;
        }
        let mut res = match self.screen.device_type() {
            pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE => CL_DEVICE_TYPE_CPU,
            pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI => CL_DEVICE_TYPE_GPU,
            pipe_loader_device_type::PIPE_LOADER_DEVICE_PLATFORM => CL_DEVICE_TYPE_GPU,
            pipe_loader_device_type::NUM_PIPE_LOADER_DEVICE_TYPES => CL_DEVICE_TYPE_CUSTOM,
        };

        if internal && res == CL_DEVICE_TYPE_GPU && self.screen.driver_name() != "zink" {
            res |= CL_DEVICE_TYPE_DEFAULT;
        }

        res as cl_device_type
    }

    pub fn fp16_supported(&self) -> bool {
        if !Platform::features().fp16 {
            return false;
        }

        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_FP16) != 0
    }

    pub fn fp64_supported(&self) -> bool {
        if !Platform::features().fp64 {
            return false;
        }

        self.screen.param(pipe_cap::PIPE_CAP_DOUBLES) == 1
    }

    pub fn is_gl_sharing_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_CL_GL_SHARING) != 0
            && self.screen.param(pipe_cap::PIPE_CAP_DMABUF) != 0
            && !self.is_device_software()
            && self.screen.is_res_handle_supported()
            && self.screen.device_uuid().is_some()
            && self.helper_ctx().is_create_fence_fd_supported()
    }

    pub fn is_device_software(&self) -> bool {
        self.screen.device_type() == pipe_loader_device_type::PIPE_LOADER_DEVICE_SOFTWARE
    }

    pub fn get_nir_options(&self) -> nir_shader_compiler_options {
        unsafe {
            *self
                .screen
                .nir_shader_compiler_options(pipe_shader_type::PIPE_SHADER_COMPUTE)
        }
    }

    pub fn sdot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8
    }

    pub fn udot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8
    }

    pub fn sudot_4x8_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8
    }

    pub fn pack_32_4x8_supported(&self) -> bool {
        self.get_nir_options().has_pack_32_4x8
    }

    pub fn sdot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sdot_4x8_sat
    }

    pub fn udot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_udot_4x8_sat
    }

    pub fn sudot_4x8_sat_supported(&self) -> bool {
        self.get_nir_options().has_sudot_4x8_sat
    }

    pub fn fp64_is_softfp(&self) -> bool {
        bit_check(
            self.get_nir_options().lower_doubles_options as u32,
            nir_lower_doubles_options::nir_lower_fp64_full_software as u32,
        )
    }

    pub fn int64_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_INT64) == 1
    }

    pub fn global_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)
    }

    pub fn image_2d_size(&self) -> usize {
        self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_2D_SIZE) as usize
    }

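    /// Gallium reports the number of mip levels for 3D textures; a texture
    /// with `n` levels has a maximum dimension of `2^(n - 1)` texels.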
    pub fn image_3d_size(&self) -> usize {
        1 << (self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_3D_LEVELS) - 1)
    }

    pub fn image_3d_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 0
    }

    pub fn image_array_size(&self) -> usize {
        self.screen
            .param(pipe_cap::PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS) as usize
    }

    pub fn image_pitch_alignment(&self) -> cl_uint {
        self.screen
            .param(pipe_cap::PIPE_CAP_LINEAR_IMAGE_PITCH_ALIGNMENT) as u32
    }

    pub fn image_base_address_alignment(&self) -> cl_uint {
        self.screen
            .param(pipe_cap::PIPE_CAP_LINEAR_IMAGE_BASE_ADDRESS_ALIGNMENT) as u32
    }

    pub fn image_buffer_size(&self) -> usize {
        min(
            // the CTS requires it to not exceed `CL_MAX_MEM_ALLOC_SIZE`
            self.max_mem_alloc(),
            self.screen
                .param(pipe_cap::PIPE_CAP_MAX_TEXEL_BUFFER_ELEMENTS_UINT) as cl_ulong,
        ) as usize
    }

    pub fn image_read_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS) as cl_uint
    }

    pub fn image2d_from_buffer_supported(&self) -> bool {
        self.image_pitch_alignment() != 0 && self.image_base_address_alignment() != 0
    }

    pub fn image_supported(&self) -> bool {
        // TODO check CL_DEVICE_IMAGE_SUPPORT reqs
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) != 0 &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_read_count() >= 8 &&
            // The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_write_count() >= 8 &&
            // The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
            self.image_2d_size() >= 2048
    }

    pub fn image_read_write_supported(&self) -> bool {
        !FORMATS
            .iter()
            .filter(|f| f.req_for_full_read_and_write)
            .map(|f| self.formats.get(&f.cl_image_format).unwrap())
            .map(|f| f.get(&CL_MEM_OBJECT_IMAGE3D).unwrap())
            .any(|f| *f & cl_mem_flags::from(CL_MEM_KERNEL_READ_AND_WRITE) == 0)
    }

    pub fn image_3d_write_supported(&self) -> bool {
        !FORMATS
            .iter()
            .filter(|f| f.req_for_full_read_or_write)
            .map(|f| self.formats.get(&f.cl_image_format).unwrap())
            .map(|f| f.get(&CL_MEM_OBJECT_IMAGE3D).unwrap())
            .any(|f| *f & cl_mem_flags::from(CL_MEM_WRITE_ONLY) == 0)
    }

    pub fn image_write_count(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_SHADER_IMAGES) as cl_uint
    }

    pub fn little_endian(&self) -> bool {
        let endianness = self.screen.param(pipe_cap::PIPE_CAP_ENDIANNESS);
        endianness == (pipe_endian::PIPE_ENDIAN_LITTLE as i32)
    }

    pub fn local_mem_size(&self) -> cl_ulong {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)
    }

    pub fn max_block_sizes(&self) -> Vec<usize> {
        let v: Vec<u64> = self
            .screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);
        v.into_iter().map(|v| v as usize).collect()
    }

    pub fn max_clock_freq(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)
    }

    pub fn max_compute_units(&self) -> cl_uint {
        self.screen
            .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)
    }

    pub fn max_grid_dimensions(&self) -> cl_uint {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_GRID_DIMENSION,
        ) as cl_uint
    }

    pub fn max_mem_alloc(&self) -> cl_ulong {
        // TODO: at the moment gallium doesn't support bigger buffers
        min(
            self.screen
                .compute_param(pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE),
            0x80000000,
        )
    }

    pub fn max_samplers(&self) -> cl_uint {
        self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS) as cl_uint
    }

    pub fn max_threads_per_block(&self) -> usize {
        ComputeParam::<u64>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK,
        ) as usize
    }

    pub fn param_max_size(&self) -> usize {
        min(
            self.shader_param(pipe_shader_cap::PIPE_SHADER_CAP_MAX_CONST_BUFFER0_SIZE) as u32,
            4 * 1024,
        ) as usize
    }

    pub fn printf_buffer_size(&self) -> usize {
        1024 * 1024
    }

    pub fn pci_info(&self) -> Option<cl_device_pci_bus_info_khr> {
        if self.screen.device_type() != pipe_loader_device_type::PIPE_LOADER_DEVICE_PCI {
            return None;
        }

        let pci_domain = self.screen.param(pipe_cap::PIPE_CAP_PCI_GROUP) as cl_uint;
        let pci_bus = self.screen.param(pipe_cap::PIPE_CAP_PCI_BUS) as cl_uint;
        let pci_device = self.screen.param(pipe_cap::PIPE_CAP_PCI_DEVICE) as cl_uint;
        let pci_function = self.screen.param(pipe_cap::PIPE_CAP_PCI_FUNCTION) as cl_uint;

        Some(cl_device_pci_bus_info_khr {
            pci_domain,
            pci_bus,
            pci_device,
            pci_function,
        })
    }

    pub fn screen(&self) -> &Arc<PipeScreen> {
        &self.screen
    }

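    /// The screen reports the supported subgroup sizes as a bitmask where bit
    /// `n` stands for a subgroup size of `2^n`; decode it into a list of
    /// sizes.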
    pub fn subgroup_sizes(&self) -> Vec<usize> {
        let subgroup_size = ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_SUBGROUP_SIZES,
        );

        SetBitIndices::from_msb(subgroup_size)
            .map(|bit| 1 << bit)
            .collect()
    }

    pub fn max_subgroups(&self) -> u32 {
        ComputeParam::<u32>::compute_param(
            self.screen.as_ref(),
            pipe_compute_cap::PIPE_COMPUTE_CAP_MAX_SUBGROUPS,
        )
    }

    pub fn subgroups_supported(&self) -> bool {
        let subgroup_sizes = self.subgroup_sizes().len();

        // we need to be able to query a CSO for subgroup sizes if multiple subgroup sizes are
        // supported, and doing that without shareable shaders isn't practical
        self.max_subgroups() > 0
            && (subgroup_sizes == 1 || (subgroup_sizes > 1 && self.shareable_shaders()))
    }

    pub fn svm_supported(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_SYSTEM_SVM) == 1
    }

    pub fn timer_resolution(&self) -> usize {
        self.screen.param(pipe_cap::PIPE_CAP_TIMER_RESOLUTION) as usize
    }

    pub fn unified_memory(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_UMA) == 1
    }

    pub fn vendor_id(&self) -> cl_uint {
        let id = self.screen.param(pipe_cap::PIPE_CAP_VENDOR_ID);
        if id == -1 {
            return 0;
        }
        id as u32
    }

    pub fn prefers_real_buffer_in_cb0(&self) -> bool {
        self.screen
            .param(pipe_cap::PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0)
            == 1
    }

    pub fn shareable_shaders(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_SHAREABLE_SHADERS) == 1
    }

    pub fn images_as_deref(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_NIR_IMAGES_AS_DEREF) == 1
    }

    pub fn samplers_as_deref(&self) -> bool {
        self.screen.param(pipe_cap::PIPE_CAP_NIR_SAMPLERS_AS_DEREF) == 1
    }

    pub fn helper_ctx(&self) -> impl HelperContextWrapper + '_ {
        HelperContext {
            lock: self.helper_ctx.lock().unwrap(),
        }
    }

    pub fn cl_features(&self) -> clc_optional_features {
        let subgroups_supported = self.subgroups_supported();
        clc_optional_features {
            fp16: self.fp16_supported(),
            fp64: self.fp64_supported(),
            int64: self.int64_supported(),
            images: self.image_supported(),
            images_read_write: self.image_read_write_supported(),
            images_write_3d: self.image_3d_write_supported(),
            integer_dot_product: true,
            subgroups: subgroups_supported,
            subgroups_shuffle: subgroups_supported,
            subgroups_shuffle_relative: subgroups_supported,
            ..Default::default()
        }
    }
}

pub fn devs() -> &'static Vec<Device> {
    &Platform::get().devs
}

pub fn get_devs_for_type(device_type: cl_device_type) -> Vec<&'static Device> {
    devs()
        .iter()
        .filter(|d| device_type & d.device_type(true) != 0)
        .collect()
}

pub fn get_dev_for_uuid(uuid: [c_char; UUID_SIZE]) -> Option<&'static Device> {
    devs().iter().find(|d| {
        let uuid: [c_uchar; UUID_SIZE] = unsafe { transmute(uuid) };
        uuid == d.screen().device_uuid().unwrap()
    })
}