• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright © 2024 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 extern crate nvidia_headers;
5 
6 use compiler::bindings::*;
7 use nak_bindings::*;
8 use nvidia_headers::classes::{cla0c0, clc0c0, clc3c0, clc6c0};
9 
10 use bitview::*;
11 use paste::paste;
12 
// Mutable bitfield view over the raw QMD dwords; every field write in this
// file goes through this.
type QMDBitView<'a> = BitMutView<'a, [u32]>;
14 
/// Version-independent builder interface for a QMD (the hardware dispatch
/// descriptor consumed by the compute engine).  Each implementation packs
/// values into its raw `[u32]` image through [`QMDBitView`].
trait QMD {
    /// Bit ranges of the CTA raster (global dispatch size) fields, so
    /// callers can patch the dispatch size into an already-built QMD.
    const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout;

    /// Creates a QMD with its version and fixed policy fields initialized.
    fn new() -> Self;
    /// Sets the number of control barriers used by the shader.
    fn set_barrier_count(&mut self, barrier_count: u8);
    /// Binds constant buffer `idx` at GPU address `addr` with `size` bytes.
    fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32);
    /// Bit ranges of the address fields for constant buffer `idx`.
    fn cbuf_desc_layout(idx: u8) -> nak_qmd_cbuf_desc_layout;
    /// Sets the dispatch size in workgroups (CTA raster dimensions).
    fn set_global_size(&mut self, width: u32, height: u32, depth: u32);
    /// Sets the workgroup size (CTA thread dimensions).
    fn set_local_size(&mut self, width: u16, height: u16, depth: u16);
    /// Sets the shader program address (or offset, on older versions).
    fn set_prog_addr(&mut self, addr: u64);
    /// Sets the number of GPRs used by the shader.
    fn set_register_count(&mut self, register_count: u8);
    /// Sets the call/return stack (CRS) size in bytes.
    fn set_crs_size(&mut self, crs_size: u32);
    /// Sets the shader local memory (SLM) size in bytes.
    fn set_slm_size(&mut self, slm_size: u32);
    /// Sets the dispatch's shared memory size, with `smem_max` bounding
    /// the SM configuration on versions that program one.
    fn set_smem_size(&mut self, smem_size: u32, smem_max: u32);
}
30 
// Sets a QMD enum field to one of its named values:
// `set_enum!(bv, cls, STRUCT, FIELD, VALUE)` expands to
// `bv.set_field(cls::STRUCT_FIELD, cls::STRUCT_FIELD_VALUE)`, using
// `paste!` to concatenate the identifiers from the generated headers.
macro_rules! set_enum {
    ($bv:expr, $cls:ident, $strct:ident, $field:ident, $enum:ident) => {
        $bv.set_field(
            paste! {$cls::[<$strct _ $field>]},
            paste! {$cls::[<$strct _ $field _ $enum>]},
        )
    };
}
39 
// Sets a QMD field to an arbitrary value:
// `set_field!(bv, cls, STRUCT, FIELD, val)` expands to
// `bv.set_field(cls::STRUCT_FIELD, val)`.
macro_rules! set_field {
    ($bv:expr, $cls:ident, $strct:ident, $field:ident, $val:expr) => {
        $bv.set_field(paste! {$cls::[<$strct _ $field>]}, $val)
    };
}
45 
// Sets one element of an indexed QMD field; the generated
// `cls::STRUCT_F(i)` function returns the bit range for element `$i`.
macro_rules! set_array {
    ($bv:expr, $cls:ident, $strct:ident, $f:ident, $i:expr, $x:expr) => {
        $bv.set_field(paste! {$cls::[<$strct _ $f>]}($i), $x)
    };
}
51 
// Common QMD header initialization: stamps the major/minor version and the
// policy fields that every QMD version in this file sets the same way.
macro_rules! qmd_init {
    ($bv: expr, $c:ident, $s:ident, $mjv:expr, $mnv:expr) => {
        set_field!($bv, $c, $s, QMD_MAJOR_VERSION, $mjv);
        set_field!($bv, $c, $s, QMD_VERSION, $mnv);

        // Fixed choices shared by all versions: no API-visible call-limit
        // checking, and independent sampler indexing.
        set_enum!($bv, $c, $s, API_VISIBLE_CALL_LIMIT, NO_CHECK);
        set_enum!($bv, $c, $s, SAMPLER_INDEX, INDEPENDENTLY);
    };
}
61 
// Implements the `QMD` trait members whose field names are identical in
// every QMD version: barrier count, global/local dispatch size, SLM size,
// and the `GLOBAL_SIZE_LAYOUT` bit-range description.
macro_rules! qmd_impl_common {
    ($c:ident, $s:ident) => {
        fn set_barrier_count(&mut self, barrier_count: u8) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, BARRIER_COUNT, barrier_count);
        }

        // Export the start/end bit positions of the CTA raster fields
        // straight from the generated header ranges.
        const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout = {
            let w = paste! {$c::[<$s _CTA_RASTER_WIDTH>]};
            let h = paste! {$c::[<$s _CTA_RASTER_HEIGHT>]};
            let d = paste! {$c::[<$s _CTA_RASTER_DEPTH>]};
            nak_qmd_dispatch_size_layout {
                x_start: w.start as u16,
                x_end: w.end as u16,
                y_start: h.start as u16,
                y_end: h.end as u16,
                z_start: d.start as u16,
                z_end: d.end as u16,
            }
        };

        fn set_global_size(&mut self, width: u32, height: u32, depth: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, CTA_RASTER_WIDTH, width);
            set_field!(bv, $c, $s, CTA_RASTER_HEIGHT, height);
            set_field!(bv, $c, $s, CTA_RASTER_DEPTH, depth);
        }

        fn set_local_size(&mut self, width: u16, height: u16, depth: u16) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION0, width);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION1, height);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION2, depth);
        }

        fn set_slm_size(&mut self, slm_size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            // SLM size is programmed as a 16B-aligned value; only the LOW
            // size field is used, the HIGH field is explicitly zeroed.
            let slm_size = slm_size.next_multiple_of(0x10);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_LOW_SIZE, slm_size);
        }
    };
}
105 
// `set_crs_size` for QMD versions that have a
// SHADER_LOCAL_MEMORY_CRS_SIZE field; the size is rounded up to a 512B
// multiple before being written.
macro_rules! qmd_impl_set_crs_size {
    ($c:ident, $s:ident) => {
        fn set_crs_size(&mut self, crs_size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let crs_size = crs_size.next_multiple_of(0x200);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_CRS_SIZE, crs_size);
        }
    };
}
115 
// Shift applied to a cbuf byte size before writing it to the QMD.  The
// names pair with the `$size_field` argument of `qmd_impl_set_cbuf!`:
// `SIZE` fields store the byte size directly (shift 0), while
// `SIZE_SHIFTED4` fields store `size >> 4`.
const SIZE_SHIFT: u8 = 0;
const SIZE_SHIFTED4_SHIFT: u8 = 4;
118 
// Implements `set_cbuf` and `cbuf_desc_layout`.  `$size_field` names the
// size field used by this QMD version (`SIZE` or `SIZE_SHIFTED4`); the
// matching `*_SHIFT` constant above gives how far the byte size is shifted
// down before being written.
macro_rules! qmd_impl_set_cbuf {
    ($c:ident, $s:ident, $size_field:ident) => {
        fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let idx = idx.into();

            // Split the 64-bit GPU address across the lower/upper fields.
            let addr_lo = addr as u32;
            let addr_hi = (addr >> 32) as u32;
            set_array!(bv, $c, $s, CONSTANT_BUFFER_ADDR_LOWER, idx, addr_lo);
            set_array!(bv, $c, $s, CONSTANT_BUFFER_ADDR_UPPER, idx, addr_hi);

            paste! {
                let shift = [<$size_field _SHIFT>];
                let range = $c::[<$s _CONSTANT_BUFFER_ $size_field>](idx);
                // The bits dropped by the shift must be zero, otherwise
                // the size would be silently truncated.
                assert!(((size >> shift) << shift) == size);
                bv.set_field(range, size >> shift);
            }

            // Mark the slot valid only after address and size are in place.
            set_array!(bv, $c, $s, CONSTANT_BUFFER_VALID, idx, true);
        }

        // Reports where the address bits of cbuf `idx` live so callers
        // can patch the address into an already-built QMD.
        fn cbuf_desc_layout(idx: u8) -> nak_qmd_cbuf_desc_layout {
            let lo =
                paste! {$c::[<$s _CONSTANT_BUFFER_ADDR_LOWER>]}(idx.into());
            let hi =
                paste! {$c::[<$s _CONSTANT_BUFFER_ADDR_UPPER>]}(idx.into());
            nak_qmd_cbuf_desc_layout {
                addr_lo_start: lo.start as u16,
                addr_lo_end: lo.end as u16,
                addr_hi_start: hi.start as u16,
                addr_hi_end: hi.end as u16,
            }
        }
    };
}
154 
// `set_prog_addr` for QMD versions with a single 32-bit PROGRAM_OFFSET
// field.  NOTE(review): the u64 `addr` is written into a 32-bit field, so
// callers are presumably expected to pass a value that fits — confirm at
// the call sites.
macro_rules! qmd_impl_set_prog_addr_32 {
    ($c:ident, $s:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, PROGRAM_OFFSET, addr);
        }
    };
}
163 
// `set_prog_addr` for QMD versions with a full 64-bit program address,
// split across the LOWER/UPPER dword fields.
macro_rules! qmd_impl_set_prog_addr_64 {
    ($c:ident, $s:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let addr_lo = addr as u32;
            let addr_hi = (addr >> 32) as u32;
            set_field!(bv, $c, $s, PROGRAM_ADDRESS_LOWER, addr_lo);
            set_field!(bv, $c, $s, PROGRAM_ADDRESS_UPPER, addr_hi);
        }
    };
}
176 
// `set_register_count`; `$field` names the per-version GPR count field
// (REGISTER_COUNT on older versions, REGISTER_COUNT_V on newer ones).
macro_rules! qmd_impl_set_register_count {
    ($c:ident, $s:ident, $field:ident) => {
        fn set_register_count(&mut self, register_count: u8) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, $field, register_count);
        }
    };
}
185 
mod qmd_0_6 {
    use crate::qmd::*;
    use nvidia_headers::classes::cla0c0::qmd as cla0c0;

    /// QMD version 0.6, used with Kepler (CLA0C0-class) compute.
    #[repr(transparent)]
    pub struct Qmd0_6 {
        // Raw QMD image; all fields are written via bitfield ranges.
        qmd: [u32; 64],
    }

    impl QMD for Qmd0_6 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, cla0c0, QMDV00_06, 0, 6);
            // NOTE(review): SASS_VERSION is hard-coded to 0x30, presumably
            // SM 3.0 for Kepler — confirm this holds for all Kepler parts.
            set_field!(bv, cla0c0, QMDV00_06, SASS_VERSION, 0x30);
            Self { qmd }
        }

        qmd_impl_common!(cla0c0, QMDV00_06);
        qmd_impl_set_crs_size!(cla0c0, QMDV00_06);
        qmd_impl_set_cbuf!(cla0c0, QMDV00_06, SIZE);
        qmd_impl_set_prog_addr_32!(cla0c0, QMDV00_06);
        qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT);

        fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            // Shared memory size is programmed in 256B multiples.
            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, cla0c0, QMDV00_06, SHARED_MEMORY_SIZE, smem_size);

            // Pick the smallest L1 "directly addressable memory"
            // configuration that fits the requested shared memory size.
            let l1_config = if smem_size <= (16 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB
            } else if smem_size <= (32 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB
            } else if smem_size <= (48 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB
            } else {
                panic!("Invalid shared memory size");
            };
            set_field!(bv, cla0c0, QMDV00_06, L1_CONFIGURATION, l1_config);
        }
    }
}
use qmd_0_6::Qmd0_6;
230 
mod qmd_2_1 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc0c0::qmd as clc0c0;

    /// QMD version 2.1, used with Pascal (CLC0C0-class) compute.
    #[repr(transparent)]
    pub struct Qmd2_1 {
        // Raw QMD image; all fields are written via bitfield ranges.
        qmd: [u32; 64],
    }

    impl QMD for Qmd2_1 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc0c0, QMDV02_01, 2, 1);
            set_field!(bv, clc0c0, QMDV02_01, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc0c0, QMDV02_01);
        qmd_impl_set_crs_size!(clc0c0, QMDV02_01);
        qmd_impl_set_cbuf!(clc0c0, QMDV02_01, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_32!(clc0c0, QMDV02_01);
        qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT);

        fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            // Shared memory size is programmed in 256B multiples; this
            // version has no min/max/target SM configuration fields, so
            // `_smem_max` is unused.
            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, clc0c0, QMDV02_01, SHARED_MEMORY_SIZE, smem_size);
        }
    }
}
use qmd_2_1::Qmd2_1;
264 
/// Maps a shared memory size in bytes to the encoded SM shared-memory
/// configuration value used by the `*_SM_CONFIG_SHARED_MEM_SIZE` QMD
/// fields: the size is rounded up to a supported configuration and
/// encoded as `config / 4096 + 1`.
fn gv100_sm_config_smem_size(size: u32) -> u32 {
    // Supported configurations in KiB, smallest first; anything above
    // 64KiB falls through to the 96KiB configuration.
    const CONFIGS_KIB: [u32; 4] = [8, 16, 32, 64];

    let config = CONFIGS_KIB
        .iter()
        .map(|&kib| kib * 1024)
        .find(|&cfg| size <= cfg)
        .unwrap_or(96 * 1024);

    config / 4096 + 1
}
280 
// `set_smem_size` for Volta+ QMD versions, which also program min/max/
// target SM shared-memory configurations alongside the dispatch's shared
// memory size.
macro_rules! qmd_impl_set_smem_size_bounded {
    ($c:ident, $s:ident) => {
        fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            // Shared memory size is programmed in 256B multiples.
            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, $c, $s, SHARED_MEMORY_SIZE, smem_size);

            // NOTE(review): `smem_size` is already u32, so the `.into()`
            // calls are identity conversions, and `min` and `target` are
            // computed identically — presumably intentional; confirm.
            let max = gv100_sm_config_smem_size(smem_max);
            let min = gv100_sm_config_smem_size(smem_size.into());
            let target = gv100_sm_config_smem_size(smem_size.into());
            set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min);
            set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max);
            set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target);
        }
    };
}
298 
mod qmd_2_2 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc3c0::qmd as clc3c0;

    /// QMD version 2.2, used with Volta (CLC3C0-class) compute.
    #[repr(transparent)]
    pub struct Qmd2_2 {
        // Raw QMD image; all fields are written via bitfield ranges.
        qmd: [u32; 64],
    }

    impl QMD for Qmd2_2 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc3c0, QMDV02_02, 2, 2);
            set_field!(bv, clc3c0, QMDV02_02, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc3c0, QMDV02_02);
        qmd_impl_set_crs_size!(clc3c0, QMDV02_02);
        qmd_impl_set_cbuf!(clc3c0, QMDV02_02, SIZE_SHIFTED4);
        // This version takes a full 64-bit program address and uses the
        // REGISTER_COUNT_V field.
        qmd_impl_set_prog_addr_64!(clc3c0, QMDV02_02);
        qmd_impl_set_register_count!(clc3c0, QMDV02_02, REGISTER_COUNT_V);
        qmd_impl_set_smem_size_bounded!(clc3c0, QMDV02_02);
    }
}
use qmd_2_2::Qmd2_2;
326 
mod qmd_3_0 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc6c0::qmd as clc6c0;

    /// QMD version 3.0, used with Ampere (CLC6C0-class) compute.
    #[repr(transparent)]
    pub struct Qmd3_0 {
        // Raw QMD image; all fields are written via bitfield ranges.
        qmd: [u32; 64],
    }

    impl QMD for Qmd3_0 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc6c0, QMDV03_00, 3, 0);
            set_field!(bv, clc6c0, QMDV03_00, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc6c0, QMDV03_00);

        // This QMD version has no CRS size field; only a zero size is
        // accepted.
        fn set_crs_size(&mut self, crs_size: u32) {
            assert!(crs_size == 0);
        }

        qmd_impl_set_cbuf!(clc6c0, QMDV03_00, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_64!(clc6c0, QMDV03_00);
        qmd_impl_set_register_count!(clc6c0, QMDV03_00, REGISTER_COUNT_V);
        qmd_impl_set_smem_size_bounded!(clc6c0, QMDV03_00);
    }
}
use qmd_3_0::Qmd3_0;
358 
/// Builds a fresh QMD of type `Q` from the compiled shader `info` and the
/// per-dispatch parameters in `qmd_info`.
///
/// Panics if `info` is not a compute shader or if the shared memory sizes
/// in `qmd_info` are inconsistent.
fn fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q {
    let cs_info = unsafe {
        // SAFETY: the `cs` union member is only valid for compute
        // shaders, which the assert guarantees.
        assert!(info.stage == MESA_SHADER_COMPUTE);
        &info.__bindgen_anon_1.cs
    };

    let mut qmd = Q::new();

    qmd.set_barrier_count(info.num_control_barriers);
    qmd.set_global_size(
        qmd_info.global_size[0],
        qmd_info.global_size[1],
        qmd_info.global_size[2],
    );
    qmd.set_local_size(
        cs_info.local_size[0],
        cs_info.local_size[1],
        cs_info.local_size[2],
    );
    qmd.set_prog_addr(qmd_info.addr);
    qmd.set_register_count(info.num_gprs);
    qmd.set_crs_size(info.crs_size);
    qmd.set_slm_size(info.slm_size);

    // The dispatch must request at least what the shader statically
    // needs, and no more than the stated maximum.
    assert!(qmd_info.smem_size >= cs_info.smem_size);
    assert!(qmd_info.smem_size <= qmd_info.smem_max);
    qmd.set_smem_size(qmd_info.smem_size.into(), qmd_info.smem_max.into());

    // Bind only cbufs with a non-zero size; the rest keep their VALID
    // bits clear.
    for i in 0..qmd_info.num_cbufs {
        let cb = &qmd_info.cbufs[usize::try_from(i).unwrap()];
        if cb.size > 0 {
            qmd.set_cbuf(cb.index.try_into().unwrap(), cb.addr, cb.size);
        }
    }

    qmd
}
396 
/// C entry point: writes a QMD built from `info`/`qmd_info` to `qmd_out`,
/// choosing the QMD version from `dev.cls_compute`.
///
/// # Safety
/// `dev`, `info`, and `qmd_info` must point to valid structs, and
/// `qmd_out` must be valid for writes of `qmd_size` bytes.  `qmd_size`
/// must equal the size of the chosen device's QMD struct (asserted).
#[no_mangle]
pub extern "C" fn nak_fill_qmd(
    dev: *const nv_device_info,
    info: *const nak_shader_info,
    qmd_info: *const nak_qmd_info,
    qmd_out: *mut ::std::os::raw::c_void,
    qmd_size: usize,
) {
    assert!(!dev.is_null());
    let dev = unsafe { &*dev };

    assert!(!info.is_null());
    let info = unsafe { &*info };

    assert!(!qmd_info.is_null());
    let qmd_info = unsafe { &*qmd_info };

    unsafe {
        // Check newest classes first; each QMD version serves its class
        // and everything up to the next newer one.
        if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd3_0;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd2_2;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd2_1;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd0_6;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else {
            panic!("Unknown shader model");
        }
    }
}
436 
437 #[no_mangle]
nak_get_qmd_dispatch_size_layout( dev: &nv_device_info, ) -> nak_qmd_dispatch_size_layout438 pub extern "C" fn nak_get_qmd_dispatch_size_layout(
439     dev: &nv_device_info,
440 ) -> nak_qmd_dispatch_size_layout {
441     if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
442         Qmd3_0::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
443     } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
444         Qmd2_2::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
445     } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
446         Qmd2_1::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
447     } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
448         Qmd0_6::GLOBAL_SIZE_LAYOUT.try_into().unwrap()
449     } else {
450         panic!("Unsupported shader model");
451     }
452 }
453 
454 #[no_mangle]
nak_get_qmd_cbuf_desc_layout( dev: &nv_device_info, idx: u8, ) -> nak_qmd_cbuf_desc_layout455 pub extern "C" fn nak_get_qmd_cbuf_desc_layout(
456     dev: &nv_device_info,
457     idx: u8,
458 ) -> nak_qmd_cbuf_desc_layout {
459     if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
460         Qmd3_0::cbuf_desc_layout(idx.into())
461     } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
462         Qmd2_2::cbuf_desc_layout(idx.into())
463     } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
464         Qmd2_1::cbuf_desc_layout(idx.into())
465     } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
466         Qmd0_6::cbuf_desc_layout(idx.into())
467     } else {
468         panic!("Unsupported shader model");
469     }
470 }
471