// Copyright © 2024 Collabora, Ltd.
// SPDX-License-Identifier: MIT

extern crate nvidia_headers;

use compiler::bindings::*;
use nak_bindings::*;
use nvidia_headers::classes::{cla0c0, clc0c0, clc3c0, clc6c0};

use bitview::*;
use paste::paste;

type QMDBitView<'a> = BitMutView<'a, [u32]>;

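/// Per-version view of the hardware QMD (Queue Meta Data) structure, which
/// describes a single compute dispatch to the GPU.
///
/// Every QMD version exposes the same logical parameters; only the bit
/// positions (and a few encodings) differ, so each implementation below is
/// assembled from the macros that follow.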
trait QMD {
    const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout;

    fn new() -> Self;
    fn set_barrier_count(&mut self, barrier_count: u8);
    fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32);
    fn cbuf_desc_layout(idx: u8) -> nak_qmd_cbuf_desc_layout;
    fn set_global_size(&mut self, width: u32, height: u32, depth: u32);
    fn set_local_size(&mut self, width: u16, height: u16, depth: u16);
    fn set_prog_addr(&mut self, addr: u64);
    fn set_register_count(&mut self, register_count: u8);
    fn set_crs_size(&mut self, crs_size: u32);
    fn set_slm_size(&mut self, slm_size: u32);
    fn set_smem_size(&mut self, smem_size: u32, smem_max: u32);
}

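// The class headers expose each QMD field as a `<STRUCT>_<FIELD>` bit range
// (and each enum value as `<STRUCT>_<FIELD>_<ENUM>`).  These macros paste
// the tokens together so the per-version impls can name fields generically.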
macro_rules! set_enum {
    ($bv:expr, $cls:ident, $strct:ident, $field:ident, $enum:ident) => {
        $bv.set_field(
            paste! {$cls::[<$strct _ $field>]},
            paste! {$cls::[<$strct _ $field _ $enum>]},
        )
    };
}

macro_rules! set_field {
    ($bv:expr, $cls:ident, $strct:ident, $field:ident, $val:expr) => {
        $bv.set_field(paste! {$cls::[<$strct _ $field>]}, $val)
    };
}

macro_rules! set_array {
    ($bv:expr, $cls:ident, $strct:ident, $f:ident, $i:expr, $x:expr) => {
        $bv.set_field(paste! {$cls::[<$strct _ $f>]}($i), $x)
    };
}

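// Every QMD starts with its major/minor version stamped, plus the same
// conservative defaults for API call-limit checking and sampler indexing.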
macro_rules! qmd_init {
    ($bv:expr, $c:ident, $s:ident, $mjv:expr, $mnv:expr) => {
        set_field!($bv, $c, $s, QMD_MAJOR_VERSION, $mjv);
        set_field!($bv, $c, $s, QMD_VERSION, $mnv);

        set_enum!($bv, $c, $s, API_VISIBLE_CALL_LIMIT, NO_CHECK);
        set_enum!($bv, $c, $s, SAMPLER_INDEX, INDEPENDENTLY);
    };
}

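// QMD methods whose field names are stable across every supported QMD
// version; only the bit positions differ, and the class headers resolve
// those per version.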
macro_rules! qmd_impl_common {
    ($c:ident, $s:ident) => {
        fn set_barrier_count(&mut self, barrier_count: u8) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, BARRIER_COUNT, barrier_count);
        }

        const GLOBAL_SIZE_LAYOUT: nak_qmd_dispatch_size_layout = {
            let w = paste! {$c::[<$s _CTA_RASTER_WIDTH>]};
            let h = paste! {$c::[<$s _CTA_RASTER_HEIGHT>]};
            let d = paste! {$c::[<$s _CTA_RASTER_DEPTH>]};
            nak_qmd_dispatch_size_layout {
                x_start: w.start as u16,
                x_end: w.end as u16,
                y_start: h.start as u16,
                y_end: h.end as u16,
                z_start: d.start as u16,
                z_end: d.end as u16,
            }
        };

        fn set_global_size(&mut self, width: u32, height: u32, depth: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, CTA_RASTER_WIDTH, width);
            set_field!(bv, $c, $s, CTA_RASTER_HEIGHT, height);
            set_field!(bv, $c, $s, CTA_RASTER_DEPTH, depth);
        }

        fn set_local_size(&mut self, width: u16, height: u16, depth: u16) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION0, width);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION1, height);
            set_field!(bv, $c, $s, CTA_THREAD_DIMENSION2, depth);
        }

        fn set_slm_size(&mut self, slm_size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let slm_size = slm_size.next_multiple_of(0x10);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_LOW_SIZE, slm_size);
        }
    };
}

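// CRS (call/return stack) size, carved out of local memory.  QMD 3.0 drops
// this field entirely; see Qmd3_0::set_crs_size below.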
macro_rules! qmd_impl_set_crs_size {
    ($c:ident, $s:ident) => {
        fn set_crs_size(&mut self, crs_size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let crs_size = crs_size.next_multiple_of(0x200);
            set_field!(bv, $c, $s, SHADER_LOCAL_MEMORY_CRS_SIZE, crs_size);
        }
    };
}

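// QMD 0.6 encodes constant buffer sizes in bytes (SIZE); QMD 2.x and 3.0
// encode them in 16 B units (SIZE_SHIFTED4).  The shift constants below let
// qmd_impl_set_cbuf! handle both encodings with one implementation.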
const SIZE_SHIFT: u8 = 0;
const SIZE_SHIFTED4_SHIFT: u8 = 4;

macro_rules! qmd_impl_set_cbuf {
    ($c:ident, $s:ident, $size_field:ident) => {
        fn set_cbuf(&mut self, idx: u8, addr: u64, size: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            let idx = idx.into();

            let addr_lo = addr as u32;
            let addr_hi = (addr >> 32) as u32;
            set_array!(bv, $c, $s, CONSTANT_BUFFER_ADDR_LOWER, idx, addr_lo);
            set_array!(bv, $c, $s, CONSTANT_BUFFER_ADDR_UPPER, idx, addr_hi);

            paste! {
                let shift = [<$size_field _SHIFT>];
                let range = $c::[<$s _CONSTANT_BUFFER_ $size_field>](idx);
                assert!(((size >> shift) << shift) == size);
                bv.set_field(range, size >> shift);
            }

            set_array!(bv, $c, $s, CONSTANT_BUFFER_VALID, idx, true);
        }

        fn cbuf_desc_layout(idx: u8) -> nak_qmd_cbuf_desc_layout {
            let lo =
                paste! {$c::[<$s _CONSTANT_BUFFER_ADDR_LOWER>]}(idx.into());
            let hi =
                paste! {$c::[<$s _CONSTANT_BUFFER_ADDR_UPPER>]}(idx.into());
            nak_qmd_cbuf_desc_layout {
                addr_lo_start: lo.start as u16,
                addr_lo_end: lo.end as u16,
                addr_hi_start: hi.start as u16,
                addr_hi_end: hi.end as u16,
            }
        }
    };
}

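// QMD 0.6 and 2.1 take a 32-bit program offset; QMD 2.2 and 3.0 take a full
// 64-bit program address split across two 32-bit fields.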
macro_rules! qmd_impl_set_prog_addr_32 {
    ($c:ident, $s:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, PROGRAM_OFFSET, addr);
        }
    };
}

macro_rules! qmd_impl_set_prog_addr_64 {
    ($c:ident, $s:ident) => {
        fn set_prog_addr(&mut self, addr: u64) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let addr_lo = addr as u32;
            let addr_hi = (addr >> 32) as u32;
            set_field!(bv, $c, $s, PROGRAM_ADDRESS_LOWER, addr_lo);
            set_field!(bv, $c, $s, PROGRAM_ADDRESS_UPPER, addr_hi);
        }
    };
}

macro_rules! qmd_impl_set_register_count {
    ($c:ident, $s:ident, $field:ident) => {
        fn set_register_count(&mut self, register_count: u8) {
            let mut bv = QMDBitView::new(&mut self.qmd);
            set_field!(bv, $c, $s, $field, register_count);
        }
    };
}

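// QMD 0.6: Kepler (cla0c0, KEPLER_COMPUTE_A)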
mod qmd_0_6 {
    use crate::qmd::*;
    use nvidia_headers::classes::cla0c0::qmd as cla0c0;

    #[repr(transparent)]
    pub struct Qmd0_6 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd0_6 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, cla0c0, QMDV00_06, 0, 6);
            set_field!(bv, cla0c0, QMDV00_06, SASS_VERSION, 0x30);
            Self { qmd }
        }

        qmd_impl_common!(cla0c0, QMDV00_06);
        qmd_impl_set_crs_size!(cla0c0, QMDV00_06);
        qmd_impl_set_cbuf!(cla0c0, QMDV00_06, SIZE);
        qmd_impl_set_prog_addr_32!(cla0c0, QMDV00_06);
        qmd_impl_set_register_count!(cla0c0, QMDV00_06, REGISTER_COUNT);

        fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, cla0c0, QMDV00_06, SHARED_MEMORY_SIZE, smem_size);

            let l1_config = if smem_size <= (16 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB
            } else if smem_size <= (32 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB
            } else if smem_size <= (48 << 10) {
                cla0c0::QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB
            } else {
                panic!("Invalid shared memory size");
            };
            set_field!(bv, cla0c0, QMDV00_06, L1_CONFIGURATION, l1_config);
        }
    }
}
use qmd_0_6::Qmd0_6;

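// QMD 2.1: Pascal (clc0c0, PASCAL_COMPUTE_A)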
mod qmd_2_1 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc0c0::qmd as clc0c0;

    #[repr(transparent)]
    pub struct Qmd2_1 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd2_1 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc0c0, QMDV02_01, 2, 1);
            set_field!(bv, clc0c0, QMDV02_01, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc0c0, QMDV02_01);
        qmd_impl_set_crs_size!(clc0c0, QMDV02_01);
        qmd_impl_set_cbuf!(clc0c0, QMDV02_01, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_32!(clc0c0, QMDV02_01);
        qmd_impl_set_register_count!(clc0c0, QMDV02_01, REGISTER_COUNT);

        fn set_smem_size(&mut self, smem_size: u32, _smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, clc0c0, QMDV02_01, SHARED_MEMORY_SIZE, smem_size);
        }
    }
}
use qmd_2_1::Qmd2_1;

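// Rounds a shared memory byte count up to the next configuration the
// hardware supports (8, 16, 32, 64, or 96 KiB) and encodes it as
// size / 4 KiB + 1 for the *_SM_CONFIG_SHARED_MEM_SIZE fields below.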
fn gv100_sm_config_smem_size(size: u32) -> u32 {
    assert!(size <= 96 * 1024);
    let size = if size > 64 * 1024 {
        96 * 1024
    } else if size > 32 * 1024 {
        64 * 1024
    } else if size > 16 * 1024 {
        32 * 1024
    } else if size > 8 * 1024 {
        16 * 1024
    } else {
        8 * 1024
    };

    size / 4096 + 1
}

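// Volta+ QMDs take minimum, maximum, and target shared memory
// configurations; we target exactly the size this dispatch needs.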
macro_rules! qmd_impl_set_smem_size_bounded {
    ($c:ident, $s:ident) => {
        fn set_smem_size(&mut self, smem_size: u32, smem_max: u32) {
            let mut bv = QMDBitView::new(&mut self.qmd);

            let smem_size = smem_size.next_multiple_of(0x100);
            set_field!(bv, $c, $s, SHARED_MEMORY_SIZE, smem_size);

            let max = gv100_sm_config_smem_size(smem_max);
            let min = gv100_sm_config_smem_size(smem_size);
            let target = gv100_sm_config_smem_size(smem_size);
            set_field!(bv, $c, $s, MIN_SM_CONFIG_SHARED_MEM_SIZE, min);
            set_field!(bv, $c, $s, MAX_SM_CONFIG_SHARED_MEM_SIZE, max);
            set_field!(bv, $c, $s, TARGET_SM_CONFIG_SHARED_MEM_SIZE, target);
        }
    };
}

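// QMD 2.2: Volta (clc3c0, VOLTA_COMPUTE_A)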
mod qmd_2_2 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc3c0::qmd as clc3c0;

    #[repr(transparent)]
    pub struct Qmd2_2 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd2_2 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc3c0, QMDV02_02, 2, 2);
            set_field!(bv, clc3c0, QMDV02_02, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc3c0, QMDV02_02);
        qmd_impl_set_crs_size!(clc3c0, QMDV02_02);
        qmd_impl_set_cbuf!(clc3c0, QMDV02_02, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_64!(clc3c0, QMDV02_02);
        qmd_impl_set_register_count!(clc3c0, QMDV02_02, REGISTER_COUNT_V);
        qmd_impl_set_smem_size_bounded!(clc3c0, QMDV02_02);
    }
}
use qmd_2_2::Qmd2_2;

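// QMD 3.0: Ampere and later (clc6c0, AMPERE_COMPUTE_A)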
mod qmd_3_0 {
    use crate::qmd::*;
    use nvidia_headers::classes::clc6c0::qmd as clc6c0;

    #[repr(transparent)]
    pub struct Qmd3_0 {
        qmd: [u32; 64],
    }

    impl QMD for Qmd3_0 {
        fn new() -> Self {
            let mut qmd = [0; 64];
            let mut bv = QMDBitView::new(&mut qmd);
            qmd_init!(bv, clc6c0, QMDV03_00, 3, 0);
            set_field!(bv, clc6c0, QMDV03_00, SM_GLOBAL_CACHING_ENABLE, true);
            Self { qmd }
        }

        qmd_impl_common!(clc6c0, QMDV03_00);

        fn set_crs_size(&mut self, crs_size: u32) {
            assert!(crs_size == 0);
        }

        qmd_impl_set_cbuf!(clc6c0, QMDV03_00, SIZE_SHIFTED4);
        qmd_impl_set_prog_addr_64!(clc6c0, QMDV03_00);
        qmd_impl_set_register_count!(clc6c0, QMDV03_00, REGISTER_COUNT_V);
        qmd_impl_set_smem_size_bounded!(clc6c0, QMDV03_00);
    }
}
use qmd_3_0::Qmd3_0;

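/// Builds a QMD of the given version from the compiled shader info and the
/// per-dispatch parameters.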
fn fill_qmd<Q: QMD>(info: &nak_shader_info, qmd_info: &nak_qmd_info) -> Q {
    let cs_info = unsafe {
        assert!(info.stage == MESA_SHADER_COMPUTE);
        &info.__bindgen_anon_1.cs
    };

    let mut qmd = Q::new();

    qmd.set_barrier_count(info.num_control_barriers);
    qmd.set_global_size(
        qmd_info.global_size[0],
        qmd_info.global_size[1],
        qmd_info.global_size[2],
    );
    qmd.set_local_size(
        cs_info.local_size[0],
        cs_info.local_size[1],
        cs_info.local_size[2],
    );
    qmd.set_prog_addr(qmd_info.addr);
    qmd.set_register_count(info.num_gprs);
    qmd.set_crs_size(info.crs_size);
    qmd.set_slm_size(info.slm_size);

    assert!(qmd_info.smem_size >= cs_info.smem_size);
    assert!(qmd_info.smem_size <= qmd_info.smem_max);
    qmd.set_smem_size(qmd_info.smem_size.into(), qmd_info.smem_max.into());

    for i in 0..qmd_info.num_cbufs {
        let cb = &qmd_info.cbufs[usize::try_from(i).unwrap()];
        if cb.size > 0 {
            qmd.set_cbuf(cb.index.try_into().unwrap(), cb.addr, cb.size);
        }
    }

    qmd
}

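/// C entrypoint: fills `qmd_out` with a QMD matching the device's compute
/// class.  `qmd_out` must point to `qmd_size` bytes of writable memory, and
/// `qmd_size` must match the size of the selected QMD version (currently
/// 256 B for every version).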
#[no_mangle]
pub extern "C" fn nak_fill_qmd(
    dev: *const nv_device_info,
    info: *const nak_shader_info,
    qmd_info: *const nak_qmd_info,
    qmd_out: *mut ::std::os::raw::c_void,
    qmd_size: usize,
) {
    assert!(!dev.is_null());
    let dev = unsafe { &*dev };

    assert!(!info.is_null());
    let info = unsafe { &*info };

    assert!(!qmd_info.is_null());
    let qmd_info = unsafe { &*qmd_info };

    unsafe {
        if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd3_0;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd2_2;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd2_1;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
            let qmd_out = qmd_out as *mut Qmd0_6;
            assert!(qmd_size == std::mem::size_of_val(&*qmd_out));
            qmd_out.write(fill_qmd(info, qmd_info));
        } else {
            panic!("Unsupported shader model");
        }
    }
}

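/// C entrypoint: reports the bit ranges of the grid size fields within the
/// QMD for the given device, so callers can patch dispatch dimensions
/// without rebuilding the whole QMD.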
#[no_mangle]
pub extern "C" fn nak_get_qmd_dispatch_size_layout(
    dev: &nv_device_info,
) -> nak_qmd_dispatch_size_layout {
    if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
        Qmd3_0::GLOBAL_SIZE_LAYOUT
    } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
        Qmd2_2::GLOBAL_SIZE_LAYOUT
    } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
        Qmd2_1::GLOBAL_SIZE_LAYOUT
    } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
        Qmd0_6::GLOBAL_SIZE_LAYOUT
    } else {
        panic!("Unsupported shader model");
    }
}

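/// C entrypoint: reports the bit ranges of constant buffer `idx`'s address
/// words within the QMD for the given device.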
#[no_mangle]
pub extern "C" fn nak_get_qmd_cbuf_desc_layout(
    dev: &nv_device_info,
    idx: u8,
) -> nak_qmd_cbuf_desc_layout {
    if dev.cls_compute >= clc6c0::AMPERE_COMPUTE_A {
        Qmd3_0::cbuf_desc_layout(idx)
    } else if dev.cls_compute >= clc3c0::VOLTA_COMPUTE_A {
        Qmd2_2::cbuf_desc_layout(idx)
    } else if dev.cls_compute >= clc0c0::PASCAL_COMPUTE_A {
        Qmd2_1::cbuf_desc_layout(idx)
    } else if dev.cls_compute >= cla0c0::KEPLER_COMPUTE_A {
        Qmd0_6::cbuf_desc_layout(idx)
    } else {
        panic!("Unsupported shader model");
    }
}