1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 extern crate bitview;
5
6 use crate::ir::{ShaderInfo, ShaderIoInfo, ShaderStageInfo};
7 use bitview::{
8 BitMutView, BitMutViewable, BitView, BitViewable, SetBit, SetField,
9 SetFieldU64,
10 };
11 use nak_bindings::*;
12 use std::ops::Range;
13
/// Header length associated with Fermi. Not referenced in this file (hence
/// the leading underscore).
/// NOTE(review): presumably counted in `u32` words like the constants
/// below — confirm.
pub const _FERMI_SHADER_HEADER_SIZE: usize = 20;
/// Header length associated with Turing; see
/// `CURRENT_MAX_SHADER_HEADER_SIZE` for how it is used.
pub const TURING_SHADER_HEADER_SIZE: usize = 32;
/// Number of `u32` words in every header buffer allocated by this file.
pub const CURRENT_MAX_SHADER_HEADER_SIZE: usize = TURING_SHADER_HEADER_SIZE;

/// Mutable bit view over the `u32` array that backs a shader program header.
type SubSPHView<'a> = BitMutView<'a, [u32; CURRENT_MAX_SHADER_HEADER_SIZE]>;
19
/// Shader stages for which a shader program header is built.
///
/// The numeric value stored in the header for each variant is chosen in
/// `ShaderProgramHeader::set_shader_type`, not by the declaration order here.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ShaderType {
    Vertex,
    TessellationInit,
    Tessellation,
    Geometry,
    Fragment,
}
28
29 impl From<&ShaderStageInfo> for ShaderType {
from(value: &ShaderStageInfo) -> Self30 fn from(value: &ShaderStageInfo) -> Self {
31 match value {
32 ShaderStageInfo::Vertex => ShaderType::Vertex,
33 ShaderStageInfo::Fragment => ShaderType::Fragment,
34 ShaderStageInfo::Geometry(_) => ShaderType::Geometry,
35 ShaderStageInfo::TessellationInit(_) => {
36 ShaderType::TessellationInit
37 }
38 ShaderStageInfo::Tessellation => ShaderType::Tessellation,
39 _ => panic!("Invalid ShaderStageInfo {:?}", value),
40 }
41 }
42 }
43
/// Output primitive topology written into the header by
/// `ShaderProgramHeader::set_output_topology`, which also picks the numeric
/// encoding of each variant.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum OutputTopology {
    PointList,
    LineStrip,
    TriangleStrip,
}
50
/// Per-input entry of the fragment-shader input maps; each entry occupies
/// two bits in the header (see `ShaderProgramHeader::set_imap_vector_ps`).
///
/// NOTE(review): the variant names suggest interpolation modes — confirm
/// against the hardware documentation.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PixelImap {
    Unused,
    Constant,
    Perspective,
    ScreenLinear,
}
58
59 impl From<PixelImap> for u8 {
from(value: PixelImap) -> u860 fn from(value: PixelImap) -> u8 {
61 match value {
62 PixelImap::Unused => 0,
63 PixelImap::Constant => 1,
64 PixelImap::Perspective => 2,
65 PixelImap::ScreenLinear => 3,
66 }
67 }
68 }
69
/// Shader program header (SPH) under construction.
///
/// All bit-level trait impls and setters in this file operate on `data`.
#[derive(Debug)]
pub struct ShaderProgramHeader {
    /// Raw header words; this is the value `encode_header` returns.
    pub data: [u32; CURRENT_MAX_SHADER_HEADER_SIZE],
    /// Stage the header was created for; stage-specific accessors assert
    /// on it before touching `data`.
    shader_type: ShaderType,
    /// SM version given to `new`; compared against 35 when encoding the
    /// per-patch attribute count.
    sm: u8,
}
76
77 impl BitViewable for ShaderProgramHeader {
bits(&self) -> usize78 fn bits(&self) -> usize {
79 BitView::new(&self.data).bits()
80 }
81
get_bit_range_u64(&self, range: Range<usize>) -> u6482 fn get_bit_range_u64(&self, range: Range<usize>) -> u64 {
83 BitView::new(&self.data).get_bit_range_u64(range)
84 }
85 }
86
87 impl BitMutViewable for ShaderProgramHeader {
set_bit_range_u64(&mut self, range: Range<usize>, val: u64)88 fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) {
89 BitMutView::new(&mut self.data).set_bit_range_u64(range, val);
90 }
91 }
92
93 impl SetFieldU64 for ShaderProgramHeader {
set_field_u64(&mut self, range: Range<usize>, val: u64)94 fn set_field_u64(&mut self, range: Range<usize>, val: u64) {
95 BitMutView::new(&mut self.data).set_field_u64(range, val);
96 }
97 }
98
99 impl ShaderProgramHeader {
new(shader_type: ShaderType, sm: u8) -> Self100 pub fn new(shader_type: ShaderType, sm: u8) -> Self {
101 let mut res = Self {
102 data: [0; CURRENT_MAX_SHADER_HEADER_SIZE],
103 shader_type,
104 sm,
105 };
106
107 let sph_type = if shader_type == ShaderType::Fragment {
108 2
109 } else {
110 1
111 };
112
113 let sph_version = 3;
114 res.set_sph_type(sph_type, sph_version);
115 res.set_shader_type(shader_type);
116
117 res
118 }
119
120 #[inline]
common_word0(&mut self) -> SubSPHView<'_>121 fn common_word0(&mut self) -> SubSPHView<'_> {
122 BitMutView::new_subset(&mut self.data, 0..32)
123 }
124
125 #[inline]
common_word1(&mut self) -> SubSPHView<'_>126 fn common_word1(&mut self) -> SubSPHView<'_> {
127 BitMutView::new_subset(&mut self.data, 32..64)
128 }
129
130 #[inline]
common_word2(&mut self) -> SubSPHView<'_>131 fn common_word2(&mut self) -> SubSPHView<'_> {
132 BitMutView::new_subset(&mut self.data, 64..96)
133 }
134
135 #[inline]
common_word3(&mut self) -> SubSPHView<'_>136 fn common_word3(&mut self) -> SubSPHView<'_> {
137 BitMutView::new_subset(&mut self.data, 96..128)
138 }
139
140 #[inline]
common_word4(&mut self) -> SubSPHView<'_>141 fn common_word4(&mut self) -> SubSPHView<'_> {
142 BitMutView::new_subset(&mut self.data, 128..160)
143 }
144
145 #[inline]
imap_system_values_ab(&mut self) -> SubSPHView<'_>146 fn imap_system_values_ab(&mut self) -> SubSPHView<'_> {
147 BitMutView::new_subset(&mut self.data, 160..192)
148 }
149
150 #[inline]
imap_g_vtg(&mut self) -> SubSPHView<'_>151 fn imap_g_vtg(&mut self) -> SubSPHView<'_> {
152 assert!(self.shader_type != ShaderType::Fragment);
153
154 BitMutView::new_subset(&mut self.data, 192..320)
155 }
156
157 #[inline]
imap_g_ps(&mut self) -> SubSPHView<'_>158 fn imap_g_ps(&mut self) -> SubSPHView<'_> {
159 assert!(self.shader_type == ShaderType::Fragment);
160
161 BitMutView::new_subset(&mut self.data, 192..448)
162 }
163
164 #[inline]
imap_system_values_c(&mut self) -> SubSPHView<'_>165 fn imap_system_values_c(&mut self) -> SubSPHView<'_> {
166 if self.shader_type == ShaderType::Fragment {
167 BitMutView::new_subset(&mut self.data, 464..480)
168 } else {
169 BitMutView::new_subset(&mut self.data, 336..352)
170 }
171 }
172
173 #[inline]
imap_system_values_d_vtg(&mut self) -> SubSPHView<'_>174 fn imap_system_values_d_vtg(&mut self) -> SubSPHView<'_> {
175 assert!(self.shader_type != ShaderType::Fragment);
176 BitMutView::new_subset(&mut self.data, 392..400)
177 }
178
179 #[inline]
omap_system_values_ab(&mut self) -> SubSPHView<'_>180 fn omap_system_values_ab(&mut self) -> SubSPHView<'_> {
181 assert!(self.shader_type != ShaderType::Fragment);
182 BitMutView::new_subset(&mut self.data, 400..432)
183 }
184
185 #[inline]
omap_g(&mut self) -> SubSPHView<'_>186 fn omap_g(&mut self) -> SubSPHView<'_> {
187 assert!(self.shader_type != ShaderType::Fragment);
188
189 BitMutView::new_subset(&mut self.data, 432..560)
190 }
191
192 #[inline]
omap_system_values_c(&mut self) -> SubSPHView<'_>193 fn omap_system_values_c(&mut self) -> SubSPHView<'_> {
194 assert!(self.shader_type != ShaderType::Fragment);
195 BitMutView::new_subset(&mut self.data, 576..592)
196 }
197
198 #[inline]
imap_system_values_d_ps(&mut self) -> SubSPHView<'_>199 fn imap_system_values_d_ps(&mut self) -> SubSPHView<'_> {
200 assert!(self.shader_type == ShaderType::Fragment);
201 BitMutView::new_subset(&mut self.data, 560..576)
202 }
203
204 #[inline]
omap_target(&mut self) -> SubSPHView<'_>205 fn omap_target(&mut self) -> SubSPHView<'_> {
206 assert!(self.shader_type == ShaderType::Fragment);
207
208 BitMutView::new_subset(&mut self.data, 576..608)
209 }
210
211 #[inline]
omap_system_values_d_vtg(&mut self) -> SubSPHView<'_>212 fn omap_system_values_d_vtg(&mut self) -> SubSPHView<'_> {
213 assert!(self.shader_type != ShaderType::Fragment);
214 BitMutView::new_subset(&mut self.data, 632..640)
215 }
216
217 #[inline]
set_sph_type(&mut self, sph_type: u8, sph_version: u8)218 fn set_sph_type(&mut self, sph_type: u8, sph_version: u8) {
219 let mut common_word0 = self.common_word0();
220
221 common_word0.set_field(0..5, sph_type);
222 common_word0.set_field(5..10, sph_version);
223 }
224
225 #[inline]
set_shader_type(&mut self, shader_type: ShaderType)226 fn set_shader_type(&mut self, shader_type: ShaderType) {
227 self.common_word0().set_field(
228 10..14,
229 match shader_type {
230 ShaderType::Vertex => 1_u8,
231 ShaderType::TessellationInit => 2_u8,
232 ShaderType::Tessellation => 3_u8,
233 ShaderType::Geometry => 4_u8,
234 ShaderType::Fragment => 5_u8,
235 },
236 );
237 }
238
239 #[inline]
set_multiple_render_target_enable(&mut self, mrt_enable: bool)240 pub fn set_multiple_render_target_enable(&mut self, mrt_enable: bool) {
241 self.common_word0().set_bit(14, mrt_enable);
242 }
243
244 #[inline]
set_kills_pixels(&mut self, kills_pixels: bool)245 pub fn set_kills_pixels(&mut self, kills_pixels: bool) {
246 self.common_word0().set_bit(15, kills_pixels);
247 }
248
249 #[inline]
set_does_global_store(&mut self, does_global_store: bool)250 pub fn set_does_global_store(&mut self, does_global_store: bool) {
251 self.common_word0().set_bit(16, does_global_store);
252 }
253
254 #[inline]
set_sass_version(&mut self, sass_version: u8)255 pub fn set_sass_version(&mut self, sass_version: u8) {
256 self.common_word0().set_field(17..21, sass_version);
257 }
258
259 #[inline]
set_gs_passthrough_enable(&mut self, gs_passthrough_enable: bool)260 pub fn set_gs_passthrough_enable(&mut self, gs_passthrough_enable: bool) {
261 assert!(self.shader_type == ShaderType::Geometry);
262 self.common_word0().set_bit(24, gs_passthrough_enable);
263 }
264
265 #[inline]
set_does_load_or_store(&mut self, does_load_or_store: bool)266 pub fn set_does_load_or_store(&mut self, does_load_or_store: bool) {
267 self.common_word0().set_bit(26, does_load_or_store);
268 }
269
270 #[inline]
set_does_fp64(&mut self, does_fp64: bool)271 pub fn set_does_fp64(&mut self, does_fp64: bool) {
272 self.common_word0().set_bit(27, does_fp64);
273 }
274
275 #[inline]
set_stream_out_mask(&mut self, stream_out_mask: u8)276 pub fn set_stream_out_mask(&mut self, stream_out_mask: u8) {
277 self.common_word0().set_field(28..32, stream_out_mask);
278 }
279
280 #[inline]
set_shader_local_memory_size( &mut self, shader_local_memory_size: u64, )281 pub fn set_shader_local_memory_size(
282 &mut self,
283 shader_local_memory_size: u64,
284 ) {
285 assert!(shader_local_memory_size <= 0xffffffffffff);
286 assert!(shader_local_memory_size % 0x10 == 0);
287
288 let low = (shader_local_memory_size & 0xffffff) as u32;
289 let high = ((shader_local_memory_size >> 32) & 0xffffff) as u32;
290
291 self.common_word1().set_field(0..24, low);
292 self.common_word2().set_field(0..24, high);
293 }
294
295 #[inline]
set_per_patch_attribute_count( &mut self, per_patch_attribute_count: u8, )296 pub fn set_per_patch_attribute_count(
297 &mut self,
298 per_patch_attribute_count: u8,
299 ) {
300 assert!(self.shader_type == ShaderType::TessellationInit);
301
302 self.common_word1()
303 .set_field(24..32, per_patch_attribute_count);
304
305 // Maxwell changed that encoding.
306 if self.sm > 35 {
307 self.common_word3()
308 .set_field(28..32, per_patch_attribute_count & 0xf);
309 self.common_word4()
310 .set_field(20..24, per_patch_attribute_count >> 4);
311 }
312 }
313
314 #[inline]
set_threads_per_input_primitive( &mut self, threads_per_input_primitive: u8, )315 pub fn set_threads_per_input_primitive(
316 &mut self,
317 threads_per_input_primitive: u8,
318 ) {
319 self.common_word2()
320 .set_field(24..32, threads_per_input_primitive);
321 }
322
323 #[inline]
324 #[allow(dead_code)]
set_shader_local_memory_crs_size( &mut self, shader_local_memory_crs_size: u32, )325 pub fn set_shader_local_memory_crs_size(
326 &mut self,
327 shader_local_memory_crs_size: u32,
328 ) {
329 assert!(shader_local_memory_crs_size <= 0xffffff);
330 self.common_word3()
331 .set_field(0..24, shader_local_memory_crs_size);
332 }
333
334 #[inline]
set_output_topology(&mut self, output_topology: OutputTopology)335 pub fn set_output_topology(&mut self, output_topology: OutputTopology) {
336 self.common_word3().set_field(
337 24..28,
338 match output_topology {
339 OutputTopology::PointList => 1_u8,
340 OutputTopology::LineStrip => 6_u8,
341 OutputTopology::TriangleStrip => 7_u8,
342 },
343 );
344 }
345
346 #[inline]
set_max_output_vertex_count( &mut self, max_output_vertex_count: u16, )347 pub fn set_max_output_vertex_count(
348 &mut self,
349 max_output_vertex_count: u16,
350 ) {
351 assert!(max_output_vertex_count <= 0xfff);
352 self.common_word4()
353 .set_field(0..12, max_output_vertex_count);
354 }
355
356 #[inline]
set_store_req_start(&mut self, store_req_start: u8)357 pub fn set_store_req_start(&mut self, store_req_start: u8) {
358 self.common_word4().set_field(12..20, store_req_start);
359 }
360
361 #[inline]
set_store_req_end(&mut self, store_req_end: u8)362 pub fn set_store_req_end(&mut self, store_req_end: u8) {
363 self.common_word4().set_field(24..32, store_req_end);
364 }
365
set_imap_system_values_ab(&mut self, val: u32)366 pub fn set_imap_system_values_ab(&mut self, val: u32) {
367 self.imap_system_values_ab().set_field(0..32, val);
368 }
369
set_imap_system_values_c(&mut self, val: u16)370 pub fn set_imap_system_values_c(&mut self, val: u16) {
371 self.imap_system_values_c().set_field(0..16, val);
372 }
373
set_imap_system_values_d_vtg(&mut self, val: u8)374 pub fn set_imap_system_values_d_vtg(&mut self, val: u8) {
375 assert!(self.shader_type != ShaderType::Fragment);
376 self.imap_system_values_d_vtg().set_field(0..8, val);
377 }
378
379 #[inline]
set_imap_vector_ps(&mut self, index: usize, value: PixelImap)380 pub fn set_imap_vector_ps(&mut self, index: usize, value: PixelImap) {
381 assert!(index < 128);
382 assert!(self.shader_type == ShaderType::Fragment);
383
384 self.imap_g_ps()
385 .set_field(index * 2..(index + 1) * 2, u8::from(value));
386 }
387
388 #[inline]
set_imap_system_values_d_ps( &mut self, index: usize, value: PixelImap, )389 pub fn set_imap_system_values_d_ps(
390 &mut self,
391 index: usize,
392 value: PixelImap,
393 ) {
394 assert!(index < 8);
395 assert!(self.shader_type == ShaderType::Fragment);
396
397 self.imap_system_values_d_ps()
398 .set_field(index * 2..(index + 1) * 2, u8::from(value));
399 }
400
401 #[inline]
set_imap_vector_vtg(&mut self, index: usize, value: u32)402 pub fn set_imap_vector_vtg(&mut self, index: usize, value: u32) {
403 assert!(index < 4);
404 assert!(self.shader_type != ShaderType::Fragment);
405
406 self.imap_g_vtg()
407 .set_field(index * 32..(index + 1) * 32, value);
408 }
409
410 #[inline]
set_omap_system_values_ab(&mut self, val: u32)411 pub fn set_omap_system_values_ab(&mut self, val: u32) {
412 self.omap_system_values_ab().set_field(0..32, val);
413 }
414
415 #[inline]
set_omap_system_values_c(&mut self, val: u16)416 pub fn set_omap_system_values_c(&mut self, val: u16) {
417 self.omap_system_values_c().set_field(0..16, val);
418 }
419
set_omap_system_values_d_vtg(&mut self, val: u8)420 pub fn set_omap_system_values_d_vtg(&mut self, val: u8) {
421 assert!(self.shader_type != ShaderType::Fragment);
422 self.omap_system_values_d_vtg().set_field(0..8, val);
423 }
424
425 #[inline]
set_omap_vector(&mut self, index: usize, value: u32)426 pub fn set_omap_vector(&mut self, index: usize, value: u32) {
427 assert!(index < 4);
428 assert!(self.shader_type != ShaderType::Fragment);
429
430 self.omap_g().set_field(index * 32..(index + 1) * 32, value);
431 }
432
433 #[inline]
set_omap_targets(&mut self, value: u32)434 pub fn set_omap_targets(&mut self, value: u32) {
435 self.omap_target().set_field(0..32, value)
436 }
437
438 #[inline]
set_omap_sample_mask(&mut self, sample_mask: bool)439 pub fn set_omap_sample_mask(&mut self, sample_mask: bool) {
440 assert!(self.shader_type == ShaderType::Fragment);
441 self.set_bit(608, sample_mask);
442 }
443
444 #[inline]
set_omap_depth(&mut self, depth: bool)445 pub fn set_omap_depth(&mut self, depth: bool) {
446 assert!(self.shader_type == ShaderType::Fragment);
447 self.set_bit(609, depth);
448 }
449
450 #[inline]
set_does_interlock(&mut self, does_interlock: bool)451 pub fn set_does_interlock(&mut self, does_interlock: bool) {
452 assert!(self.shader_type == ShaderType::Fragment);
453 self.set_bit(610, does_interlock);
454 }
455
456 // TODO: This seems always set on fragment shaders, figure out what this is for.
457 #[inline]
set_unknown_bit611(&mut self, value: bool)458 pub fn set_unknown_bit611(&mut self, value: bool) {
459 assert!(self.shader_type == ShaderType::Fragment);
460 self.set_bit(611, value);
461 }
462
463 #[inline]
pervertex_imap_vector_ps(&mut self) -> SubSPHView<'_>464 fn pervertex_imap_vector_ps(&mut self) -> SubSPHView<'_> {
465 assert!(self.shader_type == ShaderType::Fragment);
466
467 BitMutView::new_subset(&mut self.data, 672..800)
468 }
469
470 #[inline]
set_pervertex_imap_vector(&mut self, index: usize, value: u32)471 pub fn set_pervertex_imap_vector(&mut self, index: usize, value: u32) {
472 assert!(index < 4);
473 assert!(self.shader_type == ShaderType::Fragment);
474
475 self.pervertex_imap_vector_ps()
476 .set_field(index * 32..(index + 1) * 32, value);
477 }
478 }
479
encode_header( shader_info: &ShaderInfo, fs_key: Option<&nak_fs_key>, ) -> [u32; CURRENT_MAX_SHADER_HEADER_SIZE]480 pub fn encode_header(
481 shader_info: &ShaderInfo,
482 fs_key: Option<&nak_fs_key>,
483 ) -> [u32; CURRENT_MAX_SHADER_HEADER_SIZE] {
484 if let ShaderStageInfo::Compute(_) = shader_info.stage {
485 return [0_u32; CURRENT_MAX_SHADER_HEADER_SIZE];
486 }
487
488 let mut sph = ShaderProgramHeader::new(
489 ShaderType::from(&shader_info.stage),
490 shader_info.sm,
491 );
492
493 sph.set_sass_version(1);
494 sph.set_does_load_or_store(shader_info.uses_global_mem);
495 sph.set_does_global_store(shader_info.writes_global_mem);
496 sph.set_does_fp64(shader_info.uses_fp64);
497
498 let slm_size = shader_info.slm_size.next_multiple_of(16);
499 sph.set_shader_local_memory_size(slm_size.into());
500
501 match &shader_info.io {
502 ShaderIoInfo::Vtg(io) => {
503 sph.set_imap_system_values_ab(io.sysvals_in.ab);
504 sph.set_imap_system_values_c(io.sysvals_in.c);
505 sph.set_imap_system_values_d_vtg(io.sysvals_in_d);
506
507 for (index, value) in io.attr_in.iter().enumerate() {
508 sph.set_imap_vector_vtg(index, *value);
509 }
510
511 for (index, value) in io.attr_out.iter().enumerate() {
512 sph.set_omap_vector(index, *value);
513 }
514
515 sph.set_store_req_start(io.store_req_start);
516 sph.set_store_req_end(io.store_req_end);
517
518 sph.set_omap_system_values_ab(io.sysvals_out.ab);
519 sph.set_omap_system_values_c(io.sysvals_out.c);
520 sph.set_omap_system_values_d_vtg(io.sysvals_out_d);
521 }
522 ShaderIoInfo::Fragment(io) => {
523 sph.set_imap_system_values_ab(io.sysvals_in.ab);
524 sph.set_imap_system_values_c(io.sysvals_in.c);
525
526 for (index, imap) in io.sysvals_in_d.iter().enumerate() {
527 sph.set_imap_system_values_d_ps(index, *imap);
528 }
529
530 for (index, imap) in io.attr_in.iter().enumerate() {
531 sph.set_imap_vector_ps(index, *imap);
532 }
533
534 let zs_self_dep = fs_key.map_or(false, |key| key.zs_self_dep);
535
536 // This isn't so much a "Do we write multiple render targets?" bit
537 // as a "Should color0 be broadcast to all render targets?" bit. In
538 // other words, it's the gl_FragCoord behavior, not gl_FragData.
539 //
540 // For now, we always set it to true because Vulkan requires
541 // explicit fragment output locations.
542 sph.set_multiple_render_target_enable(true);
543
544 sph.set_kills_pixels(io.uses_kill || zs_self_dep);
545 sph.set_omap_sample_mask(io.writes_sample_mask);
546 sph.set_omap_depth(io.writes_depth);
547 sph.set_omap_targets(io.writes_color);
548 sph.set_does_interlock(io.does_interlock);
549
550 for (index, value) in io.barycentric_attr_in.iter().enumerate() {
551 sph.set_pervertex_imap_vector(index, *value);
552 }
553 }
554 _ => {}
555 }
556
557 match &shader_info.stage {
558 ShaderStageInfo::Geometry(stage) => {
559 sph.set_gs_passthrough_enable(stage.passthrough_enable);
560 sph.set_stream_out_mask(stage.stream_out_mask);
561 sph.set_threads_per_input_primitive(
562 stage.threads_per_input_primitive,
563 );
564 sph.set_output_topology(stage.output_topology);
565 sph.set_max_output_vertex_count(stage.max_output_vertex_count);
566 }
567 ShaderStageInfo::TessellationInit(stage) => {
568 sph.set_per_patch_attribute_count(stage.per_patch_attribute_count);
569 sph.set_threads_per_input_primitive(stage.threads_per_patch);
570 }
571 ShaderStageInfo::Compute(_) => {
572 panic!("Compute shaders don't have a SPH!")
573 }
574 _ => {}
575 };
576
577 sph.data
578 }
579