1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::from_nir::*;
5 use crate::ir::{ShaderIoInfo, ShaderStageInfo};
6 use crate::sph;
7
8 use nak_bindings::*;
9
10 use std::cmp::max;
11 use std::env;
12 use std::ffi::{CStr, CString};
13 use std::fmt::Write;
14 use std::os::raw::c_void;
15 use std::sync::OnceLock;
16
/// Bit positions of the individual `NAK_DEBUG` options inside `Debug::flags`.
#[repr(u8)]
enum DebugFlags {
    Print,
    Serial,
    Spill,
    Annotate,
}

/// Debug options selected through the `NAK_DEBUG` environment variable.
pub struct Debug {
    /// Bitmask with bit `DebugFlags::X as u8` set for every enabled option.
    flags: u32,
}

impl Debug {
    /// Builds the debug state from the `NAK_DEBUG` environment variable.
    ///
    /// The variable is a comma-separated list of option names (`print`,
    /// `serial`, `spill`, `annotate`); surrounding whitespace is ignored.
    /// If the variable cannot be read, no option is enabled.  Unknown names
    /// are reported on stderr and otherwise ignored.
    fn new() -> Debug {
        let Ok(debug_str) = env::var("NAK_DEBUG") else {
            return Debug { flags: 0 };
        };

        let mut flags: u32 = 0;
        for name in debug_str.split(',').map(str::trim) {
            let flag = match name {
                // An empty variable, a trailing comma or a doubled comma
                // produces empty entries; they are not worth a warning.
                "" => continue,
                "print" => DebugFlags::Print,
                "serial" => DebugFlags::Serial,
                "spill" => DebugFlags::Spill,
                "annotate" => DebugFlags::Annotate,
                unk => {
                    eprintln!("Unknown NAK_DEBUG flag \"{}\"", unk);
                    continue;
                }
            };
            flags |= 1_u32 << (flag as u8);
        }
        Debug { flags }
    }
}
52
53 pub trait GetDebugFlags {
debug_flags(&self) -> u3254 fn debug_flags(&self) -> u32;
55
print(&self) -> bool56 fn print(&self) -> bool {
57 self.debug_flags() & (1 << DebugFlags::Print as u8) != 0
58 }
59
serial(&self) -> bool60 fn serial(&self) -> bool {
61 self.debug_flags() & (1 << DebugFlags::Serial as u8) != 0
62 }
63
spill(&self) -> bool64 fn spill(&self) -> bool {
65 self.debug_flags() & (1 << DebugFlags::Spill as u8) != 0
66 }
67
annotate(&self) -> bool68 fn annotate(&self) -> bool {
69 self.debug_flags() & (1 << DebugFlags::Annotate as u8) != 0
70 }
71 }
72
73 pub static DEBUG: OnceLock<Debug> = OnceLock::new();
74
75 impl GetDebugFlags for OnceLock<Debug> {
debug_flags(&self) -> u3276 fn debug_flags(&self) -> u32 {
77 self.get().unwrap().flags
78 }
79 }
80
81 #[no_mangle]
nak_should_print_nir() -> bool82 pub extern "C" fn nak_should_print_nir() -> bool {
83 DEBUG.print()
84 }
85
nir_options(dev: &nv_device_info) -> nir_shader_compiler_options86 fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
87 let mut op: nir_shader_compiler_options = unsafe { std::mem::zeroed() };
88
89 op.lower_fdiv = true;
90 op.fuse_ffma16 = true;
91 op.fuse_ffma32 = true;
92 op.fuse_ffma64 = true;
93 op.lower_flrp16 = true;
94 op.lower_flrp32 = true;
95 op.lower_flrp64 = true;
96 op.lower_bitfield_extract = dev.sm >= 70;
97 op.lower_bitfield_insert = true;
98 op.lower_pack_half_2x16 = true;
99 op.lower_pack_unorm_2x16 = true;
100 op.lower_pack_snorm_2x16 = true;
101 op.lower_pack_unorm_4x8 = true;
102 op.lower_pack_snorm_4x8 = true;
103 op.lower_unpack_half_2x16 = true;
104 op.lower_unpack_unorm_2x16 = true;
105 op.lower_unpack_snorm_2x16 = true;
106 op.lower_unpack_unorm_4x8 = true;
107 op.lower_unpack_snorm_4x8 = true;
108 op.lower_insert_byte = true;
109 op.lower_insert_word = true;
110 op.lower_cs_local_index_to_id = true;
111 op.lower_device_index_to_zero = true;
112 op.lower_isign = true;
113 op.lower_uadd_sat = dev.sm < 70;
114 op.lower_usub_sat = dev.sm < 70;
115 op.lower_iadd_sat = true; // TODO
116 op.use_interpolated_input_intrinsics = true;
117 op.lower_doubles_options = nir_lower_drcp
118 | nir_lower_dsqrt
119 | nir_lower_drsq
120 | nir_lower_dtrunc
121 | nir_lower_dfloor
122 | nir_lower_dceil
123 | nir_lower_dfract
124 | nir_lower_dround_even
125 | nir_lower_dsat;
126 if dev.sm >= 70 {
127 op.lower_doubles_options |= nir_lower_dminmax;
128 }
129 op.lower_int64_options = !(nir_lower_icmp64
130 | nir_lower_iadd64
131 | nir_lower_ineg64
132 | nir_lower_shift64
133 | nir_lower_imul_2x32_64
134 | nir_lower_conv64);
135 op.lower_ldexp = true;
136 op.lower_fmod = true;
137 op.lower_ffract = true;
138 op.lower_fpow = true;
139 op.lower_scmp = true;
140 op.lower_uadd_carry = true;
141 op.lower_usub_borrow = true;
142 op.has_iadd3 = dev.sm >= 70;
143 op.has_sdot_4x8 = dev.sm >= 70;
144 op.has_udot_4x8 = dev.sm >= 70;
145 op.has_sudot_4x8 = dev.sm >= 70;
146 // We set .ftz on f32 by default so we can support fmulz whenever the client
147 // doesn't explicitly request denorms.
148 op.has_fmulz_no_denorms = true;
149 op.has_find_msb_rev = true;
150 op.has_pack_half_2x16_rtz = true;
151 op.has_bfm = dev.sm >= 70;
152
153 op.max_unroll_iterations = 32;
154
155 op
156 }
157
158 #[no_mangle]
nak_compiler_create( dev: *const nv_device_info, ) -> *mut nak_compiler159 pub extern "C" fn nak_compiler_create(
160 dev: *const nv_device_info,
161 ) -> *mut nak_compiler {
162 assert!(!dev.is_null());
163 let dev = unsafe { &*dev };
164
165 DEBUG.get_or_init(Debug::new);
166
167 let nak = Box::new(nak_compiler {
168 sm: dev.sm,
169 warps_per_sm: dev.max_warps_per_mp,
170 nir_options: nir_options(dev),
171 });
172
173 Box::into_raw(nak)
174 }
175
176 #[no_mangle]
nak_compiler_destroy(nak: *mut nak_compiler)177 pub extern "C" fn nak_compiler_destroy(nak: *mut nak_compiler) {
178 unsafe { drop(Box::from_raw(nak)) };
179 }
180
181 #[no_mangle]
nak_debug_flags(_nak: *const nak_compiler) -> u64182 pub extern "C" fn nak_debug_flags(_nak: *const nak_compiler) -> u64 {
183 DEBUG.debug_flags().into()
184 }
185
186 #[no_mangle]
nak_nir_options( nak: *const nak_compiler, ) -> *const nir_shader_compiler_options187 pub extern "C" fn nak_nir_options(
188 nak: *const nak_compiler,
189 ) -> *const nir_shader_compiler_options {
190 assert!(!nak.is_null());
191 let nak = unsafe { &*nak };
192 &nak.nir_options
193 }
194
195 #[repr(C)]
196 struct ShaderBin {
197 bin: nak_shader_bin,
198 code: Vec<u32>,
199 asm: CString,
200 }
201
202 impl ShaderBin {
new(info: nak_shader_info, code: Vec<u32>, asm: &str) -> ShaderBin203 pub fn new(info: nak_shader_info, code: Vec<u32>, asm: &str) -> ShaderBin {
204 let asm = CString::new(asm)
205 .expect("NAK assembly has unexpected null characters");
206 let bin = nak_shader_bin {
207 info: info,
208 code_size: (code.len() * 4).try_into().unwrap(),
209 code: code.as_ptr() as *const c_void,
210 asm_str: if asm.is_empty() {
211 std::ptr::null()
212 } else {
213 asm.as_ptr()
214 },
215 };
216 ShaderBin {
217 bin: bin,
218 code: code,
219 asm: asm,
220 }
221 }
222 }
223
224 #[no_mangle]
nak_shader_bin_destroy(bin: *mut nak_shader_bin)225 pub extern "C" fn nak_shader_bin_destroy(bin: *mut nak_shader_bin) {
226 unsafe {
227 _ = Box::from_raw(bin as *mut ShaderBin);
228 };
229 }
230
/// Dumps `data` to stderr as zero-padded hexadecimal words, eight per row.
///
/// The first line is `label` followed by a colon; the dump always ends with
/// a newline, even when `data` is empty.
fn eprint_hex(label: &str, data: &[u32]) {
    eprint!("{}:", label);
    for row in data.chunks(8) {
        // Each row starts on a fresh, indented line.
        eprintln!();
        eprint!(" ");
        for word in row {
            eprint!(" {:08x}", word);
        }
    }
    eprintln!();
}
242
243 #[no_mangle]
nak_compile_shader( nir: *mut nir_shader, dump_asm: bool, nak: *const nak_compiler, robust2_modes: nir_variable_mode, fs_key: *const nak_fs_key, ) -> *mut nak_shader_bin244 pub extern "C" fn nak_compile_shader(
245 nir: *mut nir_shader,
246 dump_asm: bool,
247 nak: *const nak_compiler,
248 robust2_modes: nir_variable_mode,
249 fs_key: *const nak_fs_key,
250 ) -> *mut nak_shader_bin {
251 unsafe { nak_postprocess_nir(nir, nak, robust2_modes, fs_key) };
252 let nak = unsafe { &*nak };
253 let nir = unsafe { &*nir };
254 let fs_key = if fs_key.is_null() {
255 None
256 } else {
257 Some(unsafe { &*fs_key })
258 };
259
260 let mut s = nak_shader_from_nir(nir, nak.sm);
261
262 if DEBUG.print() {
263 eprintln!("NAK IR:\n{}", &s);
264 }
265
266 s.opt_bar_prop();
267 if DEBUG.print() {
268 eprintln!("NAK IR after opt_bar_prop:\n{}", &s);
269 }
270
271 s.opt_copy_prop();
272 if DEBUG.print() {
273 eprintln!("NAK IR after opt_copy_prop:\n{}", &s);
274 }
275
276 s.opt_lop();
277 if DEBUG.print() {
278 eprintln!("NAK IR after opt_lop:\n{}", &s);
279 }
280
281 s.opt_dce();
282 if DEBUG.print() {
283 eprintln!("NAK IR after dce:\n{}", &s);
284 }
285
286 s.opt_out();
287 if DEBUG.print() {
288 eprintln!("NAK IR after opt_out:\n{}", &s);
289 }
290
291 s.legalize();
292 if DEBUG.print() {
293 eprintln!("NAK IR after legalize:\n{}", &s);
294 }
295
296 s.assign_regs();
297 if DEBUG.print() {
298 eprintln!("NAK IR after assign_regs:\n{}", &s);
299 }
300
301 s.lower_ineg();
302 s.lower_par_copies();
303 s.lower_copy_swap();
304 s.opt_jump_thread();
305 s.calc_instr_deps();
306
307 if DEBUG.print() {
308 eprintln!("NAK IR:\n{}", &s);
309 }
310
311 s.gather_global_mem_usage();
312
313 let info = nak_shader_info {
314 stage: nir.info.stage(),
315 num_gprs: if s.info.sm >= 70 {
316 max(4, s.info.num_gprs + 2)
317 } else {
318 max(4, s.info.num_gprs)
319 },
320 num_barriers: s.info.num_barriers,
321 _pad0: Default::default(),
322 slm_size: s.info.slm_size,
323 __bindgen_anon_1: match &s.info.stage {
324 ShaderStageInfo::Compute(cs_info) => {
325 nak_shader_info__bindgen_ty_1 {
326 cs: nak_shader_info__bindgen_ty_1__bindgen_ty_1 {
327 local_size: [
328 cs_info.local_size[0],
329 cs_info.local_size[1],
330 cs_info.local_size[2],
331 ],
332 smem_size: cs_info.smem_size,
333 _pad: Default::default(),
334 },
335 }
336 }
337 ShaderStageInfo::Fragment => {
338 let fs_info = match &s.info.io {
339 ShaderIoInfo::Fragment(io) => io,
340 _ => unreachable!(),
341 };
342
343 let nir_fs_info = unsafe { &nir.info.__bindgen_anon_1.fs };
344 nak_shader_info__bindgen_ty_1 {
345 fs: nak_shader_info__bindgen_ty_1__bindgen_ty_2 {
346 writes_depth: fs_info.writes_depth,
347 reads_sample_mask: fs_info.reads_sample_mask,
348 post_depth_coverage: nir_fs_info.post_depth_coverage(),
349 uses_sample_shading: nir_fs_info.uses_sample_shading(),
350 early_fragment_tests: nir_fs_info
351 .early_fragment_tests(),
352 _pad: Default::default(),
353 },
354 }
355 }
356 ShaderStageInfo::Tessellation => {
357 let nir_ts_info = unsafe { &nir.info.__bindgen_anon_1.tess };
358 nak_shader_info__bindgen_ty_1 {
359 ts: nak_shader_info__bindgen_ty_1__bindgen_ty_3 {
360 domain: match nir_ts_info._primitive_mode {
361 TESS_PRIMITIVE_TRIANGLES => NAK_TS_DOMAIN_TRIANGLE,
362 TESS_PRIMITIVE_QUADS => NAK_TS_DOMAIN_QUAD,
363 TESS_PRIMITIVE_ISOLINES => NAK_TS_DOMAIN_ISOLINE,
364 _ => panic!("Invalid tess_primitive_mode"),
365 },
366
367 spacing: match nir_ts_info.spacing() {
368 TESS_SPACING_EQUAL => NAK_TS_SPACING_INTEGER,
369 TESS_SPACING_FRACTIONAL_ODD => {
370 NAK_TS_SPACING_FRACT_ODD
371 }
372 TESS_SPACING_FRACTIONAL_EVEN => {
373 NAK_TS_SPACING_FRACT_EVEN
374 }
375 _ => panic!("Invalid gl_tess_spacing"),
376 },
377
378 prims: if nir_ts_info.point_mode() {
379 NAK_TS_PRIMS_POINTS
380 } else if nir_ts_info._primitive_mode
381 == TESS_PRIMITIVE_ISOLINES
382 {
383 NAK_TS_PRIMS_LINES
384 } else if nir_ts_info.ccw() {
385 NAK_TS_PRIMS_TRIANGLES_CCW
386 } else {
387 NAK_TS_PRIMS_TRIANGLES_CW
388 },
389
390 _pad: Default::default(),
391 },
392 }
393 }
394 _ => nak_shader_info__bindgen_ty_1 {
395 _pad: Default::default(),
396 },
397 },
398 vtg: match &s.info.stage {
399 ShaderStageInfo::Geometry(_)
400 | ShaderStageInfo::Tessellation
401 | ShaderStageInfo::Vertex => {
402 let writes_layer =
403 nir.info.outputs_written & (1 << VARYING_SLOT_LAYER) != 0;
404 let writes_point_size =
405 nir.info.outputs_written & (1 << VARYING_SLOT_PSIZ) != 0;
406 let num_clip = nir.info.clip_distance_array_size();
407 let num_cull = nir.info.cull_distance_array_size();
408 let clip_enable = (1_u32 << num_clip) - 1;
409 let cull_enable = ((1_u32 << num_cull) - 1) << num_clip;
410 nak_shader_info__bindgen_ty_2 {
411 writes_layer,
412 writes_point_size,
413 clip_enable: clip_enable.try_into().unwrap(),
414 cull_enable: cull_enable.try_into().unwrap(),
415 xfb: unsafe { nak_xfb_from_nir(nir.xfb_info) },
416 }
417 }
418 _ => unsafe { std::mem::zeroed() },
419 },
420 hdr: sph::encode_header(&s.info, fs_key),
421 };
422
423 let mut asm = String::new();
424 if dump_asm {
425 write!(asm, "{}", s).expect("Failed to dump assembly");
426 }
427
428 s.remove_annotations();
429
430 let code = if nak.sm >= 70 {
431 s.encode_sm70()
432 } else if nak.sm >= 50 {
433 s.encode_sm50()
434 } else {
435 panic!("Unsupported shader model");
436 };
437
438 if DEBUG.print() {
439 let stage_name = unsafe {
440 let c_name = _mesa_shader_stage_to_string(info.stage as u32);
441 CStr::from_ptr(c_name).to_str().expect("Invalid UTF-8")
442 };
443 let instruction_count = if nak.sm >= 70 {
444 code.len() / 4
445 } else if nak.sm >= 50 {
446 (code.len() / 8) * 3
447 } else {
448 unreachable!()
449 };
450
451 eprintln!("Stage: {}", stage_name);
452 eprintln!("Instruction count: {}", instruction_count);
453 eprintln!("Num GPRs: {}", info.num_gprs);
454 eprintln!("SLM size: {}", info.slm_size);
455
456 if info.stage != MESA_SHADER_COMPUTE {
457 eprint_hex("Header", &info.hdr);
458 }
459
460 eprint_hex("Encoded shader", &code);
461 }
462
463 let bin = Box::new(ShaderBin::new(info, code, &asm));
464 Box::into_raw(bin) as *mut nak_shader_bin
465 }
466