1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 #![allow(non_upper_case_globals)]
5
6 use crate::api::GetDebugFlags;
7 use crate::api::DEBUG;
8 use crate::cfg::CFGBuilder;
9 use crate::ir::*;
10 use crate::nir::*;
11 use crate::nir_instr_printer::NirInstrPrinter;
12 use crate::sph::{OutputTopology, PixelImap};
13
14 use nak_bindings::*;
15
16 use std::cmp::max;
17 use std::collections::{HashMap, HashSet};
18 use std::ops::Index;
19
20 fn init_info_from_nir(nir: &nir_shader, sm: u8) -> ShaderInfo {
21 ShaderInfo {
22 sm: sm,
23 num_gprs: 0,
24 num_barriers: 0,
25 slm_size: nir.scratch_size,
26 uses_global_mem: false,
27 writes_global_mem: false,
28 // TODO: handle this.
29 uses_fp64: false,
30 stage: match nir.info.stage() {
31 MESA_SHADER_COMPUTE => {
32 ShaderStageInfo::Compute(ComputeShaderInfo {
33 local_size: [
34 nir.info.workgroup_size[0],
35 nir.info.workgroup_size[1],
36 nir.info.workgroup_size[2],
37 ],
38 smem_size: nir.info.shared_size.try_into().unwrap(),
39 })
40 }
41 MESA_SHADER_VERTEX => ShaderStageInfo::Vertex,
42 MESA_SHADER_FRAGMENT => ShaderStageInfo::Fragment,
43 MESA_SHADER_GEOMETRY => {
44 let info_gs = unsafe { &nir.info.__bindgen_anon_1.gs };
45 let output_topology = match info_gs.output_primitive {
46 MESA_PRIM_POINTS => OutputTopology::PointList,
47 MESA_PRIM_LINE_STRIP => OutputTopology::LineStrip,
48 MESA_PRIM_TRIANGLE_STRIP => OutputTopology::TriangleStrip,
49 _ => panic!(
50 "Invalid GS input primitive {}",
51 info_gs.input_primitive
52 ),
53 };
54
55 ShaderStageInfo::Geometry(GeometryShaderInfo {
56 // TODO: Should be set if VK_NV_geometry_shader_passthrough is in use.
57 passthrough_enable: false,
58 stream_out_mask: info_gs.active_stream_mask(),
59 threads_per_input_primitive: info_gs.invocations,
60 output_topology: output_topology,
61 max_output_vertex_count: info_gs.vertices_out,
62 })
63 }
64 MESA_SHADER_TESS_CTRL => {
65 let info_tess = unsafe { &nir.info.__bindgen_anon_1.tess };
66 ShaderStageInfo::TessellationInit(TessellationInitShaderInfo {
67 per_patch_attribute_count: 6,
68 threads_per_patch: info_tess.tcs_vertices_out,
69 })
70 }
71 MESA_SHADER_TESS_EVAL => ShaderStageInfo::Tessellation,
72 _ => panic!("Unknown shader stage"),
73 },
74 io: match nir.info.stage() {
75 MESA_SHADER_COMPUTE => ShaderIoInfo::None,
76 MESA_SHADER_FRAGMENT => ShaderIoInfo::Fragment(FragmentIoInfo {
77 sysvals_in: SysValInfo {
78 // Required on fragment shaders, otherwise it causes a trap.
79 ab: 1 << 31,
80 c: 0,
81 },
82 sysvals_in_d: [PixelImap::Unused; 8],
83 attr_in: [PixelImap::Unused; 128],
84 barycentric_attr_in: [0; 4],
85 reads_sample_mask: false,
86 uses_kill: false,
87 writes_color: 0,
88 writes_sample_mask: false,
89 writes_depth: false,
90 // TODO: Should be set if interlocks are in use. (VK_EXT_fragment_shader_interlock)
91 does_interlock: false,
92 }),
93 MESA_SHADER_VERTEX
94 | MESA_SHADER_GEOMETRY
95 | MESA_SHADER_TESS_CTRL
96 | MESA_SHADER_TESS_EVAL => ShaderIoInfo::Vtg(VtgIoInfo {
97 sysvals_in: SysValInfo::default(),
98 sysvals_in_d: 0,
99 sysvals_out: SysValInfo::default(),
100 sysvals_out_d: 0,
101 attr_in: [0; 4],
102 attr_out: [0; 4],
103
104 // TODO: figure out how to fill this.
105 store_req_start: u8::MAX,
106 store_req_end: 0,
107 }),
108 _ => panic!("Unknown shader stage"),
109 },
110 }
111 }
112
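// Allocates the NAK SSA values backing a NIR def: 1-bit values get one
// predicate register per component, everything else is packed into as many
// 32-bit GPRs as are needed to hold all the components.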
113 fn alloc_ssa_for_nir(b: &mut impl SSABuilder, ssa: &nir_def) -> Vec<SSAValue> {
114 let (file, comps) = if ssa.bit_size == 1 {
115 (RegFile::Pred, ssa.num_components)
116 } else {
117 let bits = ssa.bit_size * ssa.num_components;
118 (RegFile::GPR, bits.div_ceil(32))
119 };
120
121 let mut vec = Vec::new();
122 for _ in 0..comps {
123 vec.push(b.alloc_ssa(file, 1)[0]);
124 }
125 vec
126 }
127
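// Memoizes phi IDs per (NIR phi def index, component) so that every
// reference to the same component of a NIR phi maps to the same NAK phi ID.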
128 struct PhiAllocMap<'a> {
129 alloc: &'a mut PhiAllocator,
130 map: HashMap<(u32, u8), u32>,
131 }
132
133 impl<'a> PhiAllocMap<'a> {
134 fn new(alloc: &'a mut PhiAllocator) -> PhiAllocMap<'a> {
135 PhiAllocMap {
136 alloc: alloc,
137 map: HashMap::new(),
138 }
139 }
140
141 fn get_phi_id(&mut self, phi: &nir_phi_instr, comp: u8) -> u32 {
142 *self
143 .map
144 .entry((phi.def.index, comp))
145 .or_insert_with(|| self.alloc.alloc())
146 }
147 }
148
149 struct PerSizeFloatControls {
150 pub ftz: bool,
151 pub rnd_mode: FRndMode,
152 }
153
154 struct ShaderFloatControls {
155 pub fp16: PerSizeFloatControls,
156 pub fp32: PerSizeFloatControls,
157 pub fp64: PerSizeFloatControls,
158 }
159
160 impl Default for ShaderFloatControls {
161 fn default() -> Self {
162 Self {
163 fp16: PerSizeFloatControls {
164 ftz: false,
165 rnd_mode: FRndMode::NearestEven,
166 },
167 fp32: PerSizeFloatControls {
168 ftz: true, // Default FTZ on fp32
169 rnd_mode: FRndMode::NearestEven,
170 },
171 fp64: PerSizeFloatControls {
172 ftz: false,
173 rnd_mode: FRndMode::NearestEven,
174 },
175 }
176 }
177 }
178
179 impl ShaderFloatControls {
180 fn from_nir(nir: &nir_shader) -> ShaderFloatControls {
181 let nir_fc = nir.info.float_controls_execution_mode;
182 let mut fc: ShaderFloatControls = Default::default();
183
184 if (nir_fc & FLOAT_CONTROLS_DENORM_PRESERVE_FP16) != 0 {
185 fc.fp16.ftz = false;
186 } else if (nir_fc & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP16) != 0 {
187 fc.fp16.ftz = true;
188 }
189 if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16) != 0 {
190 fc.fp16.rnd_mode = FRndMode::NearestEven;
191 } else if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16) != 0 {
192 fc.fp16.rnd_mode = FRndMode::Zero;
193 }
194
195 if (nir_fc & FLOAT_CONTROLS_DENORM_PRESERVE_FP32) != 0 {
196 fc.fp32.ftz = false;
197 } else if (nir_fc & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) != 0 {
198 fc.fp32.ftz = true;
199 }
200 if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32) != 0 {
201 fc.fp32.rnd_mode = FRndMode::NearestEven;
202 } else if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32) != 0 {
203 fc.fp32.rnd_mode = FRndMode::Zero;
204 }
205
206 if (nir_fc & FLOAT_CONTROLS_DENORM_PRESERVE_FP64) != 0 {
207 fc.fp64.ftz = false;
208 } else if (nir_fc & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP64) != 0 {
209 fc.fp64.ftz = true;
210 }
211 if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64) != 0 {
212 fc.fp64.rnd_mode = FRndMode::NearestEven;
213 } else if (nir_fc & FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64) != 0 {
214 fc.fp64.rnd_mode = FRndMode::Zero;
215 }
216
217 fc
218 }
219 }
220
221 impl Index<FloatType> for ShaderFloatControls {
222 type Output = PerSizeFloatControls;
223
224 fn index(&self, idx: FloatType) -> &PerSizeFloatControls {
225 match idx {
226 FloatType::F16 => &self.fp16,
227 FloatType::F32 => &self.fp32,
228 FloatType::F64 => &self.fp64,
229 }
230 }
231 }
232
233 struct ShaderFromNir<'a> {
234 nir: &'a nir_shader,
235 info: ShaderInfo,
236 float_ctl: ShaderFloatControls,
237 cfg: CFGBuilder<u32, BasicBlock>,
238 label_alloc: LabelAllocator,
239 block_label: HashMap<u32, Label>,
240 bar_label: HashMap<u32, Label>,
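// 34 slots: 8 render targets x 4 components, plus sample mask and depth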
241 fs_out_regs: [SSAValue; 34],
242 end_block_id: u32,
243 ssa_map: HashMap<u32, Vec<SSAValue>>,
244 saturated: HashSet<*const nir_def>,
245 nir_instr_printer: NirInstrPrinter,
246 }
247
248 impl<'a> ShaderFromNir<'a> {
249 fn new(nir: &'a nir_shader, sm: u8) -> Self {
250 Self {
251 nir: nir,
252 info: init_info_from_nir(nir, sm),
253 float_ctl: ShaderFloatControls::from_nir(nir),
254 cfg: CFGBuilder::new(),
255 label_alloc: LabelAllocator::new(),
256 block_label: HashMap::new(),
257 bar_label: HashMap::new(),
258 fs_out_regs: [SSAValue::NONE; 34],
259 end_block_id: 0,
260 ssa_map: HashMap::new(),
261 saturated: HashSet::new(),
262 nir_instr_printer: NirInstrPrinter::new(),
263 }
264 }
265
266 fn get_block_label(&mut self, block: &nir_block) -> Label {
267 *self
268 .block_label
269 .entry(block.index)
270 .or_insert_with(|| self.label_alloc.alloc())
271 }
272
273 fn get_ssa(&mut self, ssa: &nir_def) -> &[SSAValue] {
274 self.ssa_map.get(&ssa.index).unwrap()
275 }
276
277 fn set_ssa(&mut self, def: &nir_def, vec: Vec<SSAValue>) {
278 if def.bit_size == 1 {
279 for s in &vec {
280 assert!(s.is_predicate());
281 }
282 } else {
283 for s in &vec {
284 assert!(!s.is_predicate());
285 }
286 let bits =
287 usize::from(def.bit_size) * usize::from(def.num_components);
288 assert!(vec.len() == bits.div_ceil(32));
289 }
290 self.ssa_map
291 .entry(def.index)
292 .and_modify(|_| panic!("Cannot set an SSA def twice"))
293 .or_insert(vec);
294 }
295
296 fn get_ssa_comp(&mut self, def: &nir_def, c: u8) -> (SSARef, u8) {
297 let vec = self.get_ssa(def);
298 match def.bit_size {
299 1 => (vec[usize::from(c)].into(), 0),
300 8 => (vec[usize::from(c / 4)].into(), c % 4),
301 16 => (vec[usize::from(c / 2)].into(), (c * 2) % 4),
302 32 => (vec[usize::from(c)].into(), 0),
303 64 => {
304 let comps =
305 [vec[usize::from(c) * 2 + 0], vec[usize::from(c) * 2 + 1]];
306 (comps.into(), 0)
307 }
308 _ => panic!("Unsupported bit size: {}", def.bit_size),
309 }
310 }
311
312 fn get_src(&mut self, src: &nir_src) -> Src {
313 SSARef::try_from(self.get_ssa(src.as_def())).unwrap().into()
314 }
315
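// Splits an I/O address into an SSA base plus an immediate offset that fits
// in imm_bits. If the whole address folds to a constant, the base is zero
// and the constant becomes the offset.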
316 fn get_io_addr_offset(
317 &mut self,
318 addr: &nir_src,
319 imm_bits: u8,
320 ) -> (Src, i32) {
321 let addr = addr.as_def();
322 let addr_offset = unsafe {
323 nak_get_io_addr_offset(addr as *const _ as *mut _, imm_bits)
324 };
325
326 if let Some(base_def) = std::ptr::NonNull::new(addr_offset.base.def) {
327 let base_def = unsafe { base_def.as_ref() };
328 let base_comp = u8::try_from(addr_offset.base.comp).unwrap();
329 let (base, _) = self.get_ssa_comp(base_def, base_comp);
330 (base.into(), addr_offset.offset)
331 } else {
332 (SrcRef::Zero.into(), addr_offset.offset)
333 }
334 }
335
336 fn set_dst(&mut self, def: &nir_def, ssa: SSARef) {
337 self.set_ssa(def, (*ssa).into());
338 }
339
340 fn try_saturate_alu_dst(&mut self, def: &nir_def) -> bool {
341 if def.all_uses_are_fsat() {
342 self.saturated.insert(def as *const _);
343 true
344 } else {
345 false
346 }
347 }
348
349 fn alu_src_is_saturated(&self, src: &nir_alu_src) -> bool {
350 self.saturated.get(&(src.as_def() as *const _)).is_some()
351 }
352
353 fn parse_alu(&mut self, b: &mut impl SSABuilder, alu: &nir_alu_instr) {
354 // Handle vectors and pack ops as a special case since they're the only
355 // ALU ops that can produce more than 16B. They are also the only ALU
356 // ops which we allow to consume small (8 and 16-bit) vector data
357 // scattered across multiple dwords.
358 match alu.op {
359 nir_op_mov
360 | nir_op_pack_32_4x8_split
361 | nir_op_pack_32_2x16_split
362 | nir_op_pack_64_2x32_split
363 | nir_op_vec2
364 | nir_op_vec3
365 | nir_op_vec4
366 | nir_op_vec5
367 | nir_op_vec8
368 | nir_op_vec16 => {
369 let src_bit_size = alu.get_src(0).src.bit_size();
370 let bits = usize::from(alu.def.num_components)
371 * usize::from(alu.def.bit_size);
372
373 // Collect the sources into a vec with src_bit_size per SSA
374 // value in the vec. This implicitly makes 64-bit sources look
375 // like two 32-bit values
376 let mut srcs = Vec::new();
377 if alu.op == nir_op_mov {
378 let src = alu.get_src(0);
379 for c in 0..alu.def.num_components {
380 let s = src.swizzle[usize::from(c)];
381 let (src, byte) =
382 self.get_ssa_comp(src.src.as_def(), s);
383 for ssa in src.iter() {
384 srcs.push((*ssa, byte));
385 }
386 }
387 } else {
388 for src in alu.srcs_as_slice().iter() {
389 let s = src.swizzle[0];
390 let (src, byte) =
391 self.get_ssa_comp(src.src.as_def(), s);
392 for ssa in src.iter() {
393 srcs.push((*ssa, byte));
394 }
395 }
396 }
397
398 let mut comps = Vec::new();
399 match src_bit_size {
400 1 | 32 | 64 => {
401 for (ssa, _) in srcs {
402 comps.push(ssa);
403 }
404 }
405 8 => {
406 for dc in 0..bits.div_ceil(32) {
407 let mut psrc = [Src::new_zero(); 4];
408 let mut psel = [0_u8; 4];
409
410 for b in 0..4 {
411 let sc = dc * 4 + b;
412 if sc < srcs.len() {
413 let (ssa, byte) = srcs[sc];
414 for i in 0..4_u8 {
415 let psrc_i = &mut psrc[usize::from(i)];
416 if *psrc_i == Src::new_zero() {
417 *psrc_i = ssa.into();
418 } else if *psrc_i != Src::from(ssa) {
419 continue;
420 }
421 psel[b] = i * 4 + byte;
422 }
423 }
424 }
425 comps.push(b.prmt4(psrc, psel)[0]);
426 }
427 }
428 16 => {
429 for dc in 0..bits.div_ceil(32) {
430 let mut psrc = [Src::new_zero(); 2];
431 let mut psel = [0_u8; 4];
432
433 for w in 0..2 {
434 let sc = dc * 2 + w;
435 if sc < srcs.len() {
436 let (ssa, byte) = srcs[sc];
437 let w_u8 = u8::try_from(w).unwrap();
438 psrc[w] = ssa.into();
439 psel[w * 2 + 0] = (w_u8 * 4) + byte;
440 psel[w * 2 + 1] = (w_u8 * 4) + byte + 1;
441 }
442 }
443 comps.push(b.prmt(psrc[0], psrc[1], psel)[0]);
444 }
445 }
446 _ => panic!("Unknown bit size: {src_bit_size}"),
447 }
448
449 self.set_ssa(&alu.def, comps);
450 return;
451 }
452 _ => (),
453 }
454
455 let mut srcs: Vec<Src> = Vec::new();
456 for (i, alu_src) in alu.srcs_as_slice().iter().enumerate() {
457 let bit_size = alu_src.src.bit_size();
458 let comps = alu.src_components(i.try_into().unwrap());
459 let ssa = self.get_ssa(alu_src.src.as_def());
460
461 match bit_size {
462 1 => {
463 assert!(comps == 1);
464 let s = usize::from(alu_src.swizzle[0]);
465 srcs.push(ssa[s].into());
466 }
467 8 => {
468 assert!(comps <= 4);
469 let s = alu_src.swizzle[0];
470 let dw = ssa[usize::from(s / 4)];
471
472 let mut prmt = [4_u8; 4];
473 for c in 0..comps {
474 let cs = alu_src.swizzle[usize::from(c)];
475 assert!(s / 4 == cs / 4);
476 prmt[usize::from(c)] = cs;
477 }
478 srcs.push(b.prmt(dw.into(), 0.into(), prmt).into());
479 }
480 16 => {
481 assert!(comps <= 2);
482 let s = alu_src.swizzle[0];
483 let dw = ssa[usize::from(s / 2)];
484
485 let mut prmt = [0_u8; 4];
486 for c in 0..comps {
487 let cs = alu_src.swizzle[usize::from(c)];
488 assert!(s / 2 == cs / 2);
489 prmt[usize::from(c) * 2 + 0] = cs * 2 + 0;
490 prmt[usize::from(c) * 2 + 1] = cs * 2 + 1;
491 }
492 // TODO: Some ops can handle swizzles
493 srcs.push(b.prmt(dw.into(), 0.into(), prmt).into());
494 }
495 32 => {
496 assert!(comps == 1);
497 let s = usize::from(alu_src.swizzle[0]);
498 srcs.push(ssa[s].into());
499 }
500 64 => {
501 assert!(comps == 1);
502 let s = usize::from(alu_src.swizzle[0]);
503 srcs.push([ssa[s * 2], ssa[s * 2 + 1]].into());
504 }
505 _ => panic!("Invalid bit size: {bit_size}"),
506 }
507 }
508
509 let dst: SSARef = match alu.op {
510 nir_op_b2b1 => {
511 assert!(alu.get_src(0).bit_size() == 32);
512 b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], 0.into())
513 }
514 nir_op_b2b32 | nir_op_b2i8 | nir_op_b2i16 | nir_op_b2i32 => {
515 b.sel(srcs[0].bnot(), 0.into(), 1.into())
516 }
517 nir_op_b2i64 => {
518 let lo = b.sel(srcs[0].bnot(), 0.into(), 1.into());
519 let hi = b.copy(0.into());
520 [lo[0], hi[0]].into()
521 }
522 nir_op_b2f32 => {
523 b.sel(srcs[0].bnot(), 0.0_f32.into(), 1.0_f32.into())
524 }
525 nir_op_b2f64 => {
526 let lo = b.copy(0.into());
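// 0x3ff00000 is the high dword of 1.0 in binary64; the low dword is zero.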
527 let hi = b.sel(srcs[0].bnot(), 0.into(), 0x3ff00000.into());
528 [lo[0], hi[0]].into()
529 }
530 nir_op_bcsel => b.sel(srcs[0], srcs[1], srcs[2]),
531 nir_op_bfm => {
532 let dst = b.alloc_ssa(RegFile::GPR, 1);
533 b.push_op(OpBMsk {
534 dst: dst.into(),
535 pos: srcs[1],
536 width: srcs[0],
537 wrap: true,
538 });
539 dst
540 }
541 nir_op_bit_count => {
542 let dst = b.alloc_ssa(RegFile::GPR, 1);
543 b.push_op(OpPopC {
544 dst: dst.into(),
545 src: srcs[0],
546 });
547 dst
548 }
549 nir_op_bitfield_reverse => {
550 let dst = b.alloc_ssa(RegFile::GPR, 1);
551 if self.info.sm >= 70 {
552 b.push_op(OpBRev {
553 dst: dst.into(),
554 src: srcs[0],
555 });
556 } else {
557 // No BREV in Maxwell
558 b.push_op(OpBfe {
559 dst: dst.into(),
560 base: srcs[0],
561 signed: false,
562 range: Src::new_imm_u32(0x2000),
563 reverse: true,
564 });
565 }
566 dst
567 }
568 nir_op_ibitfield_extract | nir_op_ubitfield_extract => {
569 let range = b.alloc_ssa(RegFile::GPR, 1);
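// PRMT in index mode with sel 0x0040 packs the bit offset (srcs[1]) into
// byte 0 and the field width (srcs[2]) into byte 1, the {offset, width}
// form that BFE expects.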
570 b.push_op(OpPrmt {
571 dst: range.into(),
572 srcs: [srcs[1], srcs[2]],
573 sel: 0x0040.into(),
574 mode: PrmtMode::Index,
575 });
576
577 let dst = b.alloc_ssa(RegFile::GPR, 1);
578 b.push_op(OpBfe {
579 dst: dst.into(),
580 base: srcs[0],
581 signed: !matches!(alu.op, nir_op_ubitfield_extract),
582 range: range.into(),
583 reverse: false,
584 });
585 dst
586 }
587 nir_op_extract_u8 | nir_op_extract_i8 | nir_op_extract_u16
588 | nir_op_extract_i16 => {
589 let src1 = alu.get_src(1);
590 let elem = src1.src.comp_as_uint(src1.swizzle[0]).unwrap();
591 let elem = u8::try_from(elem).unwrap();
592
593 match alu.op {
594 nir_op_extract_u8 => {
595 assert!(elem < 4);
596 let byte = elem;
597 let zero = 4;
598 b.prmt(srcs[0], 0.into(), [byte, zero, zero, zero])
599 }
600 nir_op_extract_i8 => {
601 assert!(elem < 4);
602 let byte = elem;
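// Setting bit 3 of a PRMT selector replicates the sign bit of the
// selected byte, so the upper bytes become the sign extension.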
603 let sign = byte | 0x8;
604 b.prmt(srcs[0], 0.into(), [byte, sign, sign, sign])
605 }
606 nir_op_extract_u16 => {
607 assert!(elem < 2);
608 let byte = elem * 2;
609 let zero = 4;
610 b.prmt(srcs[0], 0.into(), [byte, byte + 1, zero, zero])
611 }
612 nir_op_extract_i16 => {
613 assert!(elem < 2);
614 let byte = elem * 2;
615 let sign = (byte + 1) | 0x8;
616 b.prmt(srcs[0], 0.into(), [byte, byte + 1, sign, sign])
617 }
618 _ => panic!("Unknown extract op: {}", alu.op),
619 }
620 }
621 nir_op_f2f16 | nir_op_f2f16_rtne | nir_op_f2f16_rtz
622 | nir_op_f2f32 | nir_op_f2f64 => {
623 let src_bits = alu.get_src(0).src.bit_size();
624 let dst_bits = alu.def.bit_size();
625 let src_type = FloatType::from_bits(src_bits.into());
626 let dst_type = FloatType::from_bits(dst_bits.into());
627
628 let dst = b.alloc_ssa(RegFile::GPR, dst_bits.div_ceil(32));
629 b.push_op(OpF2F {
630 dst: dst.into(),
631 src: srcs[0],
632 src_type: FloatType::from_bits(src_bits.into()),
633 dst_type: dst_type,
634 rnd_mode: match alu.op {
635 nir_op_f2f16_rtne => FRndMode::NearestEven,
636 nir_op_f2f16_rtz => FRndMode::Zero,
637 _ => self.float_ctl[dst_type].rnd_mode,
638 },
639 ftz: if src_bits < dst_bits {
640 self.float_ctl[src_type].ftz
641 } else {
642 self.float_ctl[dst_type].ftz
643 },
644 high: false,
645 integer_rnd: false,
646 });
647 dst
648 }
649 nir_op_find_lsb => {
650 let tmp = b.alloc_ssa(RegFile::GPR, 1);
651 b.push_op(OpBRev {
652 dst: tmp.into(),
653 src: srcs[0],
654 });
655 let dst = b.alloc_ssa(RegFile::GPR, 1);
656 b.push_op(OpFlo {
657 dst: dst.into(),
658 src: tmp.into(),
659 signed: alu.op == nir_op_ifind_msb,
660 return_shift_amount: true,
661 });
662 dst
663 }
664 nir_op_f2i8 | nir_op_f2i16 | nir_op_f2i32 | nir_op_f2i64
665 | nir_op_f2u8 | nir_op_f2u16 | nir_op_f2u32 | nir_op_f2u64 => {
666 let src_bits = usize::from(alu.get_src(0).bit_size());
667 let dst_bits = alu.def.bit_size();
668 let src_type = FloatType::from_bits(src_bits);
669 let dst = b.alloc_ssa(RegFile::GPR, dst_bits.div_ceil(32));
670 let dst_is_signed = alu.info().output_type & 2 != 0;
671 b.push_op(OpF2I {
672 dst: dst.into(),
673 src: srcs[0],
674 src_type: src_type,
675 dst_type: IntType::from_bits(
676 dst_bits.into(),
677 dst_is_signed,
678 ),
679 rnd_mode: FRndMode::Zero,
680 ftz: self.float_ctl[src_type].ftz,
681 });
682 dst
683 }
684 nir_op_fabs | nir_op_fadd | nir_op_fneg => {
685 let (x, y) = match alu.op {
686 nir_op_fabs => (srcs[0].fabs(), 0.0_f32.into()),
687 nir_op_fadd => (srcs[0], srcs[1]),
688 nir_op_fneg => (Src::new_zero().fneg(), srcs[0].fneg()),
689 _ => panic!("Unhandled case"),
690 };
691 let ftype = FloatType::from_bits(alu.def.bit_size().into());
692 let dst;
693 if alu.def.bit_size() == 64 {
694 dst = b.alloc_ssa(RegFile::GPR, 2);
695 b.push_op(OpDAdd {
696 dst: dst.into(),
697 srcs: [x, y],
698 rnd_mode: self.float_ctl[ftype].rnd_mode,
699 });
700 } else if alu.def.bit_size() == 32 {
701 dst = b.alloc_ssa(RegFile::GPR, 1);
702 b.push_op(OpFAdd {
703 dst: dst.into(),
704 srcs: [x, y],
705 saturate: self.try_saturate_alu_dst(&alu.def),
706 rnd_mode: self.float_ctl[ftype].rnd_mode,
707 ftz: self.float_ctl[ftype].ftz,
708 });
709 } else {
710 panic!("Unsupported float type: f{}", alu.def.bit_size());
711 }
712 dst
713 }
714 nir_op_fceil | nir_op_ffloor | nir_op_fround_even
715 | nir_op_ftrunc => {
716 assert!(alu.def.bit_size() == 32);
717 let dst = b.alloc_ssa(RegFile::GPR, 1);
718 let ty = FloatType::from_bits(alu.def.bit_size().into());
719 let rnd_mode = match alu.op {
720 nir_op_fceil => FRndMode::PosInf,
721 nir_op_ffloor => FRndMode::NegInf,
722 nir_op_ftrunc => FRndMode::Zero,
723 nir_op_fround_even => FRndMode::NearestEven,
724 _ => unreachable!(),
725 };
726 let ftz = self.float_ctl[ty].ftz;
727 if b.sm() >= 70 {
728 b.push_op(OpFRnd {
729 dst: dst.into(),
730 src: srcs[0],
731 src_type: ty,
732 dst_type: ty,
733 rnd_mode,
734 ftz,
735 });
736 } else {
737 b.push_op(OpF2F {
738 dst: dst.into(),
739 src: srcs[0],
740 src_type: ty,
741 dst_type: ty,
742 rnd_mode,
743 ftz,
744 integer_rnd: true,
745 high: false,
746 });
747 }
748 dst
749 }
750 nir_op_fcos => b.fcos(srcs[0]),
751 nir_op_feq | nir_op_fge | nir_op_flt | nir_op_fneu => {
752 let src_type =
753 FloatType::from_bits(alu.get_src(0).bit_size().into());
754 let cmp_op = match alu.op {
755 nir_op_feq => FloatCmpOp::OrdEq,
756 nir_op_fge => FloatCmpOp::OrdGe,
757 nir_op_flt => FloatCmpOp::OrdLt,
758 nir_op_fneu => FloatCmpOp::UnordNe,
759 _ => panic!("Usupported float comparison"),
760 };
761
762 let dst = b.alloc_ssa(RegFile::Pred, 1);
763 if alu.get_src(0).bit_size() == 64 {
764 b.push_op(OpDSetP {
765 dst: dst.into(),
766 set_op: PredSetOp::And,
767 cmp_op: cmp_op,
768 srcs: [srcs[0], srcs[1]],
769 accum: SrcRef::True.into(),
770 });
771 } else if alu.get_src(0).bit_size() == 32 {
772 b.push_op(OpFSetP {
773 dst: dst.into(),
774 set_op: PredSetOp::And,
775 cmp_op: cmp_op,
776 srcs: [srcs[0], srcs[1]],
777 accum: SrcRef::True.into(),
778 ftz: self.float_ctl[src_type].ftz,
779 });
780 } else {
781 panic!(
782 "Unsupported float type: f{}",
783 alu.get_src(0).bit_size()
784 );
785 }
786 dst
787 }
788 nir_op_fexp2 => b.fexp2(srcs[0]),
789 nir_op_ffma => {
790 let ftype = FloatType::from_bits(alu.def.bit_size().into());
791 let dst;
792 if alu.def.bit_size() == 64 {
793 debug_assert!(!self.float_ctl[ftype].ftz);
794 dst = b.alloc_ssa(RegFile::GPR, 2);
795 b.push_op(OpDFma {
796 dst: dst.into(),
797 srcs: [srcs[0], srcs[1], srcs[2]],
798 rnd_mode: self.float_ctl[ftype].rnd_mode,
799 });
800 } else if alu.def.bit_size() == 32 {
801 dst = b.alloc_ssa(RegFile::GPR, 1);
802 b.push_op(OpFFma {
803 dst: dst.into(),
804 srcs: [srcs[0], srcs[1], srcs[2]],
805 saturate: self.try_saturate_alu_dst(&alu.def),
806 rnd_mode: self.float_ctl[ftype].rnd_mode,
807 // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ
808 // anyway so only set one of the two bits.
809 ftz: self.float_ctl[ftype].ftz,
810 dnz: false,
811 });
812 } else {
813 panic!("Unsupported float type: f{}", alu.def.bit_size());
814 }
815 dst
816 }
817 nir_op_ffmaz => {
818 assert!(alu.def.bit_size() == 32);
819 // DNZ implies FTZ so we need FTZ set or this is invalid
820 assert!(self.float_ctl.fp32.ftz);
821 let dst = b.alloc_ssa(RegFile::GPR, 1);
822 b.push_op(OpFFma {
823 dst: dst.into(),
824 srcs: [srcs[0], srcs[1], srcs[2]],
825 saturate: self.try_saturate_alu_dst(&alu.def),
826 rnd_mode: self.float_ctl.fp32.rnd_mode,
827 // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ
828 // anyway so only set one of the two bits.
829 ftz: false,
830 dnz: true,
831 });
832 dst
833 }
834 nir_op_flog2 => {
835 assert!(alu.def.bit_size() == 32);
836 b.mufu(MuFuOp::Log2, srcs[0])
837 }
838 nir_op_fmax | nir_op_fmin => {
839 let dst;
840 if alu.def.bit_size() == 64 {
841 dst = b.alloc_ssa(RegFile::GPR, 2);
842 b.push_op(OpDMnMx {
843 dst: dst.into(),
844 srcs: [srcs[0], srcs[1]],
845 min: (alu.op == nir_op_fmin).into(),
846 });
847 } else if alu.def.bit_size() == 32 {
848 dst = b.alloc_ssa(RegFile::GPR, 1);
849 b.push_op(OpFMnMx {
850 dst: dst.into(),
851 srcs: [srcs[0], srcs[1]],
852 min: (alu.op == nir_op_fmin).into(),
853 ftz: self.float_ctl.fp32.ftz,
854 });
855 } else {
856 panic!("Unsupported float type: f{}", alu.def.bit_size());
857 }
858 dst
859 }
860 nir_op_fmul => {
861 let ftype = FloatType::from_bits(alu.def.bit_size().into());
862 let dst;
863 if alu.def.bit_size() == 64 {
864 debug_assert!(!self.float_ctl[ftype].ftz);
865 dst = b.alloc_ssa(RegFile::GPR, 2);
866 b.push_op(OpDMul {
867 dst: dst.into(),
868 srcs: [srcs[0], srcs[1]],
869 rnd_mode: self.float_ctl[ftype].rnd_mode,
870 });
871 } else if alu.def.bit_size() == 32 {
872 dst = b.alloc_ssa(RegFile::GPR, 1);
873 b.push_op(OpFMul {
874 dst: dst.into(),
875 srcs: [srcs[0], srcs[1]],
876 saturate: self.try_saturate_alu_dst(&alu.def),
877 rnd_mode: self.float_ctl[ftype].rnd_mode,
878 ftz: self.float_ctl[ftype].ftz,
879 dnz: false,
880 });
881 } else {
882 panic!("Unsupported float type: f{}", alu.def.bit_size());
883 }
884 dst
885 }
886 nir_op_fmulz => {
887 assert!(alu.def.bit_size() == 32);
888 // DNZ implies FTZ so we need FTZ set or this is invalid
889 assert!(self.float_ctl.fp32.ftz);
890 let dst = b.alloc_ssa(RegFile::GPR, 1);
891 b.push_op(OpFMul {
892 dst: dst.into(),
893 srcs: [srcs[0], srcs[1]],
894 saturate: self.try_saturate_alu_dst(&alu.def),
895 rnd_mode: self.float_ctl.fp32.rnd_mode,
896 // The hardware doesn't like FTZ+DNZ and DNZ implies FTZ
897 // anyway so only set one of the two bits.
898 ftz: false,
899 dnz: true,
900 });
901 dst
902 }
903 nir_op_fquantize2f16 => {
904 let tmp = b.alloc_ssa(RegFile::GPR, 1);
905 b.push_op(OpF2F {
906 dst: tmp.into(),
907 src: srcs[0],
908 src_type: FloatType::F32,
909 dst_type: FloatType::F16,
910 rnd_mode: FRndMode::NearestEven,
911 ftz: true,
912 high: false,
913 integer_rnd: false,
914 });
915 assert!(alu.def.bit_size() == 32);
916 let dst = b.alloc_ssa(RegFile::GPR, 1);
917 b.push_op(OpF2F {
918 dst: dst.into(),
919 src: tmp.into(),
920 src_type: FloatType::F16,
921 dst_type: FloatType::F32,
922 rnd_mode: FRndMode::NearestEven,
923 ftz: true,
924 high: false,
925 integer_rnd: false,
926 });
927 dst
928 }
929 nir_op_frcp => {
930 assert!(alu.def.bit_size() == 32);
931 b.mufu(MuFuOp::Rcp, srcs[0])
932 }
933 nir_op_frsq => {
934 assert!(alu.def.bit_size() == 32);
935 b.mufu(MuFuOp::Rsq, srcs[0])
936 }
937 nir_op_fsat => {
938 assert!(alu.def.bit_size() == 32);
939 if self.alu_src_is_saturated(&alu.srcs_as_slice()[0]) {
940 b.copy(srcs[0])
941 } else {
942 let ftype = FloatType::from_bits(alu.def.bit_size().into());
943 let dst = b.alloc_ssa(RegFile::GPR, 1);
944 b.push_op(OpFAdd {
945 dst: dst.into(),
946 srcs: [srcs[0], 0.into()],
947 saturate: true,
948 rnd_mode: self.float_ctl[ftype].rnd_mode,
949 ftz: self.float_ctl[ftype].ftz,
950 });
951 dst
952 }
953 }
954 nir_op_fsign => {
955 if alu.def.bit_size() == 64 {
956 let lz = b.dsetp(FloatCmpOp::OrdLt, srcs[0], 0.into());
957 let gz = b.dsetp(FloatCmpOp::OrdGt, srcs[0], 0.into());
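// 0xbff00000 and 0x3ff00000 are the high dwords of -1.0 and 1.0 in
// binary64; the low dword of the result is always zero.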
958 let hi = b.sel(lz.into(), 0xbff00000.into(), 0.into());
959 let hi = b.sel(gz.into(), 0x3ff00000.into(), hi.into());
960 let lo = b.copy(0.into());
961 [lo[0], hi[0]].into()
962 } else if alu.def.bit_size() == 32 {
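// fset produces 1.0 for true and 0.0 for false, so gz - lz yields
// +1.0, -1.0, or 0.0.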
963 let lz = b.fset(FloatCmpOp::OrdLt, srcs[0], 0.into());
964 let gz = b.fset(FloatCmpOp::OrdGt, srcs[0], 0.into());
965 b.fadd(gz.into(), Src::from(lz).fneg())
966 } else {
967 panic!("Unsupported float type: f{}", alu.def.bit_size());
968 }
969 }
970 nir_op_fsin => b.fsin(srcs[0]),
971 nir_op_fsqrt => b.mufu(MuFuOp::Sqrt, srcs[0]),
972 nir_op_i2f16 | nir_op_i2f32 | nir_op_i2f64 => {
973 let src_bits = alu.get_src(0).src.bit_size();
974 let dst_bits = alu.def.bit_size();
975 let dst_type = FloatType::from_bits(dst_bits.into());
976 let dst = b.alloc_ssa(RegFile::GPR, dst_bits.div_ceil(32));
977 b.push_op(OpI2F {
978 dst: dst.into(),
979 src: srcs[0],
980 dst_type: dst_type,
981 src_type: IntType::from_bits(src_bits.into(), true),
982 rnd_mode: self.float_ctl[dst_type].rnd_mode,
983 });
984 dst
985 }
986 nir_op_i2i8 | nir_op_i2i16 | nir_op_i2i32 | nir_op_i2i64
987 | nir_op_u2u8 | nir_op_u2u16 | nir_op_u2u32 | nir_op_u2u64 => {
988 let src_bits = alu.get_src(0).src.bit_size();
989 let dst_bits = alu.def.bit_size();
990
991 let mut prmt = [0_u8; 8];
992 match alu.op {
993 nir_op_i2i8 | nir_op_i2i16 | nir_op_i2i32
994 | nir_op_i2i64 => {
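// Select the top byte of the source and set the PRMT sign-replicate
// bit so every byte past the source width is filled with the sign.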
995 let sign = ((src_bits / 8) - 1) | 0x8;
996 for i in 0..8 {
997 if i < (src_bits / 8) {
998 prmt[usize::from(i)] = i;
999 } else {
1000 prmt[usize::from(i)] = sign;
1001 }
1002 }
1003 }
1004 nir_op_u2u8 | nir_op_u2u16 | nir_op_u2u32
1005 | nir_op_u2u64 => {
1006 for i in 0..8 {
1007 if i < (src_bits / 8) {
1008 prmt[usize::from(i)] = i;
1009 } else {
1010 prmt[usize::from(i)] = 4;
1011 }
1012 }
1013 }
1014 _ => panic!("Invalid integer conversion: {}", alu.op),
1015 }
1016 let prmt_lo: [u8; 4] = prmt[0..4].try_into().unwrap();
1017 let prmt_hi: [u8; 4] = prmt[4..8].try_into().unwrap();
1018
1019 let src = srcs[0].as_ssa().unwrap();
1020 if src_bits == 64 {
1021 if dst_bits == 64 {
1022 *src
1023 } else {
1024 b.prmt(src[0].into(), src[1].into(), prmt_lo)
1025 }
1026 } else {
1027 if dst_bits == 64 {
1028 let lo = b.prmt(src[0].into(), 0.into(), prmt_lo);
1029 let hi = b.prmt(src[0].into(), 0.into(), prmt_hi);
1030 [lo[0], hi[0]].into()
1031 } else {
1032 b.prmt(src[0].into(), 0.into(), prmt_lo)
1033 }
1034 }
1035 }
1036 nir_op_iabs => b.iabs(srcs[0]),
1037 nir_op_iadd => match alu.def.bit_size {
1038 32 => b.iadd(srcs[0], srcs[1], 0.into()),
1039 64 => b.iadd64(srcs[0], srcs[1], 0.into()),
1040 x => panic!("unsupported bit size for nir_op_iadd: {x}"),
1041 },
1042 nir_op_iadd3 => match alu.def.bit_size {
1043 32 => b.iadd(srcs[0], srcs[1], srcs[2]),
1044 64 => b.iadd64(srcs[0], srcs[1], srcs[2]),
1045 x => panic!("unsupported bit size for nir_op_iadd3: {x}"),
1046 },
1047 nir_op_iand => b.lop2(LogicOp2::And, srcs[0], srcs[1]),
1048 nir_op_ieq => {
1049 if alu.get_src(0).bit_size() == 1 {
1050 b.lop2(LogicOp2::Xor, srcs[0], srcs[1].bnot())
1051 } else if alu.get_src(0).bit_size() == 64 {
1052 b.isetp64(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
1053 } else {
1054 assert!(alu.get_src(0).bit_size() == 32);
1055 b.isetp(IntCmpType::I32, IntCmpOp::Eq, srcs[0], srcs[1])
1056 }
1057 }
1058 nir_op_ifind_msb | nir_op_ifind_msb_rev | nir_op_ufind_msb
1059 | nir_op_ufind_msb_rev => {
1060 let dst = b.alloc_ssa(RegFile::GPR, 1);
1061 b.push_op(OpFlo {
1062 dst: dst.into(),
1063 src: srcs[0],
1064 signed: match alu.op {
1065 nir_op_ifind_msb | nir_op_ifind_msb_rev => true,
1066 nir_op_ufind_msb | nir_op_ufind_msb_rev => false,
1067 _ => panic!("Not a find_msb op"),
1068 },
1069 return_shift_amount: match alu.op {
1070 nir_op_ifind_msb | nir_op_ufind_msb => false,
1071 nir_op_ifind_msb_rev | nir_op_ufind_msb_rev => true,
1072 _ => panic!("Not a find_msb op"),
1073 },
1074 });
1075 dst
1076 }
1077 nir_op_ige | nir_op_ilt | nir_op_uge | nir_op_ult => {
1078 let x = *srcs[0].as_ssa().unwrap();
1079 let y = *srcs[1].as_ssa().unwrap();
1080 let (cmp_type, cmp_op) = match alu.op {
1081 nir_op_ige => (IntCmpType::I32, IntCmpOp::Ge),
1082 nir_op_ilt => (IntCmpType::I32, IntCmpOp::Lt),
1083 nir_op_uge => (IntCmpType::U32, IntCmpOp::Ge),
1084 nir_op_ult => (IntCmpType::U32, IntCmpOp::Lt),
1085 _ => panic!("Not an integer comparison"),
1086 };
1087 if alu.get_src(0).bit_size() == 64 {
1088 b.isetp64(cmp_type, cmp_op, x.into(), y.into())
1089 } else {
1090 assert!(alu.get_src(0).bit_size() == 32);
1091 b.isetp(cmp_type, cmp_op, x.into(), y.into())
1092 }
1093 }
1094 nir_op_imad => {
1095 assert!(alu.def.bit_size() == 32);
1096 let dst = b.alloc_ssa(RegFile::GPR, 1);
1097 b.push_op(OpIMad {
1098 dst: dst.into(),
1099 srcs: [srcs[0], srcs[1], srcs[2]],
1100 signed: false,
1101 });
1102 dst
1103 }
1104 nir_op_imax | nir_op_imin | nir_op_umax | nir_op_umin => {
1105 let (tp, min) = match alu.op {
1106 nir_op_imax => (IntCmpType::I32, SrcRef::False),
1107 nir_op_imin => (IntCmpType::I32, SrcRef::True),
1108 nir_op_umax => (IntCmpType::U32, SrcRef::False),
1109 nir_op_umin => (IntCmpType::U32, SrcRef::True),
1110 _ => panic!("Not an integer min/max"),
1111 };
1112 assert!(alu.def.bit_size() == 32);
1113 b.imnmx(tp, srcs[0], srcs[1], min.into())
1114 }
1115 nir_op_imul => {
1116 assert!(alu.def.bit_size() == 32);
1117 b.imul(srcs[0], srcs[1])
1118 }
1119 nir_op_imul_2x32_64 | nir_op_umul_2x32_64 => {
1120 let signed = alu.op == nir_op_imul_2x32_64;
1121 b.imul_2x32_64(srcs[0], srcs[1], signed)
1122 }
1123 nir_op_imul_high | nir_op_umul_high => {
1124 let signed = alu.op == nir_op_imul_high;
1125 let dst64 = b.imul_2x32_64(srcs[0], srcs[1], signed);
1126 dst64[1].into()
1127 }
1128 nir_op_ine => {
1129 if alu.get_src(0).bit_size() == 1 {
1130 b.lop2(LogicOp2::Xor, srcs[0], srcs[1])
1131 } else if alu.get_src(0).bit_size() == 64 {
1132 b.isetp64(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
1133 } else {
1134 assert!(alu.get_src(0).bit_size() == 32);
1135 b.isetp(IntCmpType::I32, IntCmpOp::Ne, srcs[0], srcs[1])
1136 }
1137 }
1138 nir_op_ineg => {
1139 if alu.def.bit_size == 64 {
1140 let x = srcs[0].as_ssa().unwrap();
1141 let sum = b.alloc_ssa(RegFile::GPR, 2);
1142 let carry = b.alloc_ssa(RegFile::Pred, 1);
1143 b.push_op(OpIAdd3 {
1144 dst: sum[0].into(),
1145 overflow: [carry.into(), Dst::None],
1146 srcs: [0.into(), Src::from(x[0]).ineg(), 0.into()],
1147 });
1148 b.push_op(OpIAdd3X {
1149 dst: sum[1].into(),
1150 overflow: [Dst::None, Dst::None],
1151 srcs: [0.into(), Src::from(x[1]).bnot(), 0.into()],
1152 carry: [carry.into(), SrcRef::False.into()],
1153 });
1154 sum
1155 } else {
1156 assert!(alu.def.bit_size() == 32);
1157 b.ineg(srcs[0])
1158 }
1159 }
1160 nir_op_inot => {
1161 if alu.def.bit_size() == 1 {
1162 b.lop2(LogicOp2::PassB, true.into(), srcs[0].bnot())
1163 } else {
1164 assert!(alu.def.bit_size() == 32);
1165 b.lop2(LogicOp2::PassB, 0.into(), srcs[0].bnot())
1166 }
1167 }
1168 nir_op_ior => b.lop2(LogicOp2::Or, srcs[0], srcs[1]),
1169 nir_op_ishl => {
1170 let x = *srcs[0].as_ssa().unwrap();
1171 let shift = srcs[1];
1172 if alu.def.bit_size() == 64 {
1173 // For 64-bit shifts, we have to use clamp mode so we need
1174 // to mask the shift in order to satisfy NIR semantics.
1175 let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
1176 let dst = b.alloc_ssa(RegFile::GPR, 2);
1177 b.push_op(OpShf {
1178 dst: dst[0].into(),
1179 low: 0.into(),
1180 high: x[0].into(),
1181 shift: shift.into(),
1182 right: false,
1183 wrap: false,
1184 data_type: IntType::U32,
1185 dst_high: true,
1186 });
1187 b.push_op(OpShf {
1188 dst: dst[1].into(),
1189 low: x[0].into(),
1190 high: x[1].into(),
1191 shift: shift.into(),
1192 right: false,
1193 wrap: false,
1194 data_type: IntType::U64,
1195 dst_high: true,
1196 });
1197 dst
1198 } else {
1199 assert!(alu.def.bit_size() == 32);
1200 b.shl(srcs[0], srcs[1])
1201 }
1202 }
1203 nir_op_ishr => {
1204 let x = *srcs[0].as_ssa().unwrap();
1205 let shift = srcs[1];
1206 if alu.def.bit_size() == 64 {
1207 // For 64-bit shifts, we have to use clamp mode so we need
1208 // to mask the shift in order to satisfy NIR semantics.
1209 let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
1210 let dst = b.alloc_ssa(RegFile::GPR, 2);
1211 b.push_op(OpShf {
1212 dst: dst[0].into(),
1213 low: x[0].into(),
1214 high: x[1].into(),
1215 shift: shift.into(),
1216 right: true,
1217 wrap: false,
1218 data_type: IntType::I64,
1219 dst_high: false,
1220 });
1221 b.push_op(OpShf {
1222 dst: dst[1].into(),
1223 low: x[0].into(),
1224 high: x[1].into(),
1225 shift: shift.into(),
1226 right: true,
1227 wrap: false,
1228 data_type: IntType::I32,
1229 dst_high: true,
1230 });
1231 dst
1232 } else {
1233 assert!(alu.def.bit_size() == 32);
1234 b.shr(srcs[0], srcs[1], true)
1235 }
1236 }
1237 nir_op_ixor => b.lop2(LogicOp2::Xor, srcs[0], srcs[1]),
1238 nir_op_pack_half_2x16_split | nir_op_pack_half_2x16_rtz_split => {
1239 assert!(alu.get_src(0).bit_size() == 32);
1240 let low = b.alloc_ssa(RegFile::GPR, 1);
1241 let high = b.alloc_ssa(RegFile::GPR, 1);
1242
1243 let rnd_mode = match alu.op {
1244 nir_op_pack_half_2x16_split => FRndMode::NearestEven,
1245 nir_op_pack_half_2x16_rtz_split => FRndMode::Zero,
1246 _ => panic!("Unhandled fp16 pack op"),
1247 };
1248
1249 b.push_op(OpF2F {
1250 dst: low.into(),
1251 src: srcs[0],
1252 src_type: FloatType::F32,
1253 dst_type: FloatType::F16,
1254 rnd_mode: rnd_mode,
1255 ftz: false,
1256 high: false,
1257 integer_rnd: false,
1258 });
1259
1260 let src_bits = usize::from(alu.get_src(1).bit_size());
1261 let src_type = FloatType::from_bits(src_bits);
1262 assert!(matches!(src_type, FloatType::F32));
1263 b.push_op(OpF2F {
1264 dst: high.into(),
1265 src: srcs[1],
1266 src_type: FloatType::F32,
1267 dst_type: FloatType::F16,
1268 rnd_mode: rnd_mode,
1269 ftz: false,
1270 high: false,
1271 integer_rnd: false,
1272 });
1273
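// Selector [0, 1, 4, 5] takes bytes 0-1 from the low f16 and bytes 0-1
// of the second source (4-5) from the high f16, packing both halves into
// one dword.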
1274 b.prmt(low.into(), high.into(), [0, 1, 4, 5])
1275 }
1276 nir_op_sdot_4x8_iadd => {
1277 let dst = b.alloc_ssa(RegFile::GPR, 1);
1278 b.push_op(OpIDp4 {
1279 dst: dst.into(),
1280 src_types: [IntType::I8, IntType::I8],
1281 srcs: [srcs[0], srcs[1], srcs[2]],
1282 });
1283 dst
1284 }
1285 nir_op_sudot_4x8_iadd => {
1286 let dst = b.alloc_ssa(RegFile::GPR, 1);
1287 b.push_op(OpIDp4 {
1288 dst: dst.into(),
1289 src_types: [IntType::I8, IntType::U8],
1290 srcs: [srcs[0], srcs[1], srcs[2]],
1291 });
1292 dst
1293 }
1294 nir_op_udot_4x8_uadd => {
1295 let dst = b.alloc_ssa(RegFile::GPR, 1);
1296 b.push_op(OpIDp4 {
1297 dst: dst.into(),
1298 src_types: [IntType::U8, IntType::U8],
1299 srcs: [srcs[0], srcs[1], srcs[2]],
1300 });
1301 dst
1302 }
1303 nir_op_u2f16 | nir_op_u2f32 | nir_op_u2f64 => {
1304 let src_bits = alu.get_src(0).src.bit_size();
1305 let dst_bits = alu.def.bit_size();
1306 let dst_type = FloatType::from_bits(dst_bits.into());
1307 let dst = b.alloc_ssa(RegFile::GPR, dst_bits.div_ceil(32));
1308 b.push_op(OpI2F {
1309 dst: dst.into(),
1310 src: srcs[0],
1311 dst_type: dst_type,
1312 src_type: IntType::from_bits(src_bits.into(), false),
1313 rnd_mode: self.float_ctl[dst_type].rnd_mode,
1314 });
1315 dst
1316 }
1317 nir_op_uadd_sat => {
1318 let x = srcs[0].as_ssa().unwrap();
1319 let y = srcs[1].as_ssa().unwrap();
1320 let sum_lo = b.alloc_ssa(RegFile::GPR, 1);
1321 let ovf_lo = b.alloc_ssa(RegFile::Pred, 1);
1322 b.push_op(OpIAdd3 {
1323 dst: sum_lo.into(),
1324 overflow: [ovf_lo.into(), Dst::None],
1325 srcs: [0.into(), x[0].into(), y[0].into()],
1326 });
1327 if alu.def.bit_size() == 64 {
1328 let sum_hi = b.alloc_ssa(RegFile::GPR, 1);
1329 let ovf_hi = b.alloc_ssa(RegFile::Pred, 1);
1330 b.push_op(OpIAdd3X {
1331 dst: sum_hi.into(),
1332 overflow: [ovf_hi.into(), Dst::None],
1333 srcs: [0.into(), x[1].into(), y[1].into()],
1334 carry: [ovf_lo.into(), false.into()],
1335 });
1336 let lo =
1337 b.sel(ovf_hi.into(), u32::MAX.into(), sum_lo.into());
1338 let hi =
1339 b.sel(ovf_hi.into(), u32::MAX.into(), sum_hi.into());
1340 [lo[0], hi[0]].into()
1341 } else {
1342 assert!(alu.def.bit_size() == 32);
1343 b.sel(ovf_lo.into(), u32::MAX.into(), sum_lo.into())
1344 }
1345 }
1346 nir_op_usub_sat => {
1347 let x = srcs[0].as_ssa().unwrap();
1348 let y = srcs[1].as_ssa().unwrap();
1349 let sum_lo = b.alloc_ssa(RegFile::GPR, 1);
1350 let ovf_lo = b.alloc_ssa(RegFile::Pred, 1);
1351 // The result of the add below is the 33-bit value
1352 //
1353 // s|o = x + !y + 1
1354 //
1355 // The overflow bit of this result is true if and only if the
1356 // subtract did NOT overflow.
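// For example, with x = 1 and y = 3, the low add computes
// 1 + !3 + 1 = 0xfffffffe with no carry-out, so the predicate is false
// and the select below clamps the result to 0.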
1357 b.push_op(OpIAdd3 {
1358 dst: sum_lo.into(),
1359 overflow: [ovf_lo.into(), Dst::None],
1360 srcs: [0.into(), x[0].into(), Src::from(y[0]).ineg()],
1361 });
1362 if alu.def.bit_size() == 64 {
1363 let sum_hi = b.alloc_ssa(RegFile::GPR, 1);
1364 let ovf_hi = b.alloc_ssa(RegFile::Pred, 1);
1365 b.push_op(OpIAdd3X {
1366 dst: sum_hi.into(),
1367 overflow: [ovf_hi.into(), Dst::None],
1368 srcs: [0.into(), x[1].into(), Src::from(y[1]).bnot()],
1369 carry: [ovf_lo.into(), false.into()],
1370 });
1371 let lo = b.sel(ovf_hi.into(), sum_lo.into(), 0.into());
1372 let hi = b.sel(ovf_hi.into(), sum_hi.into(), 0.into());
1373 [lo[0], hi[0]].into()
1374 } else {
1375 assert!(alu.def.bit_size() == 32);
1376 b.sel(ovf_lo.into(), sum_lo.into(), 0.into())
1377 }
1378 }
1379 nir_op_unpack_32_2x16_split_x => {
1380 b.prmt(srcs[0], 0.into(), [0, 1, 4, 4])
1381 }
1382 nir_op_unpack_32_2x16_split_y => {
1383 b.prmt(srcs[0], 0.into(), [2, 3, 4, 4])
1384 }
1385 nir_op_unpack_64_2x32_split_x => {
1386 let src0_x = srcs[0].as_ssa().unwrap()[0];
1387 b.copy(src0_x.into())
1388 }
1389 nir_op_unpack_64_2x32_split_y => {
1390 let src0_y = srcs[0].as_ssa().unwrap()[1];
1391 b.copy(src0_y.into())
1392 }
1393 nir_op_unpack_half_2x16_split_x
1394 | nir_op_unpack_half_2x16_split_y => {
1395 assert!(alu.def.bit_size() == 32);
1396 let dst = b.alloc_ssa(RegFile::GPR, 1);
1397
1398 b.push_op(OpF2F {
1399 dst: dst[0].into(),
1400 src: srcs[0],
1401 src_type: FloatType::F16,
1402 dst_type: FloatType::F32,
1403 rnd_mode: FRndMode::NearestEven,
1404 ftz: false,
1405 high: alu.op == nir_op_unpack_half_2x16_split_y,
1406 integer_rnd: false,
1407 });
1408
1409 dst
1410 }
1411 nir_op_ushr => {
1412 let x = *srcs[0].as_ssa().unwrap();
1413 let shift = srcs[1];
1414 if alu.def.bit_size() == 64 {
1415 // For 64-bit shifts, we have to use clamp mode so we need
1416 // to mask the shift in order to satisfy NIR semantics.
1417 let shift = b.lop2(LogicOp2::And, shift, 0x3f.into());
1418 let dst = b.alloc_ssa(RegFile::GPR, 2);
1419 b.push_op(OpShf {
1420 dst: dst[0].into(),
1421 low: x[0].into(),
1422 high: x[1].into(),
1423 shift: shift.into(),
1424 right: true,
1425 wrap: false,
1426 data_type: IntType::U64,
1427 dst_high: false,
1428 });
1429 b.push_op(OpShf {
1430 dst: dst[1].into(),
1431 low: x[0].into(),
1432 high: x[1].into(),
1433 shift: shift.into(),
1434 right: true,
1435 wrap: false,
1436 data_type: IntType::U32,
1437 dst_high: true,
1438 });
1439 dst
1440 } else {
1441 assert!(alu.def.bit_size() == 32);
1442 b.shr(srcs[0], srcs[1], false)
1443 }
1444 }
1445 nir_op_fddx | nir_op_fddx_coarse | nir_op_fddx_fine => {
1446 // TODO: Real coarse derivatives
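// SHFL.BFLY with a lane mask of 1 exchanges values between horizontally
// adjacent invocations of a quad; FSWZADD then takes per-lane signed
// differences so every invocation gets the derivative.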
1447
1448 assert!(alu.def.bit_size() == 32);
1449 let ftype = FloatType::F32;
1450 let scratch = b.alloc_ssa(RegFile::GPR, 1);
1451
1452 b.push_op(OpShfl {
1453 dst: scratch[0].into(),
1454 in_bounds: Dst::None,
1455 src: srcs[0],
1456 lane: 1_u32.into(),
1457 c: (0x3_u32 | 0x1c_u32 << 8).into(),
1458 op: ShflOp::Bfly,
1459 });
1460
1461 let dst = b.alloc_ssa(RegFile::GPR, 1);
1462
1463 b.push_op(OpFSwzAdd {
1464 dst: dst[0].into(),
1465 srcs: [scratch[0].into(), srcs[0]],
1466 ops: [
1467 FSwzAddOp::SubLeft,
1468 FSwzAddOp::SubRight,
1469 FSwzAddOp::SubLeft,
1470 FSwzAddOp::SubRight,
1471 ],
1472 rnd_mode: self.float_ctl[ftype].rnd_mode,
1473 ftz: self.float_ctl[ftype].ftz,
1474 });
1475
1476 dst
1477 }
1478 nir_op_fddy | nir_op_fddy_coarse | nir_op_fddy_fine => {
1479 // TODO: Real coarse derivatives
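// Same as fddx but with a lane mask of 2, which exchanges vertically
// adjacent invocations of the quad.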
1480
1481 assert!(alu.def.bit_size() == 32);
1482 let ftype = FloatType::F32;
1483 let scratch = b.alloc_ssa(RegFile::GPR, 1);
1484
1485 b.push_op(OpShfl {
1486 dst: scratch[0].into(),
1487 in_bounds: Dst::None,
1488 src: srcs[0],
1489 lane: 2_u32.into(),
1490 c: (0x3_u32 | 0x1c_u32 << 8).into(),
1491 op: ShflOp::Bfly,
1492 });
1493
1494 let dst = b.alloc_ssa(RegFile::GPR, 1);
1495
1496 b.push_op(OpFSwzAdd {
1497 dst: dst[0].into(),
1498 srcs: [scratch[0].into(), srcs[0]],
1499 ops: [
1500 FSwzAddOp::SubLeft,
1501 FSwzAddOp::SubLeft,
1502 FSwzAddOp::SubRight,
1503 FSwzAddOp::SubRight,
1504 ],
1505 rnd_mode: self.float_ctl[ftype].rnd_mode,
1506 ftz: self.float_ctl[ftype].ftz,
1507 });
1508
1509 dst
1510 }
1511 _ => panic!("Unsupported ALU instruction: {}", alu.info().name()),
1512 };
1513 self.set_dst(&alu.def, dst);
1514 }
1515
1516 fn parse_jump(&mut self, _b: &mut impl SSABuilder, _jump: &nir_jump_instr) {
1517 // Nothing to do
1518 }
1519
1520 fn parse_tex(&mut self, b: &mut impl SSABuilder, tex: &nir_tex_instr) {
1521 let dim = match tex.sampler_dim {
1522 GLSL_SAMPLER_DIM_1D => {
1523 if tex.is_array {
1524 TexDim::Array1D
1525 } else {
1526 TexDim::_1D
1527 }
1528 }
1529 GLSL_SAMPLER_DIM_2D => {
1530 if tex.is_array {
1531 TexDim::Array2D
1532 } else {
1533 TexDim::_2D
1534 }
1535 }
1536 GLSL_SAMPLER_DIM_3D => {
1537 assert!(!tex.is_array);
1538 TexDim::_3D
1539 }
1540 GLSL_SAMPLER_DIM_CUBE => {
1541 if tex.is_array {
1542 TexDim::ArrayCube
1543 } else {
1544 TexDim::Cube
1545 }
1546 }
1547 GLSL_SAMPLER_DIM_BUF => TexDim::_1D,
1548 GLSL_SAMPLER_DIM_MS => {
1549 if tex.is_array {
1550 TexDim::Array2D
1551 } else {
1552 TexDim::_2D
1553 }
1554 }
1555 _ => panic!("Unsupported texture dimension: {}", tex.sampler_dim),
1556 };
1557
1558 let srcs = tex.srcs_as_slice();
1559 assert!(srcs[0].src_type == nir_tex_src_backend1);
1560 if srcs.len() > 1 {
1561 assert!(srcs.len() == 2);
1562 assert!(srcs[1].src_type == nir_tex_src_backend2);
1563 }
1564
1565 let flags: nak_nir_tex_flags =
1566 unsafe { std::mem::transmute_copy(&tex.backend_flags) };
1567
1568 let mask = tex.def.components_read();
1569 let mask = u8::try_from(mask).unwrap();
1570
1571 let dst_comps = u8::try_from(mask.count_ones()).unwrap();
1572 let dst = b.alloc_ssa(RegFile::GPR, dst_comps);
1573
1574 // On Volta and later, the destination is split in two
1575 let mut dsts = [Dst::None; 2];
1576 if dst_comps > 2 && b.sm() >= 70 {
1577 dsts[0] = SSARef::try_from(&dst[0..2]).unwrap().into();
1578 dsts[1] = SSARef::try_from(&dst[2..]).unwrap().into();
1579 } else {
1580 dsts[0] = dst.into();
1581 }
1582
1583 if tex.op == nir_texop_hdr_dim_nv {
1584 let src = self.get_src(&srcs[0].src);
1585 b.push_op(OpTxq {
1586 dsts: dsts,
1587 src: src,
1588 query: TexQuery::Dimension,
1589 mask: mask,
1590 });
1591 } else if tex.op == nir_texop_tex_type_nv {
1592 let src = self.get_src(&srcs[0].src);
1593 b.push_op(OpTxq {
1594 dsts: dsts,
1595 src: src,
1596 query: TexQuery::TextureType,
1597 mask: mask,
1598 });
1599 } else {
1600 let lod_mode = match flags.lod_mode() {
1601 NAK_NIR_LOD_MODE_AUTO => TexLodMode::Auto,
1602 NAK_NIR_LOD_MODE_ZERO => TexLodMode::Zero,
1603 NAK_NIR_LOD_MODE_BIAS => TexLodMode::Bias,
1604 NAK_NIR_LOD_MODE_LOD => TexLodMode::Lod,
1605 NAK_NIR_LOD_MODE_CLAMP => TexLodMode::Clamp,
1606 NAK_NIR_LOD_MODE_BIAS_CLAMP => TexLodMode::BiasClamp,
1607 _ => panic!("Invalid LOD mode"),
1608 };
1609
1610 let offset_mode = match flags.offset_mode() {
1611 NAK_NIR_OFFSET_MODE_NONE => Tld4OffsetMode::None,
1612 NAK_NIR_OFFSET_MODE_AOFFI => Tld4OffsetMode::AddOffI,
1613 NAK_NIR_OFFSET_MODE_PER_PX => Tld4OffsetMode::PerPx,
1614 _ => panic!("Invalid offset mode"),
1615 };
1616
1617 let srcs = [self.get_src(&srcs[0].src), self.get_src(&srcs[1].src)];
1618
1619 if tex.op == nir_texop_txd {
1620 assert!(lod_mode == TexLodMode::Auto);
1621 assert!(offset_mode != Tld4OffsetMode::PerPx);
1622 assert!(!flags.has_z_cmpr());
1623 b.push_op(OpTxd {
1624 dsts: dsts,
1625 resident: Dst::None,
1626 srcs: srcs,
1627 dim: dim,
1628 offset: offset_mode == Tld4OffsetMode::AddOffI,
1629 mask: mask,
1630 });
1631 } else if tex.op == nir_texop_lod {
1632 assert!(offset_mode == Tld4OffsetMode::None);
1633 b.push_op(OpTmml {
1634 dsts: dsts,
1635 srcs: srcs,
1636 dim: dim,
1637 mask: mask,
1638 });
1639 } else if tex.op == nir_texop_txf || tex.op == nir_texop_txf_ms {
1640 assert!(offset_mode != Tld4OffsetMode::PerPx);
1641 b.push_op(OpTld {
1642 dsts: dsts,
1643 resident: Dst::None,
1644 srcs: srcs,
1645 dim: dim,
1646 lod_mode: lod_mode,
1647 is_ms: tex.op == nir_texop_txf_ms,
1648 offset: offset_mode == Tld4OffsetMode::AddOffI,
1649 mask: mask,
1650 });
1651 } else if tex.op == nir_texop_tg4 {
1652 b.push_op(OpTld4 {
1653 dsts: dsts,
1654 resident: Dst::None,
1655 srcs: srcs,
1656 dim: dim,
1657 comp: tex.component().try_into().unwrap(),
1658 offset_mode: offset_mode,
1659 z_cmpr: flags.has_z_cmpr(),
1660 mask: mask,
1661 });
1662 } else {
1663 assert!(offset_mode != Tld4OffsetMode::PerPx);
1664 b.push_op(OpTex {
1665 dsts: dsts,
1666 resident: Dst::None,
1667 srcs: srcs,
1668 dim: dim,
1669 lod_mode: lod_mode,
1670 z_cmpr: flags.has_z_cmpr(),
1671 offset: offset_mode == Tld4OffsetMode::AddOffI,
1672 mask: mask,
1673 });
1674 }
1675 }
1676
1677 let mut di = 0_usize;
1678 let mut nir_dst = Vec::new();
1679 for i in 0..tex.def.num_components() {
1680 if mask & (1 << i) == 0 {
1681 nir_dst.push(b.copy(0.into())[0]);
1682 } else {
1683 nir_dst.push(dst[di]);
1684 di += 1;
1685 }
1686 }
1687 self.set_ssa(tex.def.as_def(), nir_dst);
1688 }
1689
1690 fn get_atomic_type(&self, intrin: &nir_intrinsic_instr) -> AtomType {
1691 let bit_size = intrin.def.bit_size();
1692 match intrin.atomic_op() {
1693 nir_atomic_op_iadd => AtomType::U(bit_size),
1694 nir_atomic_op_imin => AtomType::I(bit_size),
1695 nir_atomic_op_umin => AtomType::U(bit_size),
1696 nir_atomic_op_imax => AtomType::I(bit_size),
1697 nir_atomic_op_umax => AtomType::U(bit_size),
1698 nir_atomic_op_iand => AtomType::U(bit_size),
1699 nir_atomic_op_ior => AtomType::U(bit_size),
1700 nir_atomic_op_ixor => AtomType::U(bit_size),
1701 nir_atomic_op_xchg => AtomType::U(bit_size),
1702 nir_atomic_op_fadd => AtomType::F(bit_size),
1703 nir_atomic_op_fmin => AtomType::F(bit_size),
1704 nir_atomic_op_fmax => AtomType::F(bit_size),
1705 nir_atomic_op_cmpxchg => AtomType::U(bit_size),
1706 _ => panic!("Unsupported NIR atomic op"),
1707 }
1708 }
1709
1710 fn get_atomic_op(&self, intrin: &nir_intrinsic_instr) -> AtomOp {
1711 match intrin.atomic_op() {
1712 nir_atomic_op_iadd => AtomOp::Add,
1713 nir_atomic_op_imin => AtomOp::Min,
1714 nir_atomic_op_umin => AtomOp::Min,
1715 nir_atomic_op_imax => AtomOp::Max,
1716 nir_atomic_op_umax => AtomOp::Max,
1717 nir_atomic_op_iand => AtomOp::And,
1718 nir_atomic_op_ior => AtomOp::Or,
1719 nir_atomic_op_ixor => AtomOp::Xor,
1720 nir_atomic_op_xchg => AtomOp::Exch,
1721 nir_atomic_op_fadd => AtomOp::Add,
1722 nir_atomic_op_fmin => AtomOp::Min,
1723 nir_atomic_op_fmax => AtomOp::Max,
1724 nir_atomic_op_cmpxchg => AtomOp::CmpExch,
1725 _ => panic!("Unsupported NIR atomic op"),
1726 }
1727 }
1728
1729 fn get_eviction_priority(
1730 &mut self,
1731 access: gl_access_qualifier,
1732 ) -> MemEvictionPriority {
1733 if self.info.sm >= 70 && access & ACCESS_NON_TEMPORAL != 0 {
1734 MemEvictionPriority::First
1735 } else {
1736 MemEvictionPriority::Normal
1737 }
1738 }
1739
1740 fn get_image_dim(&mut self, intrin: &nir_intrinsic_instr) -> ImageDim {
1741 let is_array = intrin.image_array();
1742 let image_dim = intrin.image_dim();
1743 match intrin.image_dim() {
1744 GLSL_SAMPLER_DIM_1D => {
1745 if is_array {
1746 ImageDim::_1DArray
1747 } else {
1748 ImageDim::_1D
1749 }
1750 }
1751 GLSL_SAMPLER_DIM_2D => {
1752 if is_array {
1753 ImageDim::_2DArray
1754 } else {
1755 ImageDim::_2D
1756 }
1757 }
1758 GLSL_SAMPLER_DIM_3D => {
1759 assert!(!is_array);
1760 ImageDim::_3D
1761 }
1762 GLSL_SAMPLER_DIM_CUBE => ImageDim::_2DArray,
1763 GLSL_SAMPLER_DIM_BUF => {
1764 assert!(!is_array);
1765 ImageDim::_1DBuffer
1766 }
1767 _ => panic!("Unsupported image dimension: {}", image_dim),
1768 }
1769 }
1770
1771 fn get_image_coord(
1772 &mut self,
1773 intrin: &nir_intrinsic_instr,
1774 dim: ImageDim,
1775 ) -> Src {
1776 let vec = self.get_ssa(intrin.get_src(1).as_def());
1777 // let sample = self.get_src(&srcs[2]);
1778 let comps = usize::from(dim.coord_comps());
1779 SSARef::try_from(&vec[0..comps]).unwrap().into()
1780 }
1781
1782 fn parse_intrinsic(
1783 &mut self,
1784 b: &mut impl SSABuilder,
1785 intrin: &nir_intrinsic_instr,
1786 ) {
1787 let srcs = intrin.srcs_as_slice();
1788 match intrin.intrinsic {
1789 nir_intrinsic_al2p_nv => {
1790 let offset = self.get_src(&srcs[0]);
1791 let addr = u16::try_from(intrin.base()).unwrap();
1792
1793 let flags = intrin.flags();
1794 let flags: nak_nir_attr_io_flags =
1795 unsafe { std::mem::transmute_copy(&flags) };
1796
1797 let access = AttrAccess {
1798 addr: addr,
1799 comps: 1,
1800 patch: flags.patch(),
1801 output: flags.output(),
1802 phys: false,
1803 };
1804
1805 let dst = b.alloc_ssa(RegFile::GPR, 1);
1806 b.push_op(OpAL2P {
1807 dst: dst.into(),
1808 offset: offset,
1809 access: access,
1810 });
1811 self.set_dst(&intrin.def, dst);
1812 }
1813 nir_intrinsic_ald_nv | nir_intrinsic_ast_nv => {
1814 let addr = u16::try_from(intrin.base()).unwrap();
1815 let base = u16::try_from(intrin.range_base()).unwrap();
1816 let range = u16::try_from(intrin.range()).unwrap();
1817 let range = base..(base + range);
1818
1819 let flags = intrin.flags();
1820 let flags: nak_nir_attr_io_flags =
1821 unsafe { std::mem::transmute_copy(&flags) };
1822 assert!(!flags.patch() || !flags.phys());
1823
1824 if let ShaderIoInfo::Vtg(io) = &mut self.info.io {
1825 if flags.patch() {
1826 match &mut self.info.stage {
1827 ShaderStageInfo::TessellationInit(stage) => {
1828 assert!(flags.output());
1829 stage.per_patch_attribute_count = max(
1830 stage.per_patch_attribute_count,
1831 (range.end / 4).try_into().unwrap(),
1832 );
1833 }
1834 ShaderStageInfo::Tessellation => (),
1835 _ => panic!("Patch I/O not supported"),
1836 }
1837 } else {
1838 if flags.output() {
1839 if intrin.intrinsic == nir_intrinsic_ast_nv {
1840 io.mark_store_req(range.clone());
1841 }
1842 io.mark_attrs_written(range);
1843 } else {
1844 io.mark_attrs_read(range);
1845 }
1846 }
1847 } else {
1848 panic!("Must be a VTG stage");
1849 }
1850
1851 let access = AttrAccess {
1852 addr: addr,
1853 comps: intrin.num_components,
1854 patch: flags.patch(),
1855 output: flags.output(),
1856 phys: flags.phys(),
1857 };
1858
1859 if intrin.intrinsic == nir_intrinsic_ald_nv {
1860 let vtx = self.get_src(&srcs[0]);
1861 let offset = self.get_src(&srcs[1]);
1862
1863 assert!(intrin.def.bit_size() == 32);
1864 let dst = b.alloc_ssa(RegFile::GPR, access.comps);
1865 b.push_op(OpALd {
1866 dst: dst.into(),
1867 vtx: vtx,
1868 offset: offset,
1869 access: access,
1870 });
1871 self.set_dst(&intrin.def, dst);
1872 } else if intrin.intrinsic == nir_intrinsic_ast_nv {
1873 assert!(srcs[0].bit_size() == 32);
1874 let data = self.get_src(&srcs[0]);
1875 let vtx = self.get_src(&srcs[1]);
1876 let offset = self.get_src(&srcs[2]);
1877
1878 b.push_op(OpASt {
1879 data: data,
1880 vtx: vtx,
1881 offset: offset,
1882 access: access,
1883 });
1884 } else {
1885 panic!("Invalid VTG I/O intrinsic");
1886 }
1887 }
1888 nir_intrinsic_ballot => {
1889 assert!(srcs[0].bit_size() == 1);
1890 let src = self.get_src(&srcs[0]);
1891
1892 assert!(intrin.def.bit_size() == 32);
1893 let dst = b.alloc_ssa(RegFile::GPR, 1);
1894
1895 b.push_op(OpVote {
1896 op: VoteOp::Any,
1897 ballot: dst.into(),
1898 vote: Dst::None,
1899 pred: src,
1900 });
1901 self.set_dst(&intrin.def, dst);
1902 }
1903 nir_intrinsic_bar_break_nv => {
1904 let src = self.get_src(&srcs[0]);
1905 let bar_in = b.bmov_to_bar(src);
1906
1907 let bar_out = b.alloc_ssa(RegFile::Bar, 1);
1908 b.push_op(OpBreak {
1909 bar_out: bar_out.into(),
1910 bar_in: bar_in.into(),
1911 cond: SrcRef::True.into(),
1912 });
1913
1914 self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
1915 }
1916 nir_intrinsic_bar_set_nv => {
1917 let label = self.label_alloc.alloc();
1918 let old = self.bar_label.insert(intrin.def.index, label);
1919 assert!(old.is_none());
1920
1921 let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
1922 b.push_op(OpBClear {
1923 dst: bar_clear.into(),
1924 });
1925
1926 let bar_out = b.alloc_ssa(RegFile::Bar, 1);
1927 b.push_op(OpBSSy {
1928 bar_out: bar_out.into(),
1929 bar_in: bar_clear.into(),
1930 cond: SrcRef::True.into(),
1931 target: label,
1932 });
1933
1934 self.set_dst(&intrin.def, b.bmov_to_gpr(bar_out.into()));
1935 }
1936 nir_intrinsic_bar_sync_nv => {
1937 let src = self.get_src(&srcs[0]);
1938
1939 let bar = b.bmov_to_bar(src);
1940 b.push_op(OpBSync {
1941 bar: bar.into(),
1942 cond: SrcRef::True.into(),
1943 });
1944
1945 let bar_set_idx = &srcs[1].as_def().index;
1946 if let Some(label) = self.bar_label.get(bar_set_idx) {
1947 b.push_op(OpNop {
1948 label: Some(*label),
1949 });
1950 }
1951 }
1952 nir_intrinsic_bindless_image_atomic
1953 | nir_intrinsic_bindless_image_atomic_swap => {
1954 let handle = self.get_src(&srcs[0]);
1955 let dim = self.get_image_dim(intrin);
1956 let coord = self.get_image_coord(intrin, dim);
1957 // let sample = self.get_src(&srcs[2]);
1958 let atom_type = self.get_atomic_type(intrin);
1959 let atom_op = self.get_atomic_op(intrin);
1960
1961 assert!(
1962 intrin.def.bit_size() == 32 || intrin.def.bit_size() == 64
1963 );
1964 assert!(intrin.def.num_components() == 1);
1965 let dst = b.alloc_ssa(RegFile::GPR, intrin.def.bit_size() / 32);
1966
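                // For atomic swap (cmpxchg), both source values are packed
                // into a single data vector; 64-bit operands take two GPRs
                // each.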
1967 let data = if intrin.intrinsic
1968 == nir_intrinsic_bindless_image_atomic_swap
1969 {
1970 if intrin.def.bit_size() == 64 {
1971 SSARef::from([
1972 self.get_ssa(srcs[3].as_def())[0],
1973 self.get_ssa(srcs[3].as_def())[1],
1974 self.get_ssa(srcs[4].as_def())[0],
1975 self.get_ssa(srcs[4].as_def())[1],
1976 ])
1977 .into()
1978 } else {
1979 SSARef::from([
1980 self.get_ssa(srcs[3].as_def())[0],
1981 self.get_ssa(srcs[4].as_def())[0],
1982 ])
1983 .into()
1984 }
1985 } else {
1986 self.get_src(&srcs[3])
1987 };
1988
1989 b.push_op(OpSuAtom {
1990 dst: dst.into(),
1991 resident: Dst::None,
1992 handle: handle,
1993 coord: coord,
1994 data: data,
1995 atom_op: atom_op,
1996 atom_type: atom_type,
1997 image_dim: dim,
1998 mem_order: MemOrder::Strong(MemScope::System),
1999 mem_eviction_priority: self
2000 .get_eviction_priority(intrin.access()),
2001 });
2002 self.set_dst(&intrin.def, dst);
2003 }
2004 nir_intrinsic_bindless_image_load => {
2005 let handle = self.get_src(&srcs[0]);
2006 let dim = self.get_image_dim(intrin);
2007 let coord = self.get_image_coord(intrin, dim);
2008 // let sample = self.get_src(&srcs[2]);
2009
2010 let comps = intrin.num_components;
2011 assert!(intrin.def.bit_size() == 32);
2012 assert!(comps == 1 || comps == 2 || comps == 4);
2013
2014 let dst = b.alloc_ssa(RegFile::GPR, comps);
2015
2016 b.push_op(OpSuLd {
2017 dst: dst.into(),
2018 resident: Dst::None,
2019 image_dim: dim,
2020 mem_order: MemOrder::Strong(MemScope::System),
2021 mem_eviction_priority: self
2022 .get_eviction_priority(intrin.access()),
2023 mask: (1 << comps) - 1,
2024 handle: handle,
2025 coord: coord,
2026 });
2027 self.set_dst(&intrin.def, dst);
2028 }
2029 nir_intrinsic_bindless_image_store => {
2030 let handle = self.get_src(&srcs[0]);
2031 let dim = self.get_image_dim(intrin);
2032 let coord = self.get_image_coord(intrin, dim);
2033 // let sample = self.get_src(&srcs[2]);
2034 let data = self.get_src(&srcs[3]);
2035
2036 let comps = intrin.num_components;
2037 assert!(srcs[3].bit_size() == 32);
2038 assert!(comps == 1 || comps == 2 || comps == 4);
2039
2040 b.push_op(OpSuSt {
2041 image_dim: dim,
2042 mem_order: MemOrder::Strong(MemScope::System),
2043 mem_eviction_priority: self
2044 .get_eviction_priority(intrin.access()),
2045 mask: (1 << comps) - 1,
2046 handle: handle,
2047 coord: coord,
2048 data: data,
2049 });
2050 }
2051 nir_intrinsic_demote
2052 | nir_intrinsic_discard
2053 | nir_intrinsic_terminate => {
2054 if let ShaderIoInfo::Fragment(info) = &mut self.info.io {
2055 info.uses_kill = true;
2056 } else {
2057 panic!("OpKill is only available in fragment shaders");
2058 }
2059 b.push_op(OpKill {});
2060
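                // terminate also ends the invocation immediately, so follow
                // the kill with an exit.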
2061 if intrin.intrinsic == nir_intrinsic_terminate {
2062 b.push_op(OpExit {});
2063 }
2064 }
2065 nir_intrinsic_demote_if
2066 | nir_intrinsic_discard_if
2067 | nir_intrinsic_terminate_if => {
2068 if let ShaderIoInfo::Fragment(info) = &mut self.info.io {
2069 info.uses_kill = true;
2070 } else {
2071 panic!("OpKill is only available in fragment shaders");
2072 }
2073 let cond = self.get_ssa(srcs[0].as_def())[0];
2074 b.predicate(cond.into()).push_op(OpKill {});
2075
2076 if intrin.intrinsic == nir_intrinsic_terminate_if {
2077 b.predicate(cond.into()).push_op(OpExit {});
2078 }
2079 }
2080 nir_intrinsic_global_atomic => {
2081 let bit_size = intrin.def.bit_size();
2082 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
2083 let data = self.get_src(&srcs[1]);
2084 let atom_type = self.get_atomic_type(intrin);
2085 let atom_op = self.get_atomic_op(intrin);
2086
2087 assert!(intrin.def.num_components() == 1);
2088 let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
2089
2090 b.push_op(OpAtom {
2091 dst: dst.into(),
2092 addr: addr,
2093 cmpr: 0.into(),
2094 data: data,
2095 atom_op: atom_op,
2096 atom_type: atom_type,
2097 addr_offset: offset,
2098 mem_space: MemSpace::Global(MemAddrType::A64),
2099 mem_order: MemOrder::Strong(MemScope::System),
2100                     mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinsic access
2101 });
2102 self.set_dst(&intrin.def, dst);
2103 }
2104 nir_intrinsic_global_atomic_swap => {
2105 assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
2106 let bit_size = intrin.def.bit_size();
2107 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
2108 let cmpr = self.get_src(&srcs[1]);
2109 let data = self.get_src(&srcs[2]);
2110 let atom_type = AtomType::U(bit_size);
2111
2112 assert!(intrin.def.num_components() == 1);
2113 let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
2114
2115 b.push_op(OpAtom {
2116 dst: dst.into(),
2117 addr: addr,
2118 cmpr: cmpr,
2119 data: data,
2120 atom_op: AtomOp::CmpExch,
2121 atom_type: atom_type,
2122 addr_offset: offset,
2123 mem_space: MemSpace::Global(MemAddrType::A64),
2124 mem_order: MemOrder::Strong(MemScope::System),
2125                     mem_eviction_priority: MemEvictionPriority::Normal, // Note: no intrinsic access
2126 });
2127 self.set_dst(&intrin.def, dst);
2128 }
2129 nir_intrinsic_ipa_nv => {
2130 let addr = u16::try_from(intrin.base()).unwrap();
2131
2132 let flags = intrin.flags();
2133 let flags: nak_nir_ipa_flags =
2134 unsafe { std::mem::transmute_copy(&flags) };
2135
2136 let mode = match flags.interp_mode() {
2137 NAK_INTERP_MODE_PERSPECTIVE => PixelImap::Perspective,
2138 NAK_INTERP_MODE_SCREEN_LINEAR => PixelImap::ScreenLinear,
2139 NAK_INTERP_MODE_CONSTANT => PixelImap::Constant,
2140 _ => panic!("Unsupported interp mode"),
2141 };
2142
2143 let freq = match flags.interp_freq() {
2144 NAK_INTERP_FREQ_PASS => InterpFreq::Pass,
2145 NAK_INTERP_FREQ_PASS_MUL_W => InterpFreq::PassMulW,
2146 NAK_INTERP_FREQ_CONSTANT => InterpFreq::Constant,
2147 NAK_INTERP_FREQ_STATE => InterpFreq::State,
2148 _ => panic!("Invalid interp freq"),
2149 };
2150
2151 let loc = match flags.interp_loc() {
2152 NAK_INTERP_LOC_DEFAULT => InterpLoc::Default,
2153 NAK_INTERP_LOC_CENTROID => InterpLoc::Centroid,
2154 NAK_INTERP_LOC_OFFSET => InterpLoc::Offset,
2155 _ => panic!("Invalid interp loc"),
2156 };
2157
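                // For PassMulW interpolation, the first source supplies 1/w;
                // the offset source is only used for at-offset interpolation.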
2158 let inv_w = if freq == InterpFreq::PassMulW {
2159 self.get_src(&srcs[0])
2160 } else {
2161 0.into()
2162 };
2163
2164 let offset = if loc == InterpLoc::Offset {
2165 self.get_src(&srcs[1])
2166 } else {
2167 0.into()
2168 };
2169
2170 let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
2171 panic!("OpIpa is only used for fragment shaders");
2172 };
2173
2174 io.mark_attr_read(addr, mode);
2175
2176 let dst = b.alloc_ssa(RegFile::GPR, 1);
2177 b.push_op(OpIpa {
2178 dst: dst.into(),
2179 addr: addr,
2180 freq: freq,
2181 loc: loc,
2182 inv_w: inv_w,
2183 offset: offset,
2184 });
2185 self.set_dst(&intrin.def, dst);
2186 }
2187 nir_intrinsic_isberd_nv => {
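                // ISBERD (internal stage buffer entry read) presumably
                // translates the given index into a value usable for
                // per-vertex input addressing.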
2188 let dst = b.alloc_ssa(RegFile::GPR, 1);
2189 b.push_op(OpIsberd {
2190 dst: dst.into(),
2191 idx: self.get_src(&srcs[0]),
2192 });
2193 self.set_dst(&intrin.def, dst);
2194 }
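            // The barycentric intrinsics produce no code of their own; the
            // actual interpolation is emitted for ipa_nv.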
2195 nir_intrinsic_load_barycentric_at_offset_nv => (),
2196 nir_intrinsic_load_barycentric_centroid => (),
2197 nir_intrinsic_load_barycentric_pixel => (),
2198 nir_intrinsic_load_barycentric_sample => (),
2199 nir_intrinsic_load_global | nir_intrinsic_load_global_constant => {
2200 let size_B =
2201 (intrin.def.bit_size() / 8) * intrin.def.num_components();
2202 assert!(u32::from(size_B) <= intrin.align());
2203 let order =
2204 if intrin.intrinsic == nir_intrinsic_load_global_constant {
2205 MemOrder::Constant
2206 } else {
2207 MemOrder::Strong(MemScope::System)
2208 };
2209 let access = MemAccess {
2210 mem_type: MemType::from_size(size_B, false),
2211 space: MemSpace::Global(MemAddrType::A64),
2212 order: order,
2213 eviction_priority: self
2214 .get_eviction_priority(intrin.access()),
2215 };
2216 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 32);
2217 let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
2218
2219 b.push_op(OpLd {
2220 dst: dst.into(),
2221 addr: addr,
2222 offset: offset,
2223 access: access,
2224 });
2225 self.set_dst(&intrin.def, dst);
2226 }
2227 nir_intrinsic_ldtram_nv => {
2228 let ShaderIoInfo::Fragment(io) = &mut self.info.io else {
2229 panic!("ldtram_nv is only used for fragment shaders");
2230 };
2231
2232 assert!(
2233 intrin.def.bit_size() == 32
2234 && intrin.def.num_components == 2
2235 );
2236
2237 let flags = intrin.flags();
2238 let use_c = flags != 0;
2239
2240 let addr = u16::try_from(intrin.base()).unwrap();
2241
2242 io.mark_barycentric_attr_in(addr);
2243
2244 let dst = b.alloc_ssa(RegFile::GPR, 2);
2245 b.push_op(OpLdTram {
2246 dst: dst.into(),
2247 addr,
2248 use_c,
2249 });
2250 self.set_dst(&intrin.def, dst);
2251 }
2252 nir_intrinsic_load_sample_id => {
2253 let dst = b.alloc_ssa(RegFile::GPR, 1);
2254 b.push_op(OpPixLd {
2255 dst: dst.into(),
2256 val: PixVal::MyIndex,
2257 });
2258 self.set_dst(&intrin.def, dst);
2259 }
2260 nir_intrinsic_load_sample_mask_in => {
2261 if let ShaderIoInfo::Fragment(info) = &mut self.info.io {
2262 info.reads_sample_mask = true;
2263 } else {
2264 panic!(
2265 "sample_mask_in is only available in fragment shaders"
2266 );
2267 }
2268
2269 let dst = b.alloc_ssa(RegFile::GPR, 1);
2270 b.push_op(OpPixLd {
2271 dst: dst.into(),
2272 val: PixVal::CovMask,
2273 });
2274 self.set_dst(&intrin.def, dst);
2275 }
2276 nir_intrinsic_load_tess_coord_xy => {
2277 // Loading gl_TessCoord in tessellation evaluation shaders is
2278 // weird. It's treated as a per-vertex output which is indexed
2279 // by LANEID.
2280 match &self.info.stage {
2281 ShaderStageInfo::Tessellation => (),
2282 _ => panic!(
2283 "load_tess_coord is only available in tessellation \
2284 shaders"
2285 ),
2286 };
2287
2288 assert!(intrin.def.bit_size() == 32);
2289 assert!(intrin.def.num_components() == 2);
2290
2291 let vtx = b.alloc_ssa(RegFile::GPR, 1);
2292 b.push_op(OpS2R {
2293 dst: vtx.into(),
2294 idx: 0,
2295 });
2296
2297 let access = AttrAccess {
2298 addr: NAK_ATTR_TESS_COORD,
2299 comps: 2,
2300 patch: false,
2301 output: true,
2302 phys: false,
2303 };
2304
2305 // This is recorded as a patch output in parse_shader() because
2306 // the hardware requires it be in the SPH, whether we use it or
2307 // not.
2308
2309 let dst = b.alloc_ssa(RegFile::GPR, access.comps);
2310 b.push_op(OpALd {
2311 dst: dst.into(),
2312 vtx: vtx.into(),
2313 offset: 0.into(),
2314 access: access,
2315 });
2316 self.set_dst(&intrin.def, dst);
2317 }
2318 nir_intrinsic_load_scratch => {
2319 let size_B =
2320 (intrin.def.bit_size() / 8) * intrin.def.num_components();
2321 assert!(u32::from(size_B) <= intrin.align());
2322 let access = MemAccess {
2323 mem_type: MemType::from_size(size_B, false),
2324 space: MemSpace::Local,
2325 order: MemOrder::Strong(MemScope::CTA),
2326 eviction_priority: MemEvictionPriority::Normal,
2327 };
2328 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
2329 let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
2330
2331 b.push_op(OpLd {
2332 dst: dst.into(),
2333 addr: addr,
2334 offset: offset,
2335 access: access,
2336 });
2337 self.set_dst(&intrin.def, dst);
2338 }
2339 nir_intrinsic_load_shared => {
2340 let size_B =
2341 (intrin.def.bit_size() / 8) * intrin.def.num_components();
2342 assert!(u32::from(size_B) <= intrin.align());
2343 let access = MemAccess {
2344 mem_type: MemType::from_size(size_B, false),
2345 space: MemSpace::Shared,
2346 order: MemOrder::Strong(MemScope::CTA),
2347 eviction_priority: MemEvictionPriority::Normal,
2348 };
2349 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
2350 let offset = offset + intrin.base();
2351 let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
2352
2353 b.push_op(OpLd {
2354 dst: dst.into(),
2355 addr: addr,
2356 offset: offset,
2357 access: access,
2358 });
2359 self.set_dst(&intrin.def, dst);
2360 }
2361 nir_intrinsic_load_sysval_nv => {
2362 let idx = u8::try_from(intrin.base()).unwrap();
2363 debug_assert!(intrin.def.num_components == 1);
2364 debug_assert!(
2365 intrin.def.bit_size == 32 || intrin.def.bit_size == 64
2366 );
2367 let comps = intrin.def.bit_size / 32;
2368 let dst = b.alloc_ssa(RegFile::GPR, comps);
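                // The clock (NAK_SV_CLOCK and its high half) is read with
                // CS2R, which can load 64 bits; every other system value uses
                // a 32-bit S2R.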
2369 if idx == NAK_SV_CLOCK || idx == NAK_SV_CLOCK + 1 {
2370 debug_assert!(idx + comps <= NAK_SV_CLOCK + 2);
2371 b.push_op(OpCS2R {
2372 dst: dst.into(),
2373 idx: idx,
2374 });
2375 } else {
2376 debug_assert!(intrin.def.bit_size == 32);
2377 b.push_op(OpS2R {
2378 dst: dst.into(),
2379 idx: idx,
2380 });
2381 }
2382 self.set_dst(&intrin.def, dst);
2383 }
2384 nir_intrinsic_load_ubo => {
2385 let size_B =
2386 (intrin.def.bit_size() / 8) * intrin.def.num_components();
2387 let idx = srcs[0];
2388
2389 let (off, off_imm) = self.get_io_addr_offset(&srcs[1], 16);
2390 let (off, off_imm) =
2391 if let Ok(off_imm_u16) = u16::try_from(off_imm) {
2392 (off, off_imm_u16)
2393 } else {
2394 (self.get_src(&srcs[1]), 0)
2395 };
2396
2397 let dst = b.alloc_ssa(RegFile::GPR, size_B.div_ceil(4));
2398
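                // With an immediate buffer index, a constant offset becomes
                // direct cbuf copies while a dynamic offset goes through LDC.
                // Indirect buffer indices are not handled yet.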
2399 if let Some(idx_imm) = idx.as_uint() {
2400 let idx_imm: u8 = idx_imm.try_into().unwrap();
2401 let cb = CBufRef {
2402 buf: CBuf::Binding(idx_imm),
2403 offset: off_imm,
2404 };
2405 if off.is_zero() {
2406 for (i, comp) in dst.iter().enumerate() {
2407 let i = u16::try_from(i).unwrap();
2408 b.copy_to((*comp).into(), cb.offset(i * 4).into());
2409 }
2410 } else {
2411 b.push_op(OpLdc {
2412 dst: dst.into(),
2413 cb: cb.into(),
2414 offset: off,
2415 mem_type: MemType::from_size(size_B, false),
2416 });
2417 }
2418 } else {
2419 panic!("Indirect UBO indices not yet supported");
2420 }
2421 self.set_dst(&intrin.def, dst);
2422 }
2423 nir_intrinsic_barrier => {
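                // A barrier may expand to several instructions: a cache
                // write-back for release semantics on global memory, BAR.SYNC
                // for workgroup execution scope, MEMBAR for the memory scope,
                // and a cache invalidate for acquire semantics.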
2424 let modes = intrin.memory_modes();
2425 let semantics = intrin.memory_semantics();
2426 if (modes & nir_var_mem_global) != 0
2427 && (semantics & NIR_MEMORY_RELEASE) != 0
2428 {
2429 b.push_op(OpCCtl {
2430 op: CCtlOp::WBAll,
2431 mem_space: MemSpace::Global(MemAddrType::A64),
2432 addr: 0.into(),
2433 addr_offset: 0,
2434 });
2435 }
2436 match intrin.execution_scope() {
2437 SCOPE_NONE => (),
2438 SCOPE_WORKGROUP => {
2439 assert!(
2440 self.nir.info.stage() == MESA_SHADER_COMPUTE
2441 || self.nir.info.stage() == MESA_SHADER_KERNEL
2442 );
2443 self.info.num_barriers = 1;
2444 b.push_op(OpBar {});
2445 }
2446 _ => panic!("Unhandled execution scope"),
2447 }
2448 if intrin.memory_scope() != SCOPE_NONE {
2449 let mem_scope = match intrin.memory_scope() {
2450 SCOPE_INVOCATION | SCOPE_SUBGROUP => MemScope::CTA,
2451 SCOPE_WORKGROUP | SCOPE_QUEUE_FAMILY | SCOPE_DEVICE => {
2452 MemScope::GPU
2453 }
2454 _ => panic!("Unhandled memory scope"),
2455 };
2456 b.push_op(OpMemBar { scope: mem_scope });
2457 }
2458 if (modes & nir_var_mem_global) != 0
2459 && (semantics & NIR_MEMORY_ACQUIRE) != 0
2460 {
2461 b.push_op(OpCCtl {
2462 op: CCtlOp::IVAll,
2463 mem_space: MemSpace::Global(MemAddrType::A64),
2464 addr: 0.into(),
2465 addr_offset: 0,
2466 });
2467 }
2468 }
2469 nir_intrinsic_quad_broadcast
2470 | nir_intrinsic_read_invocation
2471 | nir_intrinsic_shuffle
2472 | nir_intrinsic_shuffle_down
2473 | nir_intrinsic_shuffle_up
2474 | nir_intrinsic_shuffle_xor => {
2475 assert!(srcs[0].bit_size() == 32);
2476 assert!(srcs[0].num_components() == 1);
2477 let data = self.get_src(&srcs[0]);
2478
2479 assert!(srcs[1].bit_size() == 32);
2480 let idx = self.get_src(&srcs[1]);
2481
2482 assert!(intrin.def.bit_size() == 32);
2483 let dst = b.alloc_ssa(RegFile::GPR, 1);
2484
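                // The c source packs the SHFL segment mask and lane clamp:
                // 0x1c_03 restricts the shuffle to a quad, 0x1f allows the
                // whole warp, and shuffle_up clamps at lane 0.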
2485 b.push_op(OpShfl {
2486 dst: dst.into(),
2487 in_bounds: Dst::None,
2488 src: data,
2489 lane: idx,
2490 c: match intrin.intrinsic {
2491 nir_intrinsic_quad_broadcast => 0x1c_03.into(),
2492 nir_intrinsic_shuffle_up => 0.into(),
2493 _ => 0x1f.into(),
2494 },
2495 op: match intrin.intrinsic {
2496 nir_intrinsic_shuffle_down => ShflOp::Down,
2497 nir_intrinsic_shuffle_up => ShflOp::Up,
2498 nir_intrinsic_shuffle_xor => ShflOp::Bfly,
2499 _ => ShflOp::Idx,
2500 },
2501 });
2502 self.set_dst(&intrin.def, dst);
2503 }
2504 nir_intrinsic_quad_swap_horizontal
2505 | nir_intrinsic_quad_swap_vertical
2506 | nir_intrinsic_quad_swap_diagonal => {
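                // Quad swaps are butterfly shuffles within a quad
                // (c = 0x1c_03) with XOR masks 1, 2, and 3 for horizontal,
                // vertical, and diagonal swaps respectively.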
2507 assert!(srcs[0].bit_size() == 32);
2508 assert!(srcs[0].num_components() == 1);
2509 let data = self.get_src(&srcs[0]);
2510
2511 assert!(intrin.def.bit_size() == 32);
2512 let dst = b.alloc_ssa(RegFile::GPR, 1);
2513 b.push_op(OpShfl {
2514 dst: dst.into(),
2515 in_bounds: Dst::None,
2516 src: data,
2517 lane: match intrin.intrinsic {
2518 nir_intrinsic_quad_swap_horizontal => 1_u32.into(),
2519 nir_intrinsic_quad_swap_vertical => 2_u32.into(),
2520 nir_intrinsic_quad_swap_diagonal => 3_u32.into(),
2521 op => panic!("Unknown quad intrinsic {}", op),
2522 },
2523 c: 0x1c_03.into(),
2524 op: ShflOp::Bfly,
2525 });
2526 self.set_dst(&intrin.def, dst);
2527 }
2528 nir_intrinsic_shared_atomic => {
2529 let bit_size = intrin.def.bit_size();
2530 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
2531 let data = self.get_src(&srcs[1]);
2532 let atom_type = self.get_atomic_type(intrin);
2533 let atom_op = self.get_atomic_op(intrin);
2534
2535 assert!(intrin.def.num_components() == 1);
2536 let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
2537
2538 b.push_op(OpAtom {
2539 dst: dst.into(),
2540 addr: addr,
2541 cmpr: 0.into(),
2542 data: data,
2543 atom_op: atom_op,
2544 atom_type: atom_type,
2545 addr_offset: offset,
2546 mem_space: MemSpace::Shared,
2547 mem_order: MemOrder::Strong(MemScope::CTA),
2548 mem_eviction_priority: MemEvictionPriority::Normal,
2549 });
2550 self.set_dst(&intrin.def, dst);
2551 }
2552 nir_intrinsic_shared_atomic_swap => {
2553 assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
2554 let bit_size = intrin.def.bit_size();
2555 let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
2556 let cmpr = self.get_src(&srcs[1]);
2557 let data = self.get_src(&srcs[2]);
2558 let atom_type = AtomType::U(bit_size);
2559
2560 assert!(intrin.def.num_components() == 1);
2561 let dst = b.alloc_ssa(RegFile::GPR, bit_size.div_ceil(32));
2562
2563 b.push_op(OpAtom {
2564 dst: dst.into(),
2565 addr: addr,
2566 cmpr: cmpr,
2567 data: data,
2568 atom_op: AtomOp::CmpExch,
2569 atom_type: atom_type,
2570 addr_offset: offset,
2571 mem_space: MemSpace::Shared,
2572 mem_order: MemOrder::Strong(MemScope::CTA),
2573 mem_eviction_priority: MemEvictionPriority::Normal,
2574 });
2575 self.set_dst(&intrin.def, dst);
2576 }
2577 nir_intrinsic_ssa_bar_nv => {
2578 let src = self.get_src(&srcs[0]);
2579 b.push_op(OpSrcBar { src });
2580 }
2581 nir_intrinsic_store_global => {
2582 let data = self.get_src(&srcs[0]);
2583 let size_B =
2584 (srcs[0].bit_size() / 8) * srcs[0].num_components();
2585 assert!(u32::from(size_B) <= intrin.align());
2586 let access = MemAccess {
2587 mem_type: MemType::from_size(size_B, false),
2588 space: MemSpace::Global(MemAddrType::A64),
2589 order: MemOrder::Strong(MemScope::System),
2590 eviction_priority: self
2591 .get_eviction_priority(intrin.access()),
2592 };
2593 let (addr, offset) = self.get_io_addr_offset(&srcs[1], 32);
2594
2595 b.push_op(OpSt {
2596 addr: addr,
2597 data: data,
2598 offset: offset,
2599 access: access,
2600 });
2601 }
2602 nir_intrinsic_store_output => {
2603 let ShaderIoInfo::Fragment(_) = &mut self.info.io else {
2604                     panic!("store_output is only used for fragment shaders");
2605 };
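                // Fragment outputs are not stored here.  Stash the SSA values
                // in fs_out_regs so store_fs_outputs() can emit a single
                // OpFSOut at the end of the shader.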
2606 let data = self.get_src(&srcs[0]);
2607
2608 let addr = u16::try_from(intrin.base()).unwrap()
2609 + u16::try_from(srcs[1].as_uint().unwrap()).unwrap()
2610 + 4 * u16::try_from(intrin.component()).unwrap();
2611 assert!(addr % 4 == 0);
2612
2613 for c in 0..usize::from(intrin.num_components) {
2614 let idx = usize::from(addr / 4) + c;
2615 self.fs_out_regs[idx] = data.as_ssa().unwrap()[c];
2616 }
2617 }
2618 nir_intrinsic_store_scratch => {
2619 let data = self.get_src(&srcs[0]);
2620 let size_B =
2621 (srcs[0].bit_size() / 8) * srcs[0].num_components();
2622 assert!(u32::from(size_B) <= intrin.align());
2623 let access = MemAccess {
2624 mem_type: MemType::from_size(size_B, false),
2625 space: MemSpace::Local,
2626 order: MemOrder::Strong(MemScope::CTA),
2627 eviction_priority: MemEvictionPriority::Normal,
2628 };
2629 let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
2630
2631 b.push_op(OpSt {
2632 addr: addr,
2633 data: data,
2634 offset: offset,
2635 access: access,
2636 });
2637 }
2638 nir_intrinsic_store_shared => {
2639 let data = self.get_src(&srcs[0]);
2640 let size_B =
2641 (srcs[0].bit_size() / 8) * srcs[0].num_components();
2642 assert!(u32::from(size_B) <= intrin.align());
2643 let access = MemAccess {
2644 mem_type: MemType::from_size(size_B, false),
2645 space: MemSpace::Shared,
2646 order: MemOrder::Strong(MemScope::CTA),
2647 eviction_priority: MemEvictionPriority::Normal,
2648 };
2649 let (addr, offset) = self.get_io_addr_offset(&srcs[1], 24);
2650 let offset = offset + intrin.base();
2651
2652 b.push_op(OpSt {
2653 addr: addr,
2654 data: data,
2655 offset: offset,
2656 access: access,
2657 });
2658 }
2659 nir_intrinsic_emit_vertex_nv | nir_intrinsic_end_primitive_nv => {
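                // Both intrinsics lower to OUT: Emit for emit_vertex and Cut
                // for end_primitive.  The destination is the updated GS
                // output handle.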
2660 assert!(intrin.def.bit_size() == 32);
2661 assert!(intrin.def.num_components() == 1);
2662
2663 let dst = b.alloc_ssa(RegFile::GPR, 1);
2664 let handle = self.get_src(&srcs[0]);
2665 let stream_id = intrin.stream_id();
2666
2667 b.push_op(OpOut {
2668 dst: dst.into(),
2669 handle: handle,
2670 stream: stream_id.into(),
2671 out_type: if intrin.intrinsic
2672 == nir_intrinsic_emit_vertex_nv
2673 {
2674 OutType::Emit
2675 } else {
2676 OutType::Cut
2677 },
2678 });
2679 self.set_dst(&intrin.def, dst);
2680 }
2681
2682 nir_intrinsic_final_primitive_nv => {
2683 let handle = self.get_src(&srcs[0]);
2684
2685 if self.info.sm >= 70 {
2686 b.push_op(OpOutFinal { handle: handle });
2687 }
2688 }
2689 nir_intrinsic_vote_all
2690 | nir_intrinsic_vote_any
2691 | nir_intrinsic_vote_ieq => {
2692 assert!(srcs[0].bit_size() == 1);
2693 let src = self.get_src(&srcs[0]);
2694
2695 assert!(intrin.def.bit_size() == 1);
2696 let dst = b.alloc_ssa(RegFile::Pred, 1);
2697
2698 b.push_op(OpVote {
2699 op: match intrin.intrinsic {
2700 nir_intrinsic_vote_all => VoteOp::All,
2701 nir_intrinsic_vote_any => VoteOp::Any,
2702 nir_intrinsic_vote_ieq => VoteOp::Eq,
2703 _ => panic!("Unknown vote intrinsic"),
2704 },
2705 ballot: Dst::None,
2706 vote: dst.into(),
2707 pred: src,
2708 });
2709 self.set_dst(&intrin.def, dst);
2710 }
2711 _ => panic!(
2712 "Unsupported intrinsic instruction: {}",
2713 intrin.info().name()
2714 ),
2715 }
2716 }
2717
2718     fn parse_load_const(
2719 &mut self,
2720 b: &mut impl SSABuilder,
2721 load_const: &nir_load_const_instr,
2722 ) {
2723 let values = &load_const.values();
2724
2725 let mut dst = Vec::new();
2726 match load_const.def.bit_size {
2727 1 => {
2728 for c in 0..load_const.def.num_components {
2729 let imm_b1 = unsafe { values[usize::from(c)].b };
2730 dst.push(b.copy(imm_b1.into())[0]);
2731 }
2732 }
2733 8 => {
2734 for dw in 0..load_const.def.num_components.div_ceil(4) {
2735 let mut imm_u32 = 0;
2736 for b in 0..4 {
2737 let c = dw * 4 + b;
2738 if c < load_const.def.num_components {
2739 let imm_u8 = unsafe { values[usize::from(c)].u8_ };
2740 imm_u32 |= u32::from(imm_u8) << b * 8;
2741 }
2742 }
2743 dst.push(b.copy(imm_u32.into())[0]);
2744 }
2745 }
2746 16 => {
2747 for dw in 0..load_const.def.num_components.div_ceil(2) {
2748 let mut imm_u32 = 0;
2749 for w in 0..2 {
2750 let c = dw * 2 + w;
2751 if c < load_const.def.num_components {
2752 let imm_u16 =
2753 unsafe { values[usize::from(c)].u16_ };
2754 imm_u32 |= u32::from(imm_u16) << w * 16;
2755 }
2756 }
2757 dst.push(b.copy(imm_u32.into())[0]);
2758 }
2759 }
2760 32 => {
2761 for c in 0..load_const.def.num_components {
2762 let imm_u32 = unsafe { values[usize::from(c)].u32_ };
2763 dst.push(b.copy(imm_u32.into())[0]);
2764 }
2765 }
2766 64 => {
2767 for c in 0..load_const.def.num_components {
2768 let imm_u64 = unsafe { values[c as usize].u64_ };
2769 dst.push(b.copy((imm_u64 as u32).into())[0]);
2770 dst.push(b.copy(((imm_u64 >> 32) as u32).into())[0]);
2771 }
2772 }
2773 _ => panic!("Unknown bit size: {}", load_const.def.bit_size),
2774 }
2775
2776 self.set_ssa(&load_const.def, dst);
2777 }
2778
2779     fn parse_undef(
2780 &mut self,
2781 b: &mut impl SSABuilder,
2782 undef: &nir_undef_instr,
2783 ) {
2784 let dst = alloc_ssa_for_nir(b, &undef.def);
2785 for c in &dst {
2786 b.push_op(OpUndef { dst: (*c).into() });
2787 }
2788 self.set_ssa(&undef.def, dst);
2789 }
2790
2791     fn store_fs_outputs(&mut self, b: &mut impl SSABuilder) {
2792 let ShaderIoInfo::Fragment(info) = &mut self.info.io else {
2793 return;
2794 };
2795
2796 for i in 0..32 {
2797             // Assume that colors have to come as a vec4 at a time
2798 if !self.fs_out_regs[i].is_none() {
2799 info.writes_color |= 0xf << (i & !3)
2800 }
2801 }
2802 let mask_idx = (NAK_FS_OUT_SAMPLE_MASK / 4) as usize;
2803 info.writes_sample_mask = !self.fs_out_regs[mask_idx].is_none();
2804 let depth_idx = (NAK_FS_OUT_DEPTH / 4) as usize;
2805 info.writes_depth = !self.fs_out_regs[depth_idx].is_none();
2806
2807 let mut srcs = Vec::new();
2808 for i in 0..32 {
2809 if info.writes_color & (1 << i) != 0 {
2810 if self.fs_out_regs[i].is_none() {
2811 srcs.push(0.into());
2812 } else {
2813 srcs.push(self.fs_out_regs[i].into());
2814 }
2815 }
2816 }
2817
2818 // These always come together for some reason
2819 if info.writes_sample_mask || info.writes_depth {
2820 if info.writes_sample_mask {
2821 srcs.push(self.fs_out_regs[mask_idx].into());
2822 } else {
2823 srcs.push(0.into());
2824 }
2825 if info.writes_depth {
2826 // Saturate depth writes.
2827 //
2828 // TODO: This seems wrong in light of unrestricted depth but
2829 // it's needed to pass CTS tests for now.
2830 let depth = self.fs_out_regs[depth_idx];
2831 let sat_depth = b.alloc_ssa(RegFile::GPR, 1);
2832 b.push_op(OpFAdd {
2833 dst: sat_depth.into(),
2834 srcs: [depth.into(), 0.into()],
2835 saturate: true,
2836 rnd_mode: FRndMode::NearestEven,
2837 ftz: false,
2838 });
2839 srcs.push(sat_depth.into());
2840 }
2841 }
2842
2843 b.push_op(OpFSOut { srcs: srcs });
2844 }
2845
2846     fn parse_block(
2847 &mut self,
2848 ssa_alloc: &mut SSAValueAllocator,
2849 phi_map: &mut PhiAllocMap,
2850 nb: &nir_block,
2851 ) {
2852 let mut b = SSAInstrBuilder::new(self.info.sm, ssa_alloc);
2853
2854 if nb.index == 0 && self.nir.info.shared_size > 0 {
2855 // The blob seems to always do a BSYNC before accessing shared
2856 // memory. Perhaps this is to ensure that our allocation is
2857 // actually available and not in use by another thread?
2858 let label = self.label_alloc.alloc();
2859 let bar_clear = b.alloc_ssa(RegFile::Bar, 1);
2860
2861 b.push_op(OpBClear {
2862 dst: bar_clear.into(),
2863 });
2864
2865 let bar = b.alloc_ssa(RegFile::Bar, 1);
2866 b.push_op(OpBSSy {
2867 bar_out: bar.into(),
2868 bar_in: bar_clear.into(),
2869 cond: SrcRef::True.into(),
2870 target: label,
2871 });
2872
2873 b.push_op(OpBSync {
2874 bar: bar.into(),
2875 cond: SrcRef::True.into(),
2876 });
2877
2878 b.push_op(OpNop { label: Some(label) });
2879 }
2880
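        // Gather all phis at the top of the block into a single OpPhiDsts.
        // The matching sources are added when each predecessor visits its
        // successors below.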
2881 let mut phi = OpPhiDsts::new();
2882 for ni in nb.iter_instr_list() {
2883 if ni.type_ == nir_instr_type_phi {
2884 let np = ni.as_phi().unwrap();
2885 let dst = alloc_ssa_for_nir(&mut b, np.def.as_def());
2886 for (i, dst) in dst.iter().enumerate() {
2887 let phi_id = phi_map.get_phi_id(np, i.try_into().unwrap());
2888 phi.dsts.push(phi_id, (*dst).into());
2889 }
2890 self.set_ssa(np.def.as_def(), dst);
2891 } else {
2892 break;
2893 }
2894 }
2895
2896 if !phi.dsts.is_empty() {
2897 b.push_op(phi);
2898 }
2899
2900 for ni in nb.iter_instr_list() {
2901 if DEBUG.annotate() {
2902 let annotation = self
2903 .nir_instr_printer
2904 .instr_to_string(ni)
2905 .split_whitespace()
2906 .collect::<Vec<_>>()
2907 .join(" ");
2908 b.push_op(OpAnnotate {
2909 annotation: format!("generated by \"{}\"", annotation,),
2910 });
2911 }
2912
2913 match ni.type_ {
2914 nir_instr_type_alu => {
2915 self.parse_alu(&mut b, ni.as_alu().unwrap())
2916 }
2917 nir_instr_type_jump => {
2918 self.parse_jump(&mut b, ni.as_jump().unwrap())
2919 }
2920 nir_instr_type_tex => {
2921 self.parse_tex(&mut b, ni.as_tex().unwrap())
2922 }
2923 nir_instr_type_intrinsic => {
2924 self.parse_intrinsic(&mut b, ni.as_intrinsic().unwrap())
2925 }
2926 nir_instr_type_load_const => {
2927 self.parse_load_const(&mut b, ni.as_load_const().unwrap())
2928 }
2929 nir_instr_type_undef => {
2930 self.parse_undef(&mut b, ni.as_undef().unwrap())
2931 }
2932 nir_instr_type_phi => (),
2933 _ => panic!("Unsupported instruction type"),
2934 }
2935 }
2936
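        // For each successor block, collect the phi sources that correspond
        // to this block into an OpPhiSrcs.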
2937 let succ = nb.successors();
2938 for sb in succ {
2939 let sb = match sb {
2940 Some(b) => b,
2941 None => continue,
2942 };
2943
2944 let mut phi = OpPhiSrcs::new();
2945
2946 for i in sb.iter_instr_list() {
2947 let np = match i.as_phi() {
2948 Some(phi) => phi,
2949 None => break,
2950 };
2951
2952 for ps in np.iter_srcs() {
2953 if ps.pred().index == nb.index {
2954 let src = *self.get_src(&ps.src).as_ssa().unwrap();
2955 for (i, src) in src.iter().enumerate() {
2956 let phi_id =
2957 phi_map.get_phi_id(np, i.try_into().unwrap());
2958 phi.srcs.push(phi_id, (*src).into());
2959 }
2960 break;
2961 }
2962 }
2963 }
2964
2965 if !phi.srcs.is_empty() {
2966 b.push_op(phi);
2967 }
2968 }
2969
2970 if let Some(ni) = nb.following_if() {
2971 // The fall-through edge has to come first
2972 self.cfg.add_edge(nb.index, ni.first_then_block().index);
2973 self.cfg.add_edge(nb.index, ni.first_else_block().index);
2974
2975 let mut bra = Instr::new_boxed(OpBra {
2976 target: self.get_block_label(ni.first_else_block()),
2977 });
2978
2979 let cond = self.get_ssa(ni.condition.as_def())[0];
2980 bra.pred = cond.into();
2981             // This is the branch that jumps to the else block
2982 bra.pred.pred_inv = true;
2983
2984 b.push_instr(bra);
2985 } else {
2986 assert!(succ[1].is_none());
2987 let s0 = succ[0].unwrap();
2988 if s0.index == self.end_block_id {
2989 self.store_fs_outputs(&mut b);
2990 b.push_op(OpExit {});
2991 } else {
2992 self.cfg.add_edge(nb.index, s0.index);
2993 b.push_op(OpBra {
2994 target: self.get_block_label(s0),
2995 });
2996 }
2997 }
2998
2999 let mut bb = BasicBlock::new(self.get_block_label(nb));
3000 bb.instrs.append(&mut b.as_vec());
3001 self.cfg.add_node(nb.index, bb);
3002 }
3003
3004     fn parse_if(
3005 &mut self,
3006 ssa_alloc: &mut SSAValueAllocator,
3007 phi_map: &mut PhiAllocMap,
3008 ni: &nir_if,
3009 ) {
3010 self.parse_cf_list(ssa_alloc, phi_map, ni.iter_then_list());
3011 self.parse_cf_list(ssa_alloc, phi_map, ni.iter_else_list());
3012 }
3013
3014     fn parse_loop(
3015 &mut self,
3016 ssa_alloc: &mut SSAValueAllocator,
3017 phi_map: &mut PhiAllocMap,
3018 nl: &nir_loop,
3019 ) {
3020 self.parse_cf_list(ssa_alloc, phi_map, nl.iter_body());
3021 }
3022
3023     fn parse_cf_list(
3024 &mut self,
3025 ssa_alloc: &mut SSAValueAllocator,
3026 phi_map: &mut PhiAllocMap,
3027 list: ExecListIter<nir_cf_node>,
3028 ) {
3029 for node in list {
3030 match node.type_ {
3031 nir_cf_node_block => {
3032 let nb = node.as_block().unwrap();
3033 self.parse_block(ssa_alloc, phi_map, nb);
3034 }
3035 nir_cf_node_if => {
3036 let ni = node.as_if().unwrap();
3037 self.parse_if(ssa_alloc, phi_map, ni);
3038 }
3039 nir_cf_node_loop => {
3040 let nl = node.as_loop().unwrap();
3041 self.parse_loop(ssa_alloc, phi_map, nl);
3042 }
3043 _ => panic!("Invalid inner CF node type"),
3044 }
3045 }
3046 }
3047
3048     pub fn parse_function_impl(&mut self, nfi: &nir_function_impl) -> Function {
3049 let mut ssa_alloc = SSAValueAllocator::new();
3050 self.end_block_id = nfi.end_block().index;
3051
3052 let mut phi_alloc = PhiAllocator::new();
3053 let mut phi_map = PhiAllocMap::new(&mut phi_alloc);
3054
3055 self.parse_cf_list(&mut ssa_alloc, &mut phi_map, nfi.iter_body());
3056
3057 let cfg = std::mem::take(&mut self.cfg).as_cfg();
3058 assert!(cfg.len() > 0);
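        // Sanity check: any block that falls through must be laid out
        // immediately before its successor.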
3059 for i in 0..cfg.len() {
3060 if cfg[i].falls_through() {
3061 assert!(cfg.succ_indices(i)[0] == i + 1);
3062 }
3063 }
3064
3065 Function {
3066 ssa_alloc: ssa_alloc,
3067 phi_alloc: phi_alloc,
3068 blocks: cfg,
3069 }
3070 }
3071
3072     pub fn parse_shader(mut self) -> Shader {
3073 let mut functions = Vec::new();
3074 for nf in self.nir.iter_functions() {
3075 if let Some(nfi) = nf.get_impl() {
3076 let f = self.parse_function_impl(nfi);
3077 functions.push(f);
3078 }
3079 }
3080
3081 // Tessellation evaluation shaders MUST claim to read gl_TessCoord or
3082 // the hardware will throw an SPH error.
3083 if matches!(self.info.stage, ShaderStageInfo::Tessellation) {
3084 match &mut self.info.io {
3085 ShaderIoInfo::Vtg(io) => {
3086 let tc = NAK_ATTR_TESS_COORD;
3087 io.mark_attrs_written(tc..(tc + 8));
3088 }
3089 _ => panic!("Tessellation must have ShaderIoInfo::Vtg"),
3090 }
3091 }
3092
3093 Shader {
3094 info: self.info,
3095 functions: functions,
3096 }
3097 }
3098 }
3099
3100 pub fn nak_shader_from_nir(ns: &nir_shader, sm: u8) -> Shader {
3101 ShaderFromNir::new(ns, sm).parse_shader()
3102 }
3103