// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT
3 
4 extern crate bitview;
5 extern crate nak_ir_proc;
6 
7 use bitview::{BitMutView, BitView};
8 use nak_bindings::*;
9 
10 pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
11 use crate::legalize::LegalizeBuilder;
12 use crate::sph::{OutputTopology, PixelImap};
13 use compiler::as_slice::*;
14 use compiler::cfg::CFG;
15 use compiler::smallvec::SmallVec;
16 use nak_ir_proc::*;
17 use std::cmp::{max, min};
18 use std::fmt;
19 use std::fmt::Write;
20 use std::iter::Zip;
21 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
22 use std::slice;
23 
/// A label, identified by a 32-bit index
///
/// Two labels compare equal exactly when their indices are equal.  Labels
/// are created by [`LabelAllocator::alloc`].
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct Label {
    idx: u32,
}

impl fmt::Display for Label {
    /// Writes the label as `L` followed by its decimal index, e.g. `L3`
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "L{}", self.idx)
    }
}
34 
35 pub struct LabelAllocator {
36     count: u32,
37 }
38 
39 impl LabelAllocator {
new() -> LabelAllocator40     pub fn new() -> LabelAllocator {
41         LabelAllocator { count: 0 }
42     }
43 
alloc(&mut self) -> Label44     pub fn alloc(&mut self) -> Label {
45         let idx = self.count;
46         self.count += 1;
47         Label { idx: idx }
48     }
49 }
50 
/// Represents a register file
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RegFile {
    /// The general-purpose register file
    ///
    /// General-purpose registers are 32 bits per SIMT channel.
    GPR = 0,

    /// The general-purpose uniform register file
    ///
    /// General-purpose uniform registers are 32 bits each and uniform across a
    /// wave.
    UGPR = 1,

    /// The predicate register file
    ///
    /// Predicate registers are 1 bit per SIMT channel.
    Pred = 2,

    /// The uniform predicate register file
    ///
    /// Uniform predicate registers are 1 bit and uniform across a wave.
    UPred = 3,

    /// The carry flag register file
    ///
    /// Only one carry flag register exists in hardware, but representing it as
    /// a reg file simplifies dependency tracking.
    ///
    /// This is used only on SM50.
    Carry = 4,

    /// The barrier register file
    ///
    /// This is a lane mask used for wave re-convergence instructions.
    Bar = 5,

    /// The memory register file
    ///
    /// This is a virtual register file for things which will get spilled to
    /// local memory.  Each memory location is 32 bits per SIMT channel.
    Mem = 6,
}

/// The number of [`RegFile`] variants; must be kept in sync with the enum
const NUM_REG_FILES: usize = 7;

impl RegFile {
    /// Returns true if the register file is uniform across a wave
    pub fn is_uniform(&self) -> bool {
        // Every variant is listed so that adding a register file forces a
        // decision here instead of silently falling into a default.
        match self {
            RegFile::UGPR | RegFile::UPred => true,
            RegFile::GPR
            | RegFile::Pred
            | RegFile::Carry
            | RegFile::Bar
            | RegFile::Mem => false,
        }
    }

    /// Returns the uniform counterpart of this register file
    ///
    /// `GPR`/`UGPR` map to `UGPR` and `Pred`/`UPred` map to `UPred`.  The
    /// remaining files have no uniform counterpart and yield `None`.
    pub fn to_uniform(&self) -> Option<RegFile> {
        match self {
            RegFile::GPR | RegFile::UGPR => Some(RegFile::UGPR),
            RegFile::Pred | RegFile::UPred => Some(RegFile::UPred),
            RegFile::Carry | RegFile::Bar | RegFile::Mem => None,
        }
    }

    /// Returns the non-uniform counterpart of this register file
    ///
    /// `GPR`/`UGPR` map to `GPR` and `Pred`/`UPred` map to `Pred`.  The
    /// remaining files are returned unchanged.
    pub fn to_warp(&self) -> RegFile {
        match self {
            RegFile::GPR | RegFile::UGPR => RegFile::GPR,
            RegFile::Pred | RegFile::UPred => RegFile::Pred,
            RegFile::Carry | RegFile::Bar | RegFile::Mem => *self,
        }
    }

    /// Returns true if the register file is general-purpose
    pub fn is_gpr(&self) -> bool {
        match self {
            RegFile::GPR | RegFile::UGPR => true,
            RegFile::Pred
            | RegFile::UPred
            | RegFile::Carry
            | RegFile::Bar
            | RegFile::Mem => false,
        }
    }

    /// Returns true if the register file is a predicate register file
    pub fn is_predicate(&self) -> bool {
        match self {
            RegFile::Pred | RegFile::UPred => true,
            RegFile::GPR
            | RegFile::UGPR
            | RegFile::Carry
            | RegFile::Bar
            | RegFile::Mem => false,
        }
    }

    /// Returns the short lower-case prefix used when printing a value or
    /// register that lives in this file (e.g. `r` for `GPR`)
    fn fmt_prefix(&self) -> &'static str {
        match self {
            RegFile::GPR => "r",
            RegFile::UGPR => "ur",
            RegFile::Pred => "p",
            RegFile::UPred => "up",
            RegFile::Carry => "c",
            RegFile::Bar => "b",
            RegFile::Mem => "m",
        }
    }
}

impl fmt::Display for RegFile {
    /// Writes the variant name (`GPR`, `UGPR`, `Pred`, ...)
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            RegFile::GPR => "GPR",
            RegFile::UGPR => "UGPR",
            RegFile::Pred => "Pred",
            RegFile::UPred => "UPred",
            RegFile::Carry => "Carry",
            RegFile::Bar => "Bar",
            RegFile::Mem => "Mem",
        };
        f.write_str(name)
    }
}

impl From<RegFile> for u8 {
    /// Returns the numeric discriminant of the register file
    fn from(value: RegFile) -> u8 {
        value as u8
    }
}

impl TryFrom<u32> for RegFile {
    type Error = &'static str;

    /// Maps a discriminant back to its register file
    ///
    /// # Errors
    ///
    /// Returns an error string if `value` is not the discriminant of any
    /// [`RegFile`] variant.
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(RegFile::GPR),
            1 => Ok(RegFile::UGPR),
            2 => Ok(RegFile::Pred),
            3 => Ok(RegFile::UPred),
            4 => Ok(RegFile::Carry),
            5 => Ok(RegFile::Bar),
            6 => Ok(RegFile::Mem),
            _ => Err("Invalid register file number"),
        }
    }
}

impl TryFrom<u16> for RegFile {
    type Error = &'static str;

    /// Widens to `u32` and defers to `TryFrom<u32>`
    fn try_from(value: u16) -> Result<Self, Self::Error> {
        RegFile::try_from(u32::from(value))
    }
}

impl TryFrom<u8> for RegFile {
    type Error = &'static str;

    /// Widens to `u32` and defers to `TryFrom<u32>`
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        RegFile::try_from(u32::from(value))
    }
}
216 
217 /// A trait for things which have an associated register file
218 pub trait HasRegFile {
file(&self) -> RegFile219     fn file(&self) -> RegFile;
220 
is_uniform(&self) -> bool221     fn is_uniform(&self) -> bool {
222         self.file().is_uniform()
223     }
224 
is_gpr(&self) -> bool225     fn is_gpr(&self) -> bool {
226         self.file().is_gpr()
227     }
228 
is_predicate(&self) -> bool229     fn is_predicate(&self) -> bool {
230         self.file().is_predicate()
231     }
232 }
233 
234 #[derive(Clone)]
235 pub struct RegFileSet {
236     bits: u8,
237 }
238 
239 impl RegFileSet {
new() -> RegFileSet240     pub fn new() -> RegFileSet {
241         RegFileSet { bits: 0 }
242     }
243 
len(&self) -> usize244     pub fn len(&self) -> usize {
245         self.bits.count_ones() as usize
246     }
247 
contains(&self, file: RegFile) -> bool248     pub fn contains(&self, file: RegFile) -> bool {
249         self.bits & (1 << (file as u8)) != 0
250     }
251 
insert(&mut self, file: RegFile) -> bool252     pub fn insert(&mut self, file: RegFile) -> bool {
253         let has_file = self.contains(file);
254         self.bits |= 1 << (file as u8);
255         !has_file
256     }
257 
is_empty(&self) -> bool258     pub fn is_empty(&self) -> bool {
259         self.bits == 0
260     }
261 
262     #[allow(dead_code)]
iter(&self) -> RegFileSet263     pub fn iter(&self) -> RegFileSet {
264         self.clone()
265     }
266 
remove(&mut self, file: RegFile) -> bool267     pub fn remove(&mut self, file: RegFile) -> bool {
268         let has_file = self.contains(file);
269         self.bits &= !(1 << (file as u8));
270         has_file
271     }
272 }
273 
274 impl FromIterator<RegFile> for RegFileSet {
from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self275     fn from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self {
276         let mut set = RegFileSet::new();
277         for file in iter {
278             set.insert(file);
279         }
280         set
281     }
282 }
283 
284 impl Iterator for RegFileSet {
285     type Item = RegFile;
286 
next(&mut self) -> Option<RegFile>287     fn next(&mut self) -> Option<RegFile> {
288         if self.is_empty() {
289             None
290         } else {
291             let file = self.bits.trailing_zeros().try_into().unwrap();
292             self.remove(file);
293             Some(file)
294         }
295     }
296 
size_hint(&self) -> (usize, Option<usize>)297     fn size_hint(&self) -> (usize, Option<usize>) {
298         let len = self.len();
299         (len, Some(len))
300     }
301 }
302 
303 #[derive(Clone, Copy)]
304 pub struct PerRegFile<T> {
305     per_file: [T; NUM_REG_FILES],
306 }
307 
308 impl<T> PerRegFile<T> {
new_with<F: Fn(RegFile) -> T>(f: F) -> Self309     pub fn new_with<F: Fn(RegFile) -> T>(f: F) -> Self {
310         PerRegFile {
311             per_file: [
312                 f(RegFile::GPR),
313                 f(RegFile::UGPR),
314                 f(RegFile::Pred),
315                 f(RegFile::UPred),
316                 f(RegFile::Carry),
317                 f(RegFile::Bar),
318                 f(RegFile::Mem),
319             ],
320         }
321     }
322 
values(&self) -> slice::Iter<T>323     pub fn values(&self) -> slice::Iter<T> {
324         self.per_file.iter()
325     }
326 
values_mut(&mut self) -> slice::IterMut<T>327     pub fn values_mut(&mut self) -> slice::IterMut<T> {
328         self.per_file.iter_mut()
329     }
330 }
331 
332 impl<T: Default> Default for PerRegFile<T> {
default() -> Self333     fn default() -> Self {
334         PerRegFile {
335             per_file: Default::default(),
336         }
337     }
338 }
339 
340 impl<T> Index<RegFile> for PerRegFile<T> {
341     type Output = T;
342 
index(&self, idx: RegFile) -> &T343     fn index(&self, idx: RegFile) -> &T {
344         &self.per_file[idx as u8 as usize]
345     }
346 }
347 
348 impl<T> IndexMut<RegFile> for PerRegFile<T> {
index_mut(&mut self, idx: RegFile) -> &mut T349     fn index_mut(&mut self, idx: RegFile) -> &mut T {
350         &mut self.per_file[idx as u8 as usize]
351     }
352 }
353 
354 /// An SSA value
355 ///
356 /// Each SSA in NAK represents a single 32-bit or 1-bit (if a predicate) value
357 /// which must either be spilled to memory or allocated space in the specified
358 /// register file.  Whenever more data is required such as a 64-bit memory
359 /// address, double-precision float, or a vec4 texture result, multiple SSA
360 /// values are used.
361 ///
362 /// Each SSA value logically contains two things: an index and a register file.
363 /// It is required that each index refers to a unique SSA value, regardless of
364 /// register file.  This way the index can be used to index tightly-packed data
365 /// structures such as bitsets without having to determine separate ranges for
366 /// each register file.
367 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
368 pub struct SSAValue {
369     packed: u32,
370 }
371 
372 impl SSAValue {
373     /// A special SSA value which is always invalid
374     pub const NONE: Self = SSAValue { packed: 0 };
375 
376     /// Returns an SSA value with the given register file and index
new(file: RegFile, idx: u32) -> SSAValue377     pub fn new(file: RegFile, idx: u32) -> SSAValue {
378         assert!(idx > 0 && idx < (1 << 29) - 2);
379         let mut packed = idx;
380         assert!(u8::from(file) < 8);
381         packed |= u32::from(u8::from(file)) << 29;
382         SSAValue { packed: packed }
383     }
384 
385     /// Returns the index of this SSA value
idx(&self) -> u32386     pub fn idx(&self) -> u32 {
387         self.packed & 0x1fffffff
388     }
389 
390     /// Returns true if this SSA value is equal to SSAValue::NONE
is_none(&self) -> bool391     pub fn is_none(&self) -> bool {
392         self.packed == 0
393     }
394 
fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result395     fn fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
396         write!(f, "{}{}", self.file().fmt_prefix(), self.idx())
397     }
398 }
399 
400 impl HasRegFile for SSAValue {
401     /// Returns the register file of this SSA value
file(&self) -> RegFile402     fn file(&self) -> RegFile {
403         RegFile::try_from(self.packed >> 29).unwrap()
404     }
405 }
406 
407 impl fmt::Display for SSAValue {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result408     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
409         write!(f, "%")?;
410         self.fmt_plain(f)
411     }
412 }
413 
414 /// A reference to one or more SSA values
415 ///
416 /// Because each SSA value represents a single 1 or 32-bit scalar, we need a way
417 /// to reference multiple SSA values for instructions which read or write
418 /// multiple registers in the same source.  When the register allocator runs,
419 /// all the SSA values in a given SSA ref will be placed in consecutive
420 /// registers, with the base register aligned to the number of values, aligned
421 /// to the next power of two.
422 ///
423 /// An SSA reference can reference between 1 and 4 SSA values.  It dereferences
424 /// to a slice for easy access to individual SSA values.  The structure is
425 /// designed so that is always 16B, regardless of how many SSA values are
426 /// referenced so it's easy and fairly cheap to copy around and embed in other
427 /// structures.
428 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
429 pub struct SSARef {
430     v: [SSAValue; 4],
431 }
432 
433 impl SSARef {
434     /// Returns a new SSA reference
435     #[inline]
new(comps: &[SSAValue]) -> SSARef436     fn new(comps: &[SSAValue]) -> SSARef {
437         assert!(comps.len() > 0 && comps.len() <= 4);
438         let mut r = SSARef {
439             v: [SSAValue::NONE; 4],
440         };
441         for i in 0..comps.len() {
442             r.v[i] = comps[i];
443         }
444         if comps.len() < 4 {
445             r.v[3].packed = (comps.len() as u32).wrapping_neg();
446         }
447         r
448     }
449 
450     /// Returns the number of components in this SSA reference
comps(&self) -> u8451     pub fn comps(&self) -> u8 {
452         if self.v[3].packed >= u32::MAX - 2 {
453             self.v[3].packed.wrapping_neg() as u8
454         } else {
455             4
456         }
457     }
458 
file(&self) -> Option<RegFile>459     pub fn file(&self) -> Option<RegFile> {
460         let comps = usize::from(self.comps());
461         let file = self.v[0].file();
462         for i in 1..comps {
463             if self.v[i].file() != file {
464                 return None;
465             }
466         }
467         Some(file)
468     }
469 
is_uniform(&self) -> bool470     pub fn is_uniform(&self) -> bool {
471         for ssa in &self[..] {
472             if !ssa.is_uniform() {
473                 return false;
474             }
475         }
476         true
477     }
478 
is_gpr(&self) -> bool479     pub fn is_gpr(&self) -> bool {
480         for ssa in &self[..] {
481             if !ssa.is_gpr() {
482                 return false;
483             }
484         }
485         true
486     }
487 
is_predicate(&self) -> bool488     pub fn is_predicate(&self) -> bool {
489         if self.v[0].is_predicate() {
490             true
491         } else {
492             for ssa in &self[..] {
493                 debug_assert!(!ssa.is_predicate());
494             }
495             false
496         }
497     }
498 }
499 
500 impl Deref for SSARef {
501     type Target = [SSAValue];
502 
deref(&self) -> &[SSAValue]503     fn deref(&self) -> &[SSAValue] {
504         let comps = usize::from(self.comps());
505         &self.v[..comps]
506     }
507 }
508 
509 impl DerefMut for SSARef {
deref_mut(&mut self) -> &mut [SSAValue]510     fn deref_mut(&mut self) -> &mut [SSAValue] {
511         let comps = usize::from(self.comps());
512         &mut self.v[..comps]
513     }
514 }
515 
516 impl TryFrom<&[SSAValue]> for SSARef {
517     type Error = &'static str;
518 
try_from(comps: &[SSAValue]) -> Result<Self, Self::Error>519     fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
520         if comps.len() == 0 {
521             Err("Empty vector")
522         } else if comps.len() > 4 {
523             Err("Too many vector components")
524         } else {
525             Ok(SSARef::new(comps))
526         }
527     }
528 }
529 
530 impl TryFrom<Vec<SSAValue>> for SSARef {
531     type Error = &'static str;
532 
try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error>533     fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
534         SSARef::try_from(&comps[..])
535     }
536 }
537 
538 macro_rules! impl_ssa_ref_from_arr {
539     ($n: expr) => {
540         impl From<[SSAValue; $n]> for SSARef {
541             fn from(comps: [SSAValue; $n]) -> Self {
542                 SSARef::new(&comps[..])
543             }
544         }
545     };
546 }
547 impl_ssa_ref_from_arr!(1);
548 impl_ssa_ref_from_arr!(2);
549 impl_ssa_ref_from_arr!(3);
550 impl_ssa_ref_from_arr!(4);
551 
552 impl From<SSAValue> for SSARef {
from(val: SSAValue) -> Self553     fn from(val: SSAValue) -> Self {
554         [val].into()
555     }
556 }
557 
558 impl fmt::Display for SSARef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result559     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
560         if self.comps() == 1 {
561             write!(f, "{}", self[0])
562         } else {
563             write!(f, "{{")?;
564             for (i, v) in self.iter().enumerate() {
565                 if i != 0 {
566                     write!(f, " ")?;
567                 }
568                 write!(f, "{}", v)?;
569             }
570             write!(f, "}}")
571         }
572     }
573 }
574 
575 pub struct SSAValueAllocator {
576     count: u32,
577 }
578 
579 impl SSAValueAllocator {
new() -> SSAValueAllocator580     pub fn new() -> SSAValueAllocator {
581         SSAValueAllocator { count: 0 }
582     }
583 
584     #[allow(dead_code)]
max_idx(&self) -> u32585     pub fn max_idx(&self) -> u32 {
586         self.count
587     }
588 
alloc(&mut self, file: RegFile) -> SSAValue589     pub fn alloc(&mut self, file: RegFile) -> SSAValue {
590         self.count += 1;
591         SSAValue::new(file, self.count)
592     }
593 
alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef594     pub fn alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef {
595         assert!(comps >= 1 && comps <= 4);
596         let mut vec = [SSAValue::NONE; 4];
597         for c in 0..comps {
598             vec[usize::from(c)] = self.alloc(file);
599         }
600         vec[0..usize::from(comps)].try_into().unwrap()
601     }
602 }
603 
604 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
605 pub struct RegRef {
606     packed: u32,
607 }
608 
609 impl RegRef {
610     pub const MAX_IDX: u32 = (1 << 26) - 1;
611 
zero_idx(file: RegFile) -> u32612     fn zero_idx(file: RegFile) -> u32 {
613         match file {
614             RegFile::GPR => 255,
615             RegFile::UGPR => 63,
616             RegFile::Pred => 7,
617             RegFile::UPred => 7,
618             RegFile::Carry => panic!("Carry has no zero index"),
619             RegFile::Bar => panic!("Bar has no zero index"),
620             RegFile::Mem => panic!("Mem has no zero index"),
621         }
622     }
623 
new(file: RegFile, base_idx: u32, comps: u8) -> RegRef624     pub fn new(file: RegFile, base_idx: u32, comps: u8) -> RegRef {
625         assert!(base_idx <= Self::MAX_IDX);
626         let mut packed = base_idx;
627         assert!(comps > 0 && comps <= 8);
628         packed |= u32::from(comps - 1) << 26;
629         assert!(u8::from(file) < 8);
630         packed |= u32::from(u8::from(file)) << 29;
631         RegRef { packed: packed }
632     }
633 
zero(file: RegFile, comps: u8) -> RegRef634     pub fn zero(file: RegFile, comps: u8) -> RegRef {
635         RegRef::new(file, RegRef::zero_idx(file), comps)
636     }
637 
base_idx(&self) -> u32638     pub fn base_idx(&self) -> u32 {
639         self.packed & 0x03ffffff
640     }
641 
idx_range(&self) -> Range<u32>642     pub fn idx_range(&self) -> Range<u32> {
643         let start = self.base_idx();
644         let end = start + u32::from(self.comps());
645         start..end
646     }
647 
comps(&self) -> u8648     pub fn comps(&self) -> u8 {
649         (((self.packed >> 26) & 0x7) + 1).try_into().unwrap()
650     }
651 
comp(&self, c: u8) -> RegRef652     pub fn comp(&self, c: u8) -> RegRef {
653         assert!(c < self.comps());
654         RegRef::new(self.file(), self.base_idx() + u32::from(c), 1)
655     }
656 }
657 
658 impl HasRegFile for RegRef {
file(&self) -> RegFile659     fn file(&self) -> RegFile {
660         ((self.packed >> 29) & 0x7).try_into().unwrap()
661     }
662 }
663 
664 impl fmt::Display for RegRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result665     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
666         write!(f, "{}{}", self.file().fmt_prefix(), self.base_idx())?;
667         if self.comps() > 1 {
668             write!(f, "..{}", self.idx_range().end)?;
669         }
670         Ok(())
671     }
672 }
673 
674 #[derive(Clone, Copy)]
675 pub enum Dst {
676     None,
677     SSA(SSARef),
678     Reg(RegRef),
679 }
680 
681 impl Dst {
is_none(&self) -> bool682     pub fn is_none(&self) -> bool {
683         matches!(self, Dst::None)
684     }
685 
as_reg(&self) -> Option<&RegRef>686     pub fn as_reg(&self) -> Option<&RegRef> {
687         match self {
688             Dst::Reg(r) => Some(r),
689             _ => None,
690         }
691     }
692 
as_ssa(&self) -> Option<&SSARef>693     pub fn as_ssa(&self) -> Option<&SSARef> {
694         match self {
695             Dst::SSA(r) => Some(r),
696             _ => None,
697         }
698     }
699 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>700     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
701         match self {
702             Dst::None | Dst::Reg(_) => &[],
703             Dst::SSA(ssa) => ssa.deref(),
704         }
705         .iter()
706     }
707 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>708     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
709         match self {
710             Dst::None | Dst::Reg(_) => &mut [],
711             Dst::SSA(ssa) => ssa.deref_mut(),
712         }
713         .iter_mut()
714     }
715 }
716 
717 impl From<RegRef> for Dst {
from(reg: RegRef) -> Dst718     fn from(reg: RegRef) -> Dst {
719         Dst::Reg(reg)
720     }
721 }
722 
723 impl<T: Into<SSARef>> From<T> for Dst {
from(ssa: T) -> Dst724     fn from(ssa: T) -> Dst {
725         Dst::SSA(ssa.into())
726     }
727 }
728 
729 impl fmt::Display for Dst {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result730     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
731         match self {
732             Dst::None => write!(f, "null")?,
733             Dst::SSA(v) => v.fmt(f)?,
734             Dst::Reg(r) => r.fmt(f)?,
735         }
736         Ok(())
737     }
738 }
739 
740 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
741 pub enum CBuf {
742     Binding(u8),
743 
744     #[allow(dead_code)]
745     BindlessSSA(SSARef),
746 
747     #[allow(dead_code)]
748     BindlessUGPR(RegRef),
749 }
750 
751 impl fmt::Display for CBuf {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result752     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
753         match self {
754             CBuf::Binding(idx) => write!(f, "c[{:#x}]", idx),
755             CBuf::BindlessSSA(v) => write!(f, "cx[{}]", v),
756             CBuf::BindlessUGPR(r) => write!(f, "cx[{}]", r),
757         }
758     }
759 }
760 
761 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
762 pub struct CBufRef {
763     pub buf: CBuf,
764     pub offset: u16,
765 }
766 
767 impl CBufRef {
offset(self, offset: u16) -> CBufRef768     pub fn offset(self, offset: u16) -> CBufRef {
769         CBufRef {
770             buf: self.buf,
771             offset: self.offset + offset,
772         }
773     }
774 }
775 
776 impl fmt::Display for CBufRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result777     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
778         write!(f, "{}[{:#x}]", self.buf, self.offset)
779     }
780 }
781 
782 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
783 pub enum SrcRef {
784     Zero,
785     True,
786     False,
787     Imm32(u32),
788     CBuf(CBufRef),
789     SSA(SSARef),
790     Reg(RegRef),
791 }
792 
793 impl SrcRef {
794     #[allow(dead_code)]
is_alu(&self) -> bool795     pub fn is_alu(&self) -> bool {
796         match self {
797             SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => true,
798             SrcRef::SSA(ssa) => ssa.is_gpr(),
799             SrcRef::Reg(reg) => reg.is_gpr(),
800             SrcRef::True | SrcRef::False => false,
801         }
802     }
803 
is_predicate(&self) -> bool804     pub fn is_predicate(&self) -> bool {
805         match self {
806             SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
807             SrcRef::True | SrcRef::False => true,
808             SrcRef::SSA(ssa) => ssa.is_predicate(),
809             SrcRef::Reg(reg) => reg.is_predicate(),
810         }
811     }
812 
is_carry(&self) -> bool813     pub fn is_carry(&self) -> bool {
814         match self {
815             SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Carry),
816             SrcRef::Reg(reg) => reg.file() == RegFile::Carry,
817             _ => false,
818         }
819     }
820 
821     #[allow(dead_code)]
is_barrier(&self) -> bool822     pub fn is_barrier(&self) -> bool {
823         match self {
824             SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Bar),
825             SrcRef::Reg(reg) => reg.file() == RegFile::Bar,
826             _ => false,
827         }
828     }
829 
as_reg(&self) -> Option<&RegRef>830     pub fn as_reg(&self) -> Option<&RegRef> {
831         match self {
832             SrcRef::Reg(r) => Some(r),
833             _ => None,
834         }
835     }
836 
as_ssa(&self) -> Option<&SSARef>837     pub fn as_ssa(&self) -> Option<&SSARef> {
838         match self {
839             SrcRef::SSA(r) => Some(r),
840             _ => None,
841         }
842     }
843 
as_u32(&self) -> Option<u32>844     pub fn as_u32(&self) -> Option<u32> {
845         match self {
846             SrcRef::Zero => Some(0),
847             SrcRef::Imm32(u) => Some(*u),
848             SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
849             _ => panic!("Invalid integer source"),
850         }
851     }
852 
get_reg(&self) -> Option<&RegRef>853     pub fn get_reg(&self) -> Option<&RegRef> {
854         match self {
855             SrcRef::Zero
856             | SrcRef::True
857             | SrcRef::False
858             | SrcRef::Imm32(_)
859             | SrcRef::SSA(_) => None,
860             SrcRef::CBuf(cb) => match &cb.buf {
861                 CBuf::Binding(_) | CBuf::BindlessSSA(_) => None,
862                 CBuf::BindlessUGPR(reg) => Some(reg),
863             },
864             SrcRef::Reg(reg) => Some(reg),
865         }
866     }
867 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>868     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
869         match self {
870             SrcRef::Zero
871             | SrcRef::True
872             | SrcRef::False
873             | SrcRef::Imm32(_)
874             | SrcRef::Reg(_) => &[],
875             SrcRef::CBuf(cb) => match &cb.buf {
876                 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &[],
877                 CBuf::BindlessSSA(ssa) => ssa.deref(),
878             },
879             SrcRef::SSA(ssa) => ssa.deref(),
880         }
881         .iter()
882     }
883 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>884     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
885         match self {
886             SrcRef::Zero
887             | SrcRef::True
888             | SrcRef::False
889             | SrcRef::Imm32(_)
890             | SrcRef::Reg(_) => &mut [],
891             SrcRef::CBuf(cb) => match &mut cb.buf {
892                 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &mut [],
893                 CBuf::BindlessSSA(ssa) => ssa.deref_mut(),
894             },
895             SrcRef::SSA(ssa) => ssa.deref_mut(),
896         }
897         .iter_mut()
898     }
899 }
900 
901 impl From<bool> for SrcRef {
from(b: bool) -> SrcRef902     fn from(b: bool) -> SrcRef {
903         if b {
904             SrcRef::True
905         } else {
906             SrcRef::False
907         }
908     }
909 }
910 
911 impl From<u32> for SrcRef {
from(u: u32) -> SrcRef912     fn from(u: u32) -> SrcRef {
913         if u == 0 {
914             SrcRef::Zero
915         } else {
916             SrcRef::Imm32(u)
917         }
918     }
919 }
920 
921 impl From<f32> for SrcRef {
from(f: f32) -> SrcRef922     fn from(f: f32) -> SrcRef {
923         f.to_bits().into()
924     }
925 }
926 
927 impl From<PrmtSel> for SrcRef {
from(sel: PrmtSel) -> SrcRef928     fn from(sel: PrmtSel) -> SrcRef {
929         u32::from(sel.0).into()
930     }
931 }
932 
933 impl From<CBufRef> for SrcRef {
from(cb: CBufRef) -> SrcRef934     fn from(cb: CBufRef) -> SrcRef {
935         SrcRef::CBuf(cb)
936     }
937 }
938 
939 impl From<RegRef> for SrcRef {
from(reg: RegRef) -> SrcRef940     fn from(reg: RegRef) -> SrcRef {
941         SrcRef::Reg(reg)
942     }
943 }
944 
945 impl<T: Into<SSARef>> From<T> for SrcRef {
from(ssa: T) -> SrcRef946     fn from(ssa: T) -> SrcRef {
947         SrcRef::SSA(ssa.into())
948     }
949 }
950 
951 impl fmt::Display for SrcRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result952     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
953         match self {
954             SrcRef::Zero => write!(f, "rZ"),
955             SrcRef::True => write!(f, "pT"),
956             SrcRef::False => write!(f, "pF"),
957             SrcRef::Imm32(u) => write!(f, "{:#x}", u),
958             SrcRef::CBuf(c) => c.fmt(f),
959             SrcRef::SSA(v) => v.fmt(f),
960             SrcRef::Reg(r) => r.fmt(f),
961         }
962     }
963 }
964 
/// A modifier applied to an instruction source
///
/// The float modifiers (`FAbs`, `FNeg`, `FNegAbs`), the integer modifier
/// (`INeg`) and the bitwise modifier (`BNot`) are separate families; the
/// `SrcMod` methods panic when asked to combine modifiers from different
/// families.
#[derive(Clone, Copy, PartialEq)]
pub enum SrcMod {
    /// No modifier
    None,
    /// Float absolute value
    FAbs,
    /// Float negation
    FNeg,
    /// Float negation of the absolute value
    FNegAbs,
    /// Integer negation
    INeg,
    /// Bitwise not
    BNot,
}
974 
975 impl SrcMod {
is_none(&self) -> bool976     pub fn is_none(&self) -> bool {
977         matches!(self, SrcMod::None)
978     }
979 
has_fabs(&self) -> bool980     pub fn has_fabs(&self) -> bool {
981         match self {
982             SrcMod::None | SrcMod::FNeg => false,
983             SrcMod::FAbs | SrcMod::FNegAbs => true,
984             _ => panic!("Not a float modifier"),
985         }
986     }
987 
has_fneg(&self) -> bool988     pub fn has_fneg(&self) -> bool {
989         match self {
990             SrcMod::None | SrcMod::FAbs => false,
991             SrcMod::FNeg | SrcMod::FNegAbs => true,
992             _ => panic!("Not a float modifier"),
993         }
994     }
995 
is_ineg(&self) -> bool996     pub fn is_ineg(&self) -> bool {
997         match self {
998             SrcMod::None => false,
999             SrcMod::INeg => true,
1000             _ => panic!("Not an integer modifier"),
1001         }
1002     }
1003 
is_bnot(&self) -> bool1004     pub fn is_bnot(&self) -> bool {
1005         match self {
1006             SrcMod::None => false,
1007             SrcMod::BNot => true,
1008             _ => panic!("Not a bitwise modifier"),
1009         }
1010     }
1011 
fabs(self) -> SrcMod1012     pub fn fabs(self) -> SrcMod {
1013         match self {
1014             SrcMod::None | SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs => {
1015                 SrcMod::FAbs
1016             }
1017             _ => panic!("Not a float source modifier"),
1018         }
1019     }
1020 
fneg(self) -> SrcMod1021     pub fn fneg(self) -> SrcMod {
1022         match self {
1023             SrcMod::None => SrcMod::FNeg,
1024             SrcMod::FAbs => SrcMod::FNegAbs,
1025             SrcMod::FNeg => SrcMod::None,
1026             SrcMod::FNegAbs => SrcMod::FAbs,
1027             _ => panic!("Not a float source modifier"),
1028         }
1029     }
1030 
ineg(self) -> SrcMod1031     pub fn ineg(self) -> SrcMod {
1032         match self {
1033             SrcMod::None => SrcMod::INeg,
1034             SrcMod::INeg => SrcMod::None,
1035             _ => panic!("Not an integer source modifier"),
1036         }
1037     }
1038 
bnot(self) -> SrcMod1039     pub fn bnot(self) -> SrcMod {
1040         match self {
1041             SrcMod::None => SrcMod::BNot,
1042             SrcMod::BNot => SrcMod::None,
1043             _ => panic!("Not a boolean source modifier"),
1044         }
1045     }
1046 
modify(self, other: SrcMod) -> SrcMod1047     pub fn modify(self, other: SrcMod) -> SrcMod {
1048         match other {
1049             SrcMod::None => self,
1050             SrcMod::FAbs => self.fabs(),
1051             SrcMod::FNeg => self.fneg(),
1052             SrcMod::FNegAbs => self.fabs().fneg(),
1053             SrcMod::INeg => self.ineg(),
1054             SrcMod::BNot => self.bnot(),
1055         }
1056     }
1057 }
1058 
/// A swizzle applied to a source that holds two 16-bit halves.
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum SrcSwizzle {
    /// Both halves are used as they are
    None,
    /// The low half is used for both halves
    Xx,
    /// The high half is used for both halves
    Yy,
}

impl SrcSwizzle {
    /// Returns true if no swizzle is applied.
    pub fn is_none(&self) -> bool {
        *self == SrcSwizzle::None
    }
}

/// Prints the swizzle as a suffix; `None` prints nothing at all.
impl fmt::Display for SrcSwizzle {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            SrcSwizzle::None => return Ok(()),
            SrcSwizzle::Xx => ".xx",
            SrcSwizzle::Yy => ".yy",
        };
        f.write_str(suffix)
    }
}
1082 
1083 #[derive(Clone, Copy, PartialEq)]
1084 pub struct Src {
1085     pub src_ref: SrcRef,
1086     pub src_mod: SrcMod,
1087     pub src_swizzle: SrcSwizzle,
1088 }
1089 
1090 impl Src {
new_zero() -> Src1091     pub fn new_zero() -> Src {
1092         SrcRef::Zero.into()
1093     }
1094 
new_imm_u32(u: u32) -> Src1095     pub fn new_imm_u32(u: u32) -> Src {
1096         u.into()
1097     }
1098 
new_imm_bool(b: bool) -> Src1099     pub fn new_imm_bool(b: bool) -> Src {
1100         b.into()
1101     }
1102 
fabs(&self) -> Src1103     pub fn fabs(&self) -> Src {
1104         Src {
1105             src_ref: self.src_ref,
1106             src_mod: self.src_mod.fabs(),
1107             src_swizzle: self.src_swizzle,
1108         }
1109     }
1110 
fneg(&self) -> Src1111     pub fn fneg(&self) -> Src {
1112         Src {
1113             src_ref: self.src_ref,
1114             src_mod: self.src_mod.fneg(),
1115             src_swizzle: self.src_swizzle,
1116         }
1117     }
1118 
ineg(&self) -> Src1119     pub fn ineg(&self) -> Src {
1120         Src {
1121             src_ref: self.src_ref,
1122             src_mod: self.src_mod.ineg(),
1123             src_swizzle: self.src_swizzle,
1124         }
1125     }
1126 
bnot(&self) -> Src1127     pub fn bnot(&self) -> Src {
1128         Src {
1129             src_ref: self.src_ref,
1130             src_mod: self.src_mod.bnot(),
1131             src_swizzle: self.src_swizzle,
1132         }
1133     }
1134 
fold_imm(&self, src_type: SrcType) -> Src1135     pub fn fold_imm(&self, src_type: SrcType) -> Src {
1136         let SrcRef::Imm32(mut u) = self.src_ref else {
1137             return *self;
1138         };
1139 
1140         if self.src_mod.is_none() && self.src_swizzle.is_none() {
1141             return *self;
1142         }
1143 
1144         assert!(src_type == SrcType::F16v2 || self.src_swizzle.is_none());
1145 
1146         // INeg affects more than just the 32 bits of input data so it can't be
1147         // trivially folded.  In fact, -imm may not be representable as a 32-bit
1148         // immediate at all.
1149         if src_type == SrcType::I32 {
1150             return *self;
1151         }
1152 
1153         u = match src_type {
1154             SrcType::F16 => {
1155                 let low = u & 0xFFFF;
1156 
1157                 match self.src_mod {
1158                     SrcMod::None => low,
1159                     SrcMod::FAbs => low & !(1_u32 << 15),
1160                     SrcMod::FNeg => low ^ (1_u32 << 15),
1161                     SrcMod::FNegAbs => low | (1_u32 << 15),
1162                     _ => panic!("Not a float source modifier"),
1163                 }
1164             }
1165             SrcType::F16v2 => {
1166                 let u = match self.src_swizzle {
1167                     SrcSwizzle::None => u,
1168                     SrcSwizzle::Xx => (u << 16) | (u & 0xffff),
1169                     SrcSwizzle::Yy => (u & 0xffff0000) | (u >> 16),
1170                 };
1171 
1172                 match self.src_mod {
1173                     SrcMod::None => u,
1174                     SrcMod::FAbs => u & 0x7FFF7FFF,
1175                     SrcMod::FNeg => u ^ 0x80008000,
1176                     SrcMod::FNegAbs => u | 0x80008000,
1177                     _ => panic!("Not a float source modifier"),
1178                 }
1179             }
1180             SrcType::F32 | SrcType::F64 => match self.src_mod {
1181                 SrcMod::None => u,
1182                 SrcMod::FAbs => u & !(1_u32 << 31),
1183                 SrcMod::FNeg => u ^ (1_u32 << 31),
1184                 SrcMod::FNegAbs => u | (1_u32 << 31),
1185                 _ => panic!("Not a float source modifier"),
1186             },
1187             SrcType::I32 => match self.src_mod {
1188                 SrcMod::None => u,
1189                 SrcMod::INeg => -(u as i32) as u32,
1190                 _ => panic!("Not an integer source modifier"),
1191             },
1192             SrcType::B32 => match self.src_mod {
1193                 SrcMod::None => u,
1194                 SrcMod::BNot => !u,
1195                 _ => panic!("Not a bitwise source modifier"),
1196             },
1197             _ => {
1198                 assert!(self.src_mod.is_none());
1199                 u
1200             }
1201         };
1202 
1203         Src {
1204             src_mod: SrcMod::None,
1205             src_ref: u.into(),
1206             src_swizzle: SrcSwizzle::None,
1207         }
1208     }
1209 
as_ssa(&self) -> Option<&SSARef>1210     pub fn as_ssa(&self) -> Option<&SSARef> {
1211         if self.src_mod.is_none() {
1212             self.src_ref.as_ssa()
1213         } else {
1214             None
1215         }
1216     }
1217 
as_bool(&self) -> Option<bool>1218     pub fn as_bool(&self) -> Option<bool> {
1219         match self.src_ref {
1220             SrcRef::True => Some(!self.src_mod.is_bnot()),
1221             SrcRef::False => Some(self.src_mod.is_bnot()),
1222             SrcRef::SSA(vec) => {
1223                 assert!(vec.is_predicate() && vec.comps() == 1);
1224                 None
1225             }
1226             SrcRef::Reg(reg) => {
1227                 assert!(reg.is_predicate() && reg.comps() == 1);
1228                 None
1229             }
1230             _ => panic!("Not a boolean source"),
1231         }
1232     }
1233 
as_u32(&self) -> Option<u32>1234     pub fn as_u32(&self) -> Option<u32> {
1235         if self.src_mod.is_none() {
1236             self.src_ref.as_u32()
1237         } else {
1238             None
1239         }
1240     }
1241 
as_imm_not_i20(&self) -> Option<u32>1242     pub fn as_imm_not_i20(&self) -> Option<u32> {
1243         match self.src_ref {
1244             SrcRef::Imm32(i) => {
1245                 assert!(self.src_mod.is_none());
1246                 let top = i & 0xfff80000;
1247                 if top == 0 || top == 0xfff80000 {
1248                     None
1249                 } else {
1250                     Some(i)
1251                 }
1252             }
1253             _ => None,
1254         }
1255     }
1256 
as_imm_not_f20(&self) -> Option<u32>1257     pub fn as_imm_not_f20(&self) -> Option<u32> {
1258         match self.src_ref {
1259             SrcRef::Imm32(i) => {
1260                 assert!(self.src_mod.is_none());
1261                 if (i & 0xfff) == 0 {
1262                     None
1263                 } else {
1264                     Some(i)
1265                 }
1266             }
1267             _ => None,
1268         }
1269     }
1270 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>1271     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
1272         self.src_ref.iter_ssa()
1273     }
1274 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>1275     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
1276         self.src_ref.iter_ssa_mut()
1277     }
1278 
is_uniform(&self) -> bool1279     pub fn is_uniform(&self) -> bool {
1280         match self.src_ref {
1281             SrcRef::Zero
1282             | SrcRef::True
1283             | SrcRef::False
1284             | SrcRef::Imm32(_)
1285             | SrcRef::CBuf(_) => true,
1286             SrcRef::SSA(ssa) => ssa.is_uniform(),
1287             SrcRef::Reg(reg) => reg.is_uniform(),
1288         }
1289     }
1290 
is_predicate(&self) -> bool1291     pub fn is_predicate(&self) -> bool {
1292         self.src_ref.is_predicate()
1293     }
1294 
is_zero(&self) -> bool1295     pub fn is_zero(&self) -> bool {
1296         match self.src_ref {
1297             SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod {
1298                 SrcMod::None | SrcMod::FAbs => true,
1299                 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false,
1300                 // INeg affects more than just the 32 bits of input data so -0
1301                 // may not be equivalent to 0.
1302                 SrcMod::INeg => false,
1303             },
1304             _ => false,
1305         }
1306     }
1307 
is_fneg_zero(&self, src_type: SrcType) -> bool1308     pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
1309         match self.fold_imm(src_type).src_ref {
1310             SrcRef::Imm32(0x00008000) => src_type == SrcType::F16,
1311             SrcRef::Imm32(0x80000000) => src_type == SrcType::F32,
1312             SrcRef::Imm32(0x80008000) => src_type == SrcType::F16v2,
1313             _ => false,
1314         }
1315     }
1316 
1317     #[allow(dead_code)]
supports_type(&self, src_type: &SrcType) -> bool1318     pub fn supports_type(&self, src_type: &SrcType) -> bool {
1319         match src_type {
1320             SrcType::SSA => {
1321                 if !self.src_mod.is_none() {
1322                     return false;
1323                 }
1324 
1325                 matches!(self.src_ref, SrcRef::SSA(_) | SrcRef::Reg(_))
1326             }
1327             SrcType::GPR => {
1328                 if !self.src_mod.is_none() {
1329                     return false;
1330                 }
1331 
1332                 matches!(
1333                     self.src_ref,
1334                     SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_)
1335                 )
1336             }
1337             SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
1338             SrcType::F16 | SrcType::F32 | SrcType::F64 | SrcType::F16v2 => {
1339                 match self.src_mod {
1340                     SrcMod::None
1341                     | SrcMod::FAbs
1342                     | SrcMod::FNeg
1343                     | SrcMod::FNegAbs => (),
1344                     _ => return false,
1345                 }
1346 
1347                 self.src_ref.is_alu()
1348             }
1349             SrcType::I32 => {
1350                 match self.src_mod {
1351                     SrcMod::None | SrcMod::INeg => (),
1352                     _ => return false,
1353                 }
1354 
1355                 self.src_ref.is_alu()
1356             }
1357             SrcType::B32 => {
1358                 match self.src_mod {
1359                     SrcMod::None | SrcMod::BNot => (),
1360                     _ => return false,
1361                 }
1362 
1363                 self.src_ref.is_alu()
1364             }
1365             SrcType::Pred => {
1366                 match self.src_mod {
1367                     SrcMod::None | SrcMod::BNot => (),
1368                     _ => return false,
1369                 }
1370 
1371                 self.src_ref.is_predicate()
1372             }
1373             SrcType::Carry => self.src_mod.is_none() && self.src_ref.is_carry(),
1374             SrcType::Bar => self.src_mod.is_none() && self.src_ref.is_barrier(),
1375         }
1376     }
1377 }
1378 
1379 impl<T: Into<SrcRef>> From<T> for Src {
from(value: T) -> Src1380     fn from(value: T) -> Src {
1381         Src {
1382             src_ref: value.into(),
1383             src_mod: SrcMod::None,
1384             src_swizzle: SrcSwizzle::None,
1385         }
1386     }
1387 }
1388 
1389 impl fmt::Display for Src {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1390     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1391         match self.src_mod {
1392             SrcMod::None => write!(f, "{}{}", self.src_ref, self.src_swizzle),
1393             SrcMod::FAbs => write!(f, "|{}{}|", self.src_ref, self.src_swizzle),
1394             SrcMod::FNeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1395             SrcMod::FNegAbs => {
1396                 write!(f, "-|{}{}|", self.src_ref, self.src_swizzle)
1397             }
1398             SrcMod::INeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1399             SrcMod::BNot => write!(f, "!{}{}", self.src_ref, self.src_swizzle),
1400         }
1401     }
1402 }
1403 
/// How an instruction interprets one of its sources.
///
/// Among other things, the type determines which source modifiers and
/// source references are valid (see `Src::supports_type()`).
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SrcType {
    /// An unmodified SSA value or register
    SSA,
    /// An unmodified SSA value, register, or zero
    GPR,
    /// An unmodified ALU source
    ALU,
    /// An ALU source which may carry a float modifier
    F16,
    /// An ALU source which may carry a float modifier
    F16v2,
    /// An ALU source which may carry a float modifier
    F32,
    /// An ALU source which may carry a float modifier
    F64,
    /// An ALU source which may carry an integer negation
    I32,
    /// An ALU source which may carry a bitwise not
    B32,
    /// A predicate which may carry a not
    Pred,
    /// An unmodified carry source
    Carry,
    /// An unmodified barrier source
    Bar,
}

impl SrcType {
    /// The source type used when none is given explicitly
    const DEFAULT: Self = Self::GPR;
}
1424 
1425 pub type SrcTypeList = AttrList<SrcType>;
1426 
1427 pub trait SrcsAsSlice: AsSlice<Src, Attr = SrcType> {
srcs_as_slice(&self) -> &[Src]1428     fn srcs_as_slice(&self) -> &[Src] {
1429         self.as_slice()
1430     }
1431 
srcs_as_mut_slice(&mut self) -> &mut [Src]1432     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
1433         self.as_mut_slice()
1434     }
1435 
src_types(&self) -> SrcTypeList1436     fn src_types(&self) -> SrcTypeList {
1437         self.attrs()
1438     }
1439 
src_idx(&self, src: &Src) -> usize1440     fn src_idx(&self, src: &Src) -> usize {
1441         let r = self.srcs_as_slice().as_ptr_range();
1442         assert!(r.contains(&(src as *const Src)));
1443         unsafe { (src as *const Src).offset_from(r.start) as usize }
1444     }
1445 }
1446 
1447 impl<T: AsSlice<Src, Attr = SrcType>> SrcsAsSlice for T {}
1448 
all_dsts_uniform(dsts: &[Dst]) -> bool1449 fn all_dsts_uniform(dsts: &[Dst]) -> bool {
1450     let mut uniform = None;
1451     for dst in dsts {
1452         let dst_uniform = match dst {
1453             Dst::None => continue,
1454             Dst::Reg(r) => r.is_uniform(),
1455             Dst::SSA(r) => r.file().unwrap().is_uniform(),
1456         };
1457         assert!(uniform == None || uniform == Some(dst_uniform));
1458         uniform = Some(dst_uniform);
1459     }
1460     uniform == Some(true)
1461 }
1462 
/// The kind of value an instruction writes to one of its destinations.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DstType {
    Pred,
    GPR,
    F16,
    F16v2,
    F32,
    F64,
    Carry,
    Bar,
    Vec,
}

impl DstType {
    /// The destination type used when none is given explicitly
    const DEFAULT: Self = Self::Vec;
}
1480 
1481 pub type DstTypeList = AttrList<DstType>;
1482 
1483 pub trait DstsAsSlice: AsSlice<Dst, Attr = DstType> {
dsts_as_slice(&self) -> &[Dst]1484     fn dsts_as_slice(&self) -> &[Dst] {
1485         self.as_slice()
1486     }
1487 
dsts_as_mut_slice(&mut self) -> &mut [Dst]1488     fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
1489         self.as_mut_slice()
1490     }
1491 
1492     // Currently only used by test code
1493     #[allow(dead_code)]
dst_types(&self) -> DstTypeList1494     fn dst_types(&self) -> DstTypeList {
1495         self.attrs()
1496     }
1497 
dst_idx(&self, dst: &Dst) -> usize1498     fn dst_idx(&self, dst: &Dst) -> usize {
1499         let r = self.dsts_as_slice().as_ptr_range();
1500         assert!(r.contains(&(dst as *const Dst)));
1501         unsafe { (dst as *const Dst).offset_from(r.start) as usize }
1502     }
1503 }
1504 
1505 impl<T: AsSlice<Dst, Attr = DstType>> DstsAsSlice for T {}
1506 
1507 pub trait IsUniform {
is_uniform(&self) -> bool1508     fn is_uniform(&self) -> bool;
1509 }
1510 
1511 impl<T: DstsAsSlice> IsUniform for T {
is_uniform(&self) -> bool1512     fn is_uniform(&self) -> bool {
1513         all_dsts_uniform(self.dsts_as_slice())
1514     }
1515 }
1516 
fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result1517 fn fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result {
1518     if dsts.is_empty() {
1519         return Ok(());
1520     }
1521 
1522     // Figure out the last non-null dst
1523     //
1524     // Note: By making the top inclusive and starting at 0, we ensure that
1525     // at least one dst always gets printed.
1526     let mut last_dst = 0;
1527     for (i, dst) in dsts.iter().enumerate() {
1528         if !dst.is_none() {
1529             last_dst = i;
1530         }
1531     }
1532 
1533     for i in 0..(last_dst + 1) {
1534         if i != 0 {
1535             write!(f, " ")?;
1536         }
1537         write!(f, "{}", &dsts[i])?;
1538     }
1539     Ok(())
1540 }
1541 
/// A concrete source or destination value used for constant folding.
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum FoldData {
    /// A predicate value
    Pred(bool),
    /// A carry value
    Carry(bool),
    /// A 32-bit value
    U32(u32),
    /// A pair of 32-bit values
    Vec2([u32; 2]),
}
1550 
1551 pub struct OpFoldData<'a> {
1552     pub dsts: &'a mut [FoldData],
1553     pub srcs: &'a [FoldData],
1554 }
1555 
1556 impl OpFoldData<'_> {
get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1557     pub fn get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1558         let i = op.src_idx(src);
1559         let b = match src.src_ref {
1560             SrcRef::Zero | SrcRef::Imm32(_) => panic!("Expected a predicate"),
1561             SrcRef::True => true,
1562             SrcRef::False => false,
1563             _ => {
1564                 if let FoldData::Pred(b) = self.srcs[i] {
1565                     b
1566                 } else {
1567                     panic!("FoldData is not a predicate");
1568                 }
1569             }
1570         };
1571         b ^ src.src_mod.is_bnot()
1572     }
1573 
get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321574     pub fn get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1575         let i = op.src_idx(src);
1576         match src.src_ref {
1577             SrcRef::Zero => 0,
1578             SrcRef::Imm32(imm) => imm,
1579             SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1580             _ => {
1581                 if let FoldData::U32(u) = self.srcs[i] {
1582                     u
1583                 } else {
1584                     panic!("FoldData is not a U32");
1585                 }
1586             }
1587         }
1588     }
1589 
get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321590     pub fn get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1591         let x = self.get_u32_src(op, src);
1592         if src.src_mod.is_bnot() {
1593             !x
1594         } else {
1595             x
1596         }
1597     }
1598 
get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1599     pub fn get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1600         assert!(src.src_ref.as_ssa().is_some());
1601         let i = op.src_idx(src);
1602         if let FoldData::Carry(b) = self.srcs[i] {
1603             b
1604         } else {
1605             panic!("FoldData is not a predicate");
1606         }
1607     }
1608 
1609     #[allow(dead_code)]
get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f321610     pub fn get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f32 {
1611         f32::from_bits(self.get_u32_src(op, src))
1612     }
1613 
1614     #[allow(dead_code)]
get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f641615     pub fn get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f64 {
1616         let i = op.src_idx(src);
1617         match src.src_ref {
1618             SrcRef::Zero => 0.0,
1619             SrcRef::Imm32(imm) => f64::from_bits(u64::from(imm) << 32),
1620             SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1621             _ => {
1622                 if let FoldData::Vec2(v) = self.srcs[i] {
1623                     let u = u64::from(v[0]) | (u64::from(v[1]) << 32);
1624                     f64::from_bits(u)
1625                 } else {
1626                     panic!("FoldData is not a U32");
1627                 }
1628             }
1629         }
1630     }
1631 
set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1632     pub fn set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1633         self.dsts[op.dst_idx(dst)] = FoldData::Pred(b);
1634     }
1635 
set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1636     pub fn set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1637         self.dsts[op.dst_idx(dst)] = FoldData::Carry(b);
1638     }
1639 
set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32)1640     pub fn set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32) {
1641         self.dsts[op.dst_idx(dst)] = FoldData::U32(u);
1642     }
1643 
1644     #[allow(dead_code)]
set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32)1645     pub fn set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32) {
1646         self.set_u32_dst(op, dst, f.to_bits());
1647     }
1648 
1649     #[allow(dead_code)]
set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64)1650     pub fn set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64) {
1651         let u = f.to_bits();
1652         let v = [u as u32, (u >> 32) as u32];
1653         self.dsts[op.dst_idx(dst)] = FoldData::Vec2(v);
1654     }
1655 }
1656 
1657 pub trait Foldable: SrcsAsSlice + DstsAsSlice {
1658     // Currently only used by test code
1659     #[allow(dead_code)]
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)1660     fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>);
1661 }
1662 
1663 pub trait DisplayOp: DstsAsSlice {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1664     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1665         fmt_dst_slice(f, self.dsts_as_slice())
1666     }
1667 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1668     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
1669 }
1670 
// Hack struct so we can re-use Formatters.  Shamelessly stolen from
// https://users.rust-lang.org/t/reusing-an-fmt-formatter/8531/4
//
// Wrapping a closure in Fmt makes it usable anywhere a Display is expected.
pub struct Fmt<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result>(pub F);

impl<F: Fn(&mut fmt::Formatter<'_>) -> fmt::Result> fmt::Display for Fmt<F> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Fmt(callback) = self;
        callback(f)
    }
}
1685 
// Implements Display for an op in terms of fmt_dsts() and fmt_op().  The
// output is "<dsts> = <op>" or just "<op>" if fmt_dsts() prints nothing.
macro_rules! impl_display_for_op {
    ($op: ident) => {
        impl fmt::Display for $op {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Print the dsts to a scratch string first so we can tell
                // whether or not there is anything to put before the " = "
                let mut dsts = String::new();
                write!(dsts, "{}", Fmt(|f| self.fmt_dsts(f)))?;

                if dsts.is_empty() {
                    return self.fmt_op(f);
                }

                write!(f, "{} = ", dsts)?;
                self.fmt_op(f)
            }
        }
    };
}
1700 
1701 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1702 pub enum PredSetOp {
1703     And,
1704     Or,
1705     Xor,
1706 }
1707 
1708 impl PredSetOp {
eval(&self, a: bool, b: bool) -> bool1709     pub fn eval(&self, a: bool, b: bool) -> bool {
1710         match self {
1711             PredSetOp::And => a & b,
1712             PredSetOp::Or => a | b,
1713             PredSetOp::Xor => a ^ b,
1714         }
1715     }
1716 
is_trivial(&self, accum: &Src) -> bool1717     pub fn is_trivial(&self, accum: &Src) -> bool {
1718         if let Some(b) = accum.as_bool() {
1719             match self {
1720                 PredSetOp::And => b,
1721                 PredSetOp::Or => !b,
1722                 PredSetOp::Xor => !b,
1723             }
1724         } else {
1725             false
1726         }
1727     }
1728 }
1729 
1730 impl fmt::Display for PredSetOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1731     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1732         match self {
1733             PredSetOp::And => write!(f, ".and"),
1734             PredSetOp::Or => write!(f, ".or"),
1735             PredSetOp::Xor => write!(f, ".xor"),
1736         }
1737     }
1738 }
1739 
/// A floating-point comparison.
///
/// The `Ord*` and `Unord*` comparisons print without and with a `u` suffix,
/// respectively.  `IsNum` and `IsNan` are unary tests and cannot be flipped.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatCmpOp {
    OrdEq,
    OrdNe,
    OrdLt,
    OrdLe,
    OrdGt,
    OrdGe,
    UnordEq,
    UnordNe,
    UnordLt,
    UnordLe,
    UnordGt,
    UnordGe,
    IsNum,
    IsNan,
}

impl FloatCmpOp {
    /// Returns the comparison which gives the same result once the two
    /// operands are swapped.
    ///
    /// Panics on `IsNum` and `IsNan`.
    pub fn flip(self) -> FloatCmpOp {
        match self {
            // Equality and inequality don't care about operand order
            Self::OrdEq | Self::OrdNe | Self::UnordEq | Self::UnordNe => self,
            Self::OrdLt => Self::OrdGt,
            Self::OrdGt => Self::OrdLt,
            Self::OrdLe => Self::OrdGe,
            Self::OrdGe => Self::OrdLe,
            Self::UnordLt => Self::UnordGt,
            Self::UnordGt => Self::UnordLt,
            Self::UnordLe => Self::UnordGe,
            Self::UnordGe => Self::UnordLe,
            Self::IsNum | Self::IsNan => panic!("Cannot flip unop"),
        }
    }
}

/// Prints the comparison as an instruction suffix.
impl fmt::Display for FloatCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::OrdEq => ".eq",
            Self::OrdNe => ".ne",
            Self::OrdLt => ".lt",
            Self::OrdLe => ".le",
            Self::OrdGt => ".gt",
            Self::OrdGe => ".ge",
            Self::UnordEq => ".equ",
            Self::UnordNe => ".neu",
            Self::UnordLt => ".ltu",
            Self::UnordLe => ".leu",
            Self::UnordGt => ".gtu",
            Self::UnordGe => ".geu",
            Self::IsNum => ".num",
            Self::IsNan => ".nan",
        };
        f.write_str(suffix)
    }
}
1797 
/// An integer comparison.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}

impl IntCmpOp {
    /// Returns the comparison which gives the same result once the two
    /// operands are swapped.
    pub fn flip(self) -> IntCmpOp {
        match self {
            // Equality and inequality don't care about operand order
            Self::Eq | Self::Ne => self,
            Self::Lt => Self::Gt,
            Self::Gt => Self::Lt,
            Self::Le => Self::Ge,
            Self::Ge => Self::Le,
        }
    }
}

/// Prints the comparison as an instruction suffix.
impl fmt::Display for IntCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Eq => ".eq",
            Self::Ne => ".ne",
            Self::Lt => ".lt",
            Self::Le => ".le",
            Self::Gt => ".gt",
            Self::Ge => ".ge",
        };
        f.write_str(suffix)
    }
}
1832 
/// The signedness of an integer comparison.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpType {
    U32,
    I32,
}

impl IntCmpType {
    /// Returns true for `I32` and false for `U32`.
    #[allow(dead_code)]
    pub fn is_signed(&self) -> bool {
        matches!(self, IntCmpType::I32)
    }
}

/// Prints the type as an instruction suffix.
impl fmt::Display for IntCmpType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            IntCmpType::U32 => ".u32",
            IntCmpType::I32 => ".i32",
        };
        f.write_str(suffix)
    }
}
1857 
1858 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1859 pub enum LogicOp2 {
1860     And,
1861     Or,
1862     Xor,
1863     PassB,
1864 }
1865 
1866 impl fmt::Display for LogicOp2 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1867     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1868         match self {
1869             LogicOp2::And => write!(f, "and"),
1870             LogicOp2::Or => write!(f, "or"),
1871             LogicOp2::Xor => write!(f, "xor"),
1872             LogicOp2::PassB => write!(f, "pass_b"),
1873         }
1874     }
1875 }
1876 
1877 impl LogicOp2 {
to_lut(self) -> LogicOp31878     pub fn to_lut(self) -> LogicOp3 {
1879         match self {
1880             LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
1881             LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
1882             LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
1883             LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
1884         }
1885     }
1886 }
1887 
/// A three-source logic operation, described by an 8-bit look-up table.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
    /// The look-up table
    pub lut: u8,
}
1892 
1893 impl LogicOp3 {
1894     pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];
1895 
1896     #[inline]
new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp31897     pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp3 {
1898         LogicOp3 {
1899             lut: f(
1900                 LogicOp3::SRC_MASKS[0],
1901                 LogicOp3::SRC_MASKS[1],
1902                 LogicOp3::SRC_MASKS[2],
1903             ),
1904         }
1905     }
1906 
new_const(val: bool) -> LogicOp31907     pub fn new_const(val: bool) -> LogicOp3 {
1908         LogicOp3 {
1909             lut: if val { !0 } else { 0 },
1910         }
1911     }
1912 
src_used(&self, src_idx: usize) -> bool1913     pub fn src_used(&self, src_idx: usize) -> bool {
1914         let mask = LogicOp3::SRC_MASKS[src_idx];
1915         let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1916         self.lut & !mask != (self.lut >> shift) & !mask
1917     }
1918 
fix_src(&mut self, src_idx: usize, val: bool)1919     pub fn fix_src(&mut self, src_idx: usize, val: bool) {
1920         let mask = LogicOp3::SRC_MASKS[src_idx];
1921         let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1922         if val {
1923             let t_bits = self.lut & mask;
1924             self.lut = t_bits | (t_bits >> shift)
1925         } else {
1926             let f_bits = self.lut & !mask;
1927             self.lut = (f_bits << shift) | f_bits
1928         };
1929     }
1930 
invert_src(&mut self, src_idx: usize)1931     pub fn invert_src(&mut self, src_idx: usize) {
1932         let mask = LogicOp3::SRC_MASKS[src_idx];
1933         let shift = LogicOp3::SRC_MASKS[src_idx].trailing_zeros();
1934         let t_bits = self.lut & mask;
1935         let f_bits = self.lut & !mask;
1936         self.lut = (f_bits << shift) | (t_bits >> shift);
1937     }
1938 
eval< T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>, >( &self, x: T, y: T, z: T, ) -> T1939     pub fn eval<
1940         T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
1941     >(
1942         &self,
1943         x: T,
1944         y: T,
1945         z: T,
1946     ) -> T {
1947         let mut res = x & !x; // zero
1948         if (self.lut & (1 << 0)) != 0 {
1949             res = res | (!x & !y & !z);
1950         }
1951         if (self.lut & (1 << 1)) != 0 {
1952             res = res | (!x & !y & z);
1953         }
1954         if (self.lut & (1 << 2)) != 0 {
1955             res = res | (!x & y & !z);
1956         }
1957         if (self.lut & (1 << 3)) != 0 {
1958             res = res | (!x & y & z);
1959         }
1960         if (self.lut & (1 << 4)) != 0 {
1961             res = res | (x & !y & !z);
1962         }
1963         if (self.lut & (1 << 5)) != 0 {
1964             res = res | (x & !y & z);
1965         }
1966         if (self.lut & (1 << 6)) != 0 {
1967             res = res | (x & y & !z);
1968         }
1969         if (self.lut & (1 << 7)) != 0 {
1970             res = res | (x & y & z);
1971         }
1972         res
1973     }
1974 }
1975 
1976 impl fmt::Display for LogicOp3 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1977     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1978         write!(f, "LUT[{:#x}]", self.lut)
1979     }
1980 }
1981 
/// A floating-point type identified by its width in bits.
///
/// Displayed as `.f16`, `.f32` or `.f64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatType {
    F16,
    F32,
    F64,
}

impl FloatType {
    /// Returns the float type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 16, 32 or 64.
    pub fn from_bits(bits: usize) -> FloatType {
        match bits {
            16 => FloatType::F16,
            32 => FloatType::F32,
            64 => FloatType::F64,
            _ => panic!("Invalid float type size"),
        }
    }

    /// Returns the width of this type in bits.
    pub fn bits(&self) -> usize {
        match self {
            FloatType::F16 => 16,
            FloatType::F32 => 32,
            FloatType::F64 => 64,
        }
    }
}

impl fmt::Display for FloatType {
    /// Writes the type suffix, e.g. `.f32`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FloatType::F16 => write!(f, ".f16"),
            FloatType::F32 => write!(f, ".f32"),
            FloatType::F64 => write!(f, ".f64"),
        }
    }
}
2017 
/// A floating-point rounding mode.
///
/// Displayed as `.re`, `.rm`, `.rp` or `.rz`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FRndMode {
    NearestEven,
    NegInf,
    PosInf,
    Zero,
}

impl fmt::Display for FRndMode {
    /// Writes the rounding-mode suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::NearestEven => ".re",
            Self::NegInf => ".rm",
            Self::PosInf => ".rp",
            Self::Zero => ".rz",
        };
        f.write_str(suffix)
    }
}
2036 
/// A texture dimensionality.
///
/// Displayed as `.1d`, `.a1d`, `.2d`, `.a2d`, `.3d`, `.cube` or `.acube`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDim {
    _1D,
    Array1D,
    _2D,
    Array2D,
    _3D,
    Cube,
    ArrayCube,
}

impl fmt::Display for TexDim {
    /// Writes the dimensionality suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::_1D => ".1d",
            Self::Array1D => ".a1d",
            Self::_2D => ".2d",
            Self::Array2D => ".a2d",
            Self::_3D => ".3d",
            Self::Cube => ".cube",
            Self::ArrayCube => ".acube",
        };
        f.write_str(suffix)
    }
}
2061 
/// A texture level-of-detail mode.
///
/// Displayed without a leading dot as `la`, `lz`, `lb`, `ll`, `lc` or
/// `lb.lc`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexLodMode {
    Auto,
    Zero,
    Bias,
    Lod,
    Clamp,
    BiasClamp,
}

impl fmt::Display for TexLodMode {
    /// Writes the short name of the mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Auto => "la",
            Self::Zero => "lz",
            Self::Bias => "lb",
            Self::Lod => "ll",
            Self::Clamp => "lc",
            Self::BiasClamp => "lb.lc",
        };
        f.write_str(name)
    }
}
2084 
/// An offset mode, displayed as `no_off`, `aoffi` or `ptp`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum Tld4OffsetMode {
    None,
    AddOffI,
    PerPx,
}

impl fmt::Display for Tld4OffsetMode {
    /// Writes the short name of the mode.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::None => "no_off",
            Self::AddOffI => "aoffi",
            Self::PerPx => "ptp",
        };
        f.write_str(name)
    }
}
2101 
/// A texture query kind, displayed as `dimension`, `texture_type` or
/// `sampler_pos`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexQuery {
    Dimension,
    TextureType,
    SamplerPos,
}

impl fmt::Display for TexQuery {
    /// Writes the snake-case name of the query.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Dimension => "dimension",
            Self::TextureType => "texture_type",
            Self::SamplerPos => "sampler_pos",
        };
        f.write_str(name)
    }
}
2119 
/// An image dimensionality.
///
/// Displayed as `.1d`, `.buf`, `.a1d`, `.2d`, `.a2d` or `.3d`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ImageDim {
    _1D,
    _1DBuffer,
    _1DArray,
    _2D,
    _2DArray,
    _3D,
}

impl ImageDim {
    /// Returns the number of coordinate components for this
    /// dimensionality (1, 2 or 3).
    pub fn coord_comps(&self) -> u8 {
        match self {
            Self::_1D | Self::_1DBuffer => 1,
            Self::_1DArray | Self::_2D => 2,
            Self::_2DArray | Self::_3D => 3,
        }
    }
}

impl fmt::Display for ImageDim {
    /// Writes the dimensionality suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::_1D => ".1d",
            Self::_1DBuffer => ".buf",
            Self::_1DArray => ".a1d",
            Self::_2D => ".2d",
            Self::_2DArray => ".a2d",
            Self::_3D => ".3d",
        };
        f.write_str(suffix)
    }
}
2155 
/// An integer type identified by width and signedness.
///
/// Displayed as `.u8`, `.i8`, ... `.u64`, `.i64`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum IntType {
    U8,
    I8,
    U16,
    I16,
    U32,
    I32,
    U64,
    I64,
}

impl IntType {
    /// Returns the integer type with the given width and signedness.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 8, 16, 32 or 64.
    pub fn from_bits(bits: usize, is_signed: bool) -> IntType {
        match (bits, is_signed) {
            (8, false) => Self::U8,
            (8, true) => Self::I8,
            (16, false) => Self::U16,
            (16, true) => Self::I16,
            (32, false) => Self::U32,
            (32, true) => Self::I32,
            (64, false) => Self::U64,
            (64, true) => Self::I64,
            _ => panic!("Invalid integer type size"),
        }
    }

    /// Returns `true` for the `I*` variants and `false` for the `U*` ones.
    pub fn is_signed(&self) -> bool {
        match self {
            Self::I8 | Self::I16 | Self::I32 | Self::I64 => true,
            Self::U8 | Self::U16 | Self::U32 | Self::U64 => false,
        }
    }

    /// Returns the width of this type in bits.
    pub fn bits(&self) -> usize {
        match self {
            Self::I8 | Self::U8 => 8,
            Self::I16 | Self::U16 => 16,
            Self::I32 | Self::U32 => 32,
            Self::I64 | Self::U64 => 64,
        }
    }
}

impl fmt::Display for IntType {
    /// Writes the type suffix, e.g. `.u32`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::U8 => ".u8",
            Self::I8 => ".i8",
            Self::U16 => ".u16",
            Self::I16 => ".i16",
            Self::U32 => ".u32",
            Self::I32 => ".i32",
            Self::U64 => ".u64",
            Self::I64 => ".i64",
        };
        f.write_str(suffix)
    }
}
2234 
/// A memory address type, displayed as `.a32` or `.a64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemAddrType {
    A32,
    A64,
}

impl fmt::Display for MemAddrType {
    /// Writes the address-type suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::A32 => ".a32",
            Self::A64 => ".a64",
        };
        f.write_str(suffix)
    }
}
2249 
/// The data type of a memory access.
///
/// Displayed as `.u8`, `.i8`, `.u16`, `.i16`, `.b32`, `.b64` or `.b128`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemType {
    U8,
    I8,
    U16,
    I16,
    B32,
    B64,
    B128,
}

impl MemType {
    /// Returns the memory type for an access of `size` bytes.
    ///
    /// `is_signed` only matters for sizes 1 and 2; it is ignored for 4, 8
    /// and 16.
    ///
    /// # Panics
    ///
    /// Panics if `size` is not 1, 2, 4, 8 or 16.
    pub fn from_size(size: u8, is_signed: bool) -> MemType {
        match (size, is_signed) {
            (1, false) => Self::U8,
            (1, true) => Self::I8,
            (2, false) => Self::U16,
            (2, true) => Self::I16,
            (4, _) => Self::B32,
            (8, _) => Self::B64,
            (16, _) => Self::B128,
            _ => panic!("Invalid memory load/store size"),
        }
    }

    /// Returns the width of this type in bits.
    #[allow(dead_code)]
    pub fn bits(&self) -> usize {
        match self {
            Self::I8 | Self::U8 => 8,
            Self::I16 | Self::U16 => 16,
            Self::B32 => 32,
            Self::B64 => 64,
            Self::B128 => 128,
        }
    }
}

impl fmt::Display for MemType {
    /// Writes the type suffix, e.g. `.b32`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::U8 => ".u8",
            Self::I8 => ".i8",
            Self::U16 => ".u16",
            Self::I16 => ".i16",
            Self::B32 => ".b32",
            Self::B64 => ".b64",
            Self::B128 => ".b128",
        };
        f.write_str(suffix)
    }
}
2310 
2311 #[allow(dead_code)]
2312 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2313 pub enum MemOrder {
2314     Constant,
2315     Weak,
2316     Strong(MemScope),
2317 }
2318 
2319 impl fmt::Display for MemOrder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2320     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2321         match self {
2322             MemOrder::Constant => write!(f, ".constant"),
2323             MemOrder::Weak => write!(f, ".weak"),
2324             MemOrder::Strong(scope) => write!(f, ".strong{}", scope),
2325         }
2326     }
2327 }
2328 
/// A memory scope, displayed as `.cta`, `.gpu` or `.sys`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemScope {
    CTA,
    GPU,
    System,
}

impl fmt::Display for MemScope {
    /// Writes the scope suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::CTA => ".cta",
            Self::GPU => ".gpu",
            Self::System => ".sys",
        };
        f.write_str(suffix)
    }
}
2345 
2346 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2347 pub enum MemSpace {
2348     Global(MemAddrType),
2349     Local,
2350     Shared,
2351 }
2352 
2353 impl MemSpace {
addr_type(&self) -> MemAddrType2354     pub fn addr_type(&self) -> MemAddrType {
2355         match self {
2356             MemSpace::Global(t) => *t,
2357             MemSpace::Local => MemAddrType::A32,
2358             MemSpace::Shared => MemAddrType::A32,
2359         }
2360     }
2361 }
2362 
2363 impl fmt::Display for MemSpace {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2364     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2365         match self {
2366             MemSpace::Global(t) => write!(f, ".global{t}"),
2367             MemSpace::Local => write!(f, ".local"),
2368             MemSpace::Shared => write!(f, ".shared"),
2369         }
2370     }
2371 }
2372 
/// A memory eviction priority.
///
/// Displayed as `.ef`, `.el` or `.lu`; `Normal` prints nothing.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    First,
    Normal,
    Last,
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    /// Writes the priority suffix, or nothing at all for `Normal`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Normal => return Ok(()),
            Self::First => ".ef",
            Self::Last => ".el",
            Self::Unchanged => ".lu",
        };
        f.write_str(suffix)
    }
}
2392 
2393 #[derive(Clone)]
2394 pub struct MemAccess {
2395     pub mem_type: MemType,
2396     pub space: MemSpace,
2397     pub order: MemOrder,
2398     pub eviction_priority: MemEvictionPriority,
2399 }
2400 
2401 impl fmt::Display for MemAccess {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2402     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2403         write!(
2404             f,
2405             "{}{}{}{}",
2406             self.space, self.order, self.eviction_priority, self.mem_type,
2407         )
2408     }
2409 }
2410 
/// The data type of an atomic operation.
///
/// Displayed as `.f16x2`, `.u32`, `.i32`, `.f32`, `.u64`, `.i64` or
/// `.f64`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomType {
    F16x2,
    U32,
    I32,
    F32,
    U64,
    I64,
    F64,
}

impl AtomType {
    /// Returns the float atomic type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 32 or 64; 16 has its own panic message.
    pub fn F(bits: u8) -> AtomType {
        match bits {
            32 => Self::F32,
            64 => Self::F64,
            16 => panic!("16-bit float atomics not yet supported"),
            _ => panic!("Invalid float atomic type"),
        }
    }

    /// Returns the unsigned atomic type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 32 or 64.
    pub fn U(bits: u8) -> AtomType {
        match bits {
            32 => Self::U32,
            64 => Self::U64,
            _ => panic!("Invalid uint atomic type"),
        }
    }

    /// Returns the signed atomic type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 32 or 64.
    pub fn I(bits: u8) -> AtomType {
        match bits {
            32 => Self::I32,
            64 => Self::I64,
            _ => panic!("Invalid int atomic type"),
        }
    }

    /// Returns the width of this type in bits; `F16x2` counts as 32.
    pub fn bits(&self) -> usize {
        match self {
            Self::F16x2 | Self::U32 | Self::I32 | Self::F32 => 32,
            Self::U64 | Self::I64 | Self::F64 => 64,
        }
    }
}

impl fmt::Display for AtomType {
    /// Writes the type suffix, e.g. `.u32`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::F16x2 => ".f16x2",
            Self::U32 => ".u32",
            Self::I32 => ".i32",
            Self::F32 => ".f32",
            Self::U64 => ".u64",
            Self::I64 => ".i64",
            Self::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2471 
/// Where the compare value of a compare-exchange comes from.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomCmpSrc {
    /// The cmpr value is passed as a separate source
    Separate,
    /// The cmpr value is packed in with the data with cmpr coming first
    Packed,
}

/// An atomic operation.
///
/// Displayed as a dotted lowercase suffix such as `.add` or `.cmpexch`;
/// a packed compare-exchange prints `.cmpexch.packed`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp {
    Add,
    Min,
    Max,
    Inc,
    Dec,
    And,
    Or,
    Xor,
    Exch,
    CmpExch(AtomCmpSrc),
}

impl AtomOp {
    /// Returns `true` for every operation except `Exch` and `CmpExch`.
    pub fn is_reduction(&self) -> bool {
        match self {
            Self::Exch | Self::CmpExch(_) => false,
            Self::Add
            | Self::Min
            | Self::Max
            | Self::Inc
            | Self::Dec
            | Self::And
            | Self::Or
            | Self::Xor => true,
        }
    }
}

impl fmt::Display for AtomOp {
    /// Writes the operation suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Add => ".add",
            Self::Min => ".min",
            Self::Max => ".max",
            Self::Inc => ".inc",
            Self::Dec => ".dec",
            Self::And => ".and",
            Self::Or => ".or",
            Self::Xor => ".xor",
            Self::Exch => ".exch",
            Self::CmpExch(src) => match src {
                AtomCmpSrc::Separate => ".cmpexch",
                AtomCmpSrc::Packed => ".cmpexch.packed",
            },
        };
        f.write_str(suffix)
    }
}
2528 
/// An interpolation frequency, displayed as `.pass`, `.pass_mul_w`,
/// `.constant` or `.state`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpFreq {
    Pass,
    PassMulW,
    Constant,
    State,
}

impl fmt::Display for InterpFreq {
    /// Writes the frequency suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Pass => ".pass",
            Self::PassMulW => ".pass_mul_w",
            Self::Constant => ".constant",
            Self::State => ".state",
        };
        f.write_str(suffix)
    }
}
/// An interpolation location, displayed as `.centroid` or `.offset`;
/// `Default` prints nothing.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpLoc {
    Default,
    Centroid,
    Offset,
}

impl fmt::Display for InterpLoc {
    /// Writes the location suffix, or nothing at all for `Default`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Default => return Ok(()),
            Self::Centroid => ".centroid",
            Self::Offset => ".offset",
        };
        f.write_str(suffix)
    }
}
2563 
/// Parameters describing an attribute access.
///
/// NOTE(review): the field notes below are inferred from the field names
/// only; confirm them against the code that builds and encodes this struct.
pub struct AttrAccess {
    /// Attribute address (presumably an offset; units to be confirmed).
    pub addr: u16,
    /// Presumably the number of components accessed (to be confirmed).
    pub comps: u8,
    /// Presumably selects per-patch attributes (to be confirmed).
    pub patch: bool,
    /// Presumably selects output rather than input attributes (to be
    /// confirmed).
    pub output: bool,
    /// Presumably selects physical addressing (to be confirmed).
    pub phys: bool,
}
2571 
2572 #[repr(C)]
2573 #[derive(SrcsAsSlice, DstsAsSlice)]
2574 pub struct OpFAdd {
2575     #[dst_type(F32)]
2576     pub dst: Dst,
2577 
2578     #[src_type(F32)]
2579     pub srcs: [Src; 2],
2580 
2581     pub saturate: bool,
2582     pub rnd_mode: FRndMode,
2583     pub ftz: bool,
2584 }
2585 
2586 impl DisplayOp for OpFAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2587     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2588         let sat = if self.saturate { ".sat" } else { "" };
2589         write!(f, "fadd{sat}")?;
2590         if self.rnd_mode != FRndMode::NearestEven {
2591             write!(f, "{}", self.rnd_mode)?;
2592         }
2593         if self.ftz {
2594             write!(f, ".ftz")?;
2595         }
2596         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2597     }
2598 }
2599 impl_display_for_op!(OpFAdd);
2600 
2601 #[repr(C)]
2602 #[derive(SrcsAsSlice, DstsAsSlice)]
2603 pub struct OpFFma {
2604     #[dst_type(F32)]
2605     pub dst: Dst,
2606 
2607     #[src_type(F32)]
2608     pub srcs: [Src; 3],
2609 
2610     pub saturate: bool,
2611     pub rnd_mode: FRndMode,
2612     pub ftz: bool,
2613     pub dnz: bool,
2614 }
2615 
2616 impl DisplayOp for OpFFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2617     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2618         let sat = if self.saturate { ".sat" } else { "" };
2619         write!(f, "ffma{sat}")?;
2620         if self.rnd_mode != FRndMode::NearestEven {
2621             write!(f, "{}", self.rnd_mode)?;
2622         }
2623         if self.dnz {
2624             write!(f, ".dnz")?;
2625         } else if self.ftz {
2626             write!(f, ".ftz")?;
2627         }
2628         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2629     }
2630 }
2631 impl_display_for_op!(OpFFma);
2632 
2633 #[repr(C)]
2634 #[derive(SrcsAsSlice, DstsAsSlice)]
2635 pub struct OpFMnMx {
2636     #[dst_type(F32)]
2637     pub dst: Dst,
2638 
2639     #[src_type(F32)]
2640     pub srcs: [Src; 2],
2641 
2642     #[src_type(Pred)]
2643     pub min: Src,
2644 
2645     pub ftz: bool,
2646 }
2647 
2648 impl DisplayOp for OpFMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2649     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2650         let ftz = if self.ftz { ".ftz" } else { "" };
2651         write!(
2652             f,
2653             "fmnmx{ftz} {} {} {}",
2654             self.srcs[0], self.srcs[1], self.min
2655         )
2656     }
2657 }
2658 impl_display_for_op!(OpFMnMx);
2659 
2660 #[repr(C)]
2661 #[derive(SrcsAsSlice, DstsAsSlice)]
2662 pub struct OpFMul {
2663     #[dst_type(F32)]
2664     pub dst: Dst,
2665 
2666     #[src_type(F32)]
2667     pub srcs: [Src; 2],
2668 
2669     pub saturate: bool,
2670     pub rnd_mode: FRndMode,
2671     pub ftz: bool,
2672     pub dnz: bool,
2673 }
2674 
2675 impl DisplayOp for OpFMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2676     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2677         let sat = if self.saturate { ".sat" } else { "" };
2678         write!(f, "fmul{sat}")?;
2679         if self.rnd_mode != FRndMode::NearestEven {
2680             write!(f, "{}", self.rnd_mode)?;
2681         }
2682         if self.dnz {
2683             write!(f, ".dnz")?;
2684         } else if self.ftz {
2685             write!(f, ".ftz")?;
2686         }
2687         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2688     }
2689 }
2690 impl_display_for_op!(OpFMul);
2691 
2692 #[repr(C)]
2693 #[derive(SrcsAsSlice, DstsAsSlice)]
2694 pub struct OpFSet {
2695     #[dst_type(F32)]
2696     pub dst: Dst,
2697 
2698     pub cmp_op: FloatCmpOp,
2699 
2700     #[src_type(F32)]
2701     pub srcs: [Src; 2],
2702 
2703     pub ftz: bool,
2704 }
2705 
2706 impl DisplayOp for OpFSet {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2707     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2708         let ftz = if self.ftz { ".ftz" } else { "" };
2709         write!(
2710             f,
2711             "fset{}{ftz} {} {}",
2712             self.cmp_op, self.srcs[0], self.srcs[1]
2713         )
2714     }
2715 }
2716 impl_display_for_op!(OpFSet);
2717 
2718 #[repr(C)]
2719 #[derive(SrcsAsSlice, DstsAsSlice)]
2720 pub struct OpFSetP {
2721     #[dst_type(Pred)]
2722     pub dst: Dst,
2723 
2724     pub set_op: PredSetOp,
2725     pub cmp_op: FloatCmpOp,
2726 
2727     #[src_type(F32)]
2728     pub srcs: [Src; 2],
2729 
2730     #[src_type(Pred)]
2731     pub accum: Src,
2732 
2733     pub ftz: bool,
2734 }
2735 
2736 impl DisplayOp for OpFSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2737     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2738         let ftz = if self.ftz { ".ftz" } else { "" };
2739         write!(f, "fsetp{}{ftz}", self.cmp_op)?;
2740         if !self.set_op.is_trivial(&self.accum) {
2741             write!(f, "{}", self.set_op)?;
2742         }
2743         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2744         if !self.set_op.is_trivial(&self.accum) {
2745             write!(f, " {}", self.accum)?;
2746         }
2747         Ok(())
2748     }
2749 }
2750 impl_display_for_op!(OpFSetP);
2751 
/// A per-entry operation of [`OpFSwzAdd`], displayed as `add`, `subr`,
/// `sub` or `mov2`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    Add,
    SubRight,
    SubLeft,
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    /// Writes the short name of the operation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Add => "add",
            Self::SubRight => "subr",
            Self::SubLeft => "sub",
            Self::MoveLeft => "mov2",
        };
        f.write_str(name)
    }
}
2771 
2772 #[repr(C)]
2773 #[derive(SrcsAsSlice, DstsAsSlice)]
2774 pub struct OpFSwzAdd {
2775     #[dst_type(F32)]
2776     pub dst: Dst,
2777 
2778     #[src_type(GPR)]
2779     pub srcs: [Src; 2],
2780 
2781     pub rnd_mode: FRndMode,
2782     pub ftz: bool,
2783 
2784     pub ops: [FSwzAddOp; 4],
2785 }
2786 
2787 impl DisplayOp for OpFSwzAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2788     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2789         write!(f, "fswzadd",)?;
2790         if self.rnd_mode != FRndMode::NearestEven {
2791             write!(f, "{}", self.rnd_mode)?;
2792         }
2793         if self.ftz {
2794             write!(f, ".ftz")?;
2795         }
2796         write!(
2797             f,
2798             " {} {} [{}, {}, {}, {}]",
2799             self.srcs[0],
2800             self.srcs[1],
2801             self.ops[0],
2802             self.ops[1],
2803             self.ops[2],
2804             self.ops[3],
2805         )
2806     }
2807 }
2808 impl_display_for_op!(OpFSwzAdd);
2809 
/// The operation selector of `OpRro`, displayed as `.sincos` or `.exp2`.
///
/// Derives the same traits as the sibling selector enums (e.g.
/// `MuFuOp`) so that values can be copied and compared.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum RroOp {
    SinCos,
    Exp2,
}

impl fmt::Display for RroOp {
    /// Writes the operation suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            RroOp::SinCos => write!(f, ".sincos"),
            RroOp::Exp2 => write!(f, ".exp2"),
        }
    }
}
2823 
2824 /// MuFu range reduction operator
2825 ///
2826 /// Not available on SM70+
2827 #[repr(C)]
2828 #[derive(SrcsAsSlice, DstsAsSlice)]
2829 pub struct OpRro {
2830     #[dst_type(F32)]
2831     pub dst: Dst,
2832 
2833     pub op: RroOp,
2834 
2835     #[src_type(F32)]
2836     pub src: Src,
2837 }
2838 
2839 impl DisplayOp for OpRro {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2840     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2841         write!(f, "rro{} {}", self.op, self.src)
2842     }
2843 }
2844 impl_display_for_op!(OpRro);
2845 
/// The function selector of `OpMuFu`.
///
/// Displayed as the lowercase function name without a leading dot, e.g.
/// `cos`, `rcp64h` or `tanh`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum MuFuOp {
    Cos,
    Sin,
    Exp2,
    Log2,
    Rcp,
    Rsq,
    Rcp64H,
    Rsq64H,
    Sqrt,
    Tanh,
}

impl fmt::Display for MuFuOp {
    /// Writes the lowercase name of the function.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Cos => "cos",
            Self::Sin => "sin",
            Self::Exp2 => "exp2",
            Self::Log2 => "log2",
            Self::Rcp => "rcp",
            Self::Rsq => "rsq",
            Self::Rcp64H => "rcp64h",
            Self::Rsq64H => "rsq64h",
            Self::Sqrt => "sqrt",
            Self::Tanh => "tanh",
        };
        f.write_str(name)
    }
}
2877 
2878 #[repr(C)]
2879 #[derive(SrcsAsSlice, DstsAsSlice)]
2880 pub struct OpMuFu {
2881     #[dst_type(F32)]
2882     pub dst: Dst,
2883 
2884     pub op: MuFuOp,
2885 
2886     #[src_type(F32)]
2887     pub src: Src,
2888 }
2889 
2890 impl DisplayOp for OpMuFu {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2891     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2892         write!(f, "mufu.{} {}", self.op, self.src)
2893     }
2894 }
2895 impl_display_for_op!(OpMuFu);
2896 
2897 #[repr(C)]
2898 #[derive(SrcsAsSlice, DstsAsSlice)]
2899 pub struct OpDAdd {
2900     #[dst_type(F64)]
2901     pub dst: Dst,
2902 
2903     #[src_type(F64)]
2904     pub srcs: [Src; 2],
2905 
2906     pub rnd_mode: FRndMode,
2907 }
2908 
2909 impl DisplayOp for OpDAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2910     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2911         write!(f, "dadd")?;
2912         if self.rnd_mode != FRndMode::NearestEven {
2913             write!(f, "{}", self.rnd_mode)?;
2914         }
2915         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2916     }
2917 }
2918 impl_display_for_op!(OpDAdd);
2919 
2920 #[repr(C)]
2921 #[derive(SrcsAsSlice, DstsAsSlice)]
2922 pub struct OpDMul {
2923     #[dst_type(F64)]
2924     pub dst: Dst,
2925 
2926     #[src_type(F64)]
2927     pub srcs: [Src; 2],
2928 
2929     pub rnd_mode: FRndMode,
2930 }
2931 
2932 impl DisplayOp for OpDMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2933     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2934         write!(f, "dmul")?;
2935         if self.rnd_mode != FRndMode::NearestEven {
2936             write!(f, "{}", self.rnd_mode)?;
2937         }
2938         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2939     }
2940 }
2941 impl_display_for_op!(OpDMul);
2942 
2943 #[repr(C)]
2944 #[derive(SrcsAsSlice, DstsAsSlice)]
2945 pub struct OpDFma {
2946     #[dst_type(F64)]
2947     pub dst: Dst,
2948 
2949     #[src_type(F64)]
2950     pub srcs: [Src; 3],
2951 
2952     pub rnd_mode: FRndMode,
2953 }
2954 
2955 impl DisplayOp for OpDFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2956     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2957         write!(f, "dfma")?;
2958         if self.rnd_mode != FRndMode::NearestEven {
2959             write!(f, "{}", self.rnd_mode)?;
2960         }
2961         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2962     }
2963 }
2964 impl_display_for_op!(OpDFma);
2965 
2966 #[repr(C)]
2967 #[derive(SrcsAsSlice, DstsAsSlice)]
2968 pub struct OpDMnMx {
2969     #[dst_type(F64)]
2970     pub dst: Dst,
2971 
2972     #[src_type(F64)]
2973     pub srcs: [Src; 2],
2974 
2975     #[src_type(Pred)]
2976     pub min: Src,
2977 }
2978 
2979 impl DisplayOp for OpDMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2980     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2981         write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
2982     }
2983 }
2984 impl_display_for_op!(OpDMnMx);
2985 
2986 #[repr(C)]
2987 #[derive(SrcsAsSlice, DstsAsSlice)]
2988 pub struct OpDSetP {
2989     #[dst_type(Pred)]
2990     pub dst: Dst,
2991 
2992     pub set_op: PredSetOp,
2993     pub cmp_op: FloatCmpOp,
2994 
2995     #[src_type(F64)]
2996     pub srcs: [Src; 2],
2997 
2998     #[src_type(Pred)]
2999     pub accum: Src,
3000 }
3001 
3002 impl DisplayOp for OpDSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3003     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3004         write!(f, "dsetp{}", self.cmp_op)?;
3005         if !self.set_op.is_trivial(&self.accum) {
3006             write!(f, "{}", self.set_op)?;
3007         }
3008         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3009         if !self.set_op.is_trivial(&self.accum) {
3010             write!(f, " {}", self.accum)?;
3011         }
3012         Ok(())
3013     }
3014 }
3015 impl_display_for_op!(OpDSetP);
3016 
3017 #[repr(C)]
3018 #[derive(SrcsAsSlice, DstsAsSlice)]
3019 pub struct OpHAdd2 {
3020     #[dst_type(F16v2)]
3021     pub dst: Dst,
3022 
3023     #[src_type(F16v2)]
3024     pub srcs: [Src; 2],
3025 
3026     pub saturate: bool,
3027     pub ftz: bool,
3028     pub f32: bool,
3029 }
3030 
3031 impl DisplayOp for OpHAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3032     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3033         let sat = if self.saturate { ".sat" } else { "" };
3034         let f32 = if self.f32 { ".f32" } else { "" };
3035         write!(f, "hadd2{sat}{f32}")?;
3036         if self.ftz {
3037             write!(f, ".ftz")?;
3038         }
3039         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3040     }
3041 }
3042 impl_display_for_op!(OpHAdd2);
3043 
3044 #[repr(C)]
3045 #[derive(SrcsAsSlice, DstsAsSlice)]
3046 pub struct OpHSet2 {
3047     #[dst_type(F16v2)]
3048     pub dst: Dst,
3049 
3050     pub set_op: PredSetOp,
3051     pub cmp_op: FloatCmpOp,
3052 
3053     #[src_type(F16v2)]
3054     pub srcs: [Src; 2],
3055 
3056     #[src_type(Pred)]
3057     pub accum: Src,
3058 
3059     pub ftz: bool,
3060 }
3061 
3062 impl DisplayOp for OpHSet2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3063     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3064         let ftz = if self.ftz { ".ftz" } else { "" };
3065         write!(f, "hset2{}{ftz}", self.cmp_op)?;
3066         if !self.set_op.is_trivial(&self.accum) {
3067             write!(f, "{}", self.set_op)?;
3068         }
3069         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3070         if !self.set_op.is_trivial(&self.accum) {
3071             write!(f, " {}", self.accum)?;
3072         }
3073         Ok(())
3074     }
3075 }
3076 impl_display_for_op!(OpHSet2);
3077 
3078 #[repr(C)]
3079 #[derive(SrcsAsSlice, DstsAsSlice)]
3080 pub struct OpHSetP2 {
3081     #[dst_type(Pred)]
3082     pub dsts: [Dst; 2],
3083 
3084     pub set_op: PredSetOp,
3085     pub cmp_op: FloatCmpOp,
3086 
3087     #[src_type(F16v2)]
3088     pub srcs: [Src; 2],
3089 
3090     #[src_type(Pred)]
3091     pub accum: Src,
3092 
3093     pub ftz: bool,
3094 
3095     // When not set, each dsts get the result of each lanes.
3096     // When set, the first dst gets the result of both lanes (res0 && res1)
3097     // and the second dst gets the negation !(res0 && res1)
3098     // before applying the accumulator.
3099     pub horizontal: bool,
3100 }
3101 
3102 impl DisplayOp for OpHSetP2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3103     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3104         let ftz = if self.ftz { ".ftz" } else { "" };
3105         write!(f, "hsetp2{}{ftz}", self.cmp_op)?;
3106         if !self.set_op.is_trivial(&self.accum) {
3107             write!(f, "{}", self.set_op)?;
3108         }
3109         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3110         if !self.set_op.is_trivial(&self.accum) {
3111             write!(f, " {}", self.accum)?;
3112         }
3113         Ok(())
3114     }
3115 }
3116 impl_display_for_op!(OpHSetP2);
3117 
3118 #[repr(C)]
3119 #[derive(SrcsAsSlice, DstsAsSlice)]
3120 pub struct OpHMul2 {
3121     #[dst_type(F16v2)]
3122     pub dst: Dst,
3123 
3124     #[src_type(F16v2)]
3125     pub srcs: [Src; 2],
3126 
3127     pub saturate: bool,
3128     pub ftz: bool,
3129     pub dnz: bool,
3130 }
3131 
3132 impl DisplayOp for OpHMul2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3133     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3134         let sat = if self.saturate { ".sat" } else { "" };
3135         write!(f, "hmul2{sat}")?;
3136         if self.dnz {
3137             write!(f, ".dnz")?;
3138         } else if self.ftz {
3139             write!(f, ".ftz")?;
3140         }
3141         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3142     }
3143 }
3144 impl_display_for_op!(OpHMul2);
3145 
3146 #[repr(C)]
3147 #[derive(SrcsAsSlice, DstsAsSlice)]
3148 pub struct OpHFma2 {
3149     #[dst_type(F16v2)]
3150     pub dst: Dst,
3151 
3152     #[src_type(F16v2)]
3153     pub srcs: [Src; 3],
3154 
3155     pub saturate: bool,
3156     pub ftz: bool,
3157     pub dnz: bool,
3158     pub f32: bool,
3159 }
3160 
3161 impl DisplayOp for OpHFma2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3162     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3163         let sat = if self.saturate { ".sat" } else { "" };
3164         let f32 = if self.f32 { ".f32" } else { "" };
3165         write!(f, "hfma2{sat}{f32}")?;
3166         if self.dnz {
3167             write!(f, ".dnz")?;
3168         } else if self.ftz {
3169             write!(f, ".ftz")?;
3170         }
3171         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
3172     }
3173 }
3174 impl_display_for_op!(OpHFma2);
3175 
3176 #[repr(C)]
3177 #[derive(SrcsAsSlice, DstsAsSlice)]
3178 pub struct OpHMnMx2 {
3179     #[dst_type(F16v2)]
3180     pub dst: Dst,
3181 
3182     #[src_type(F16v2)]
3183     pub srcs: [Src; 2],
3184 
3185     #[src_type(Pred)]
3186     pub min: Src,
3187 
3188     pub ftz: bool,
3189 }
3190 
3191 impl DisplayOp for OpHMnMx2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3192     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3193         let ftz = if self.ftz { ".ftz" } else { "" };
3194         write!(
3195             f,
3196             "hmnmx2{ftz} {} {} {}",
3197             self.srcs[0], self.srcs[1], self.min
3198         )
3199     }
3200 }
3201 impl_display_for_op!(OpHMnMx2);
3202 
3203 #[repr(C)]
3204 #[derive(SrcsAsSlice, DstsAsSlice)]
3205 pub struct OpBMsk {
3206     #[dst_type(GPR)]
3207     pub dst: Dst,
3208 
3209     #[src_type(ALU)]
3210     pub pos: Src,
3211 
3212     #[src_type(ALU)]
3213     pub width: Src,
3214 
3215     pub wrap: bool,
3216 }
3217 
3218 impl DisplayOp for OpBMsk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3219     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3220         let wrap = if self.wrap { ".wrap" } else { ".clamp" };
3221         write!(f, "bmsk{} {} {}", wrap, self.pos, self.width)
3222     }
3223 }
3224 impl_display_for_op!(OpBMsk);
3225 
3226 #[repr(C)]
3227 #[derive(SrcsAsSlice, DstsAsSlice)]
3228 pub struct OpBRev {
3229     #[dst_type(GPR)]
3230     pub dst: Dst,
3231 
3232     #[src_type(ALU)]
3233     pub src: Src,
3234 }
3235 
3236 impl DisplayOp for OpBRev {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3237     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3238         write!(f, "brev {}", self.src)
3239     }
3240 }
3241 impl_display_for_op!(OpBRev);
3242 
3243 /// Bitfield extract. Extracts all bits from `base` starting at `offset` into
3244 /// `dst`.
3245 #[repr(C)]
3246 #[derive(SrcsAsSlice, DstsAsSlice)]
3247 pub struct OpBfe {
3248     /// Where to insert the bits.
3249     #[dst_type(GPR)]
3250     pub dst: Dst,
3251 
3252     /// The source of bits to extract.
3253     #[src_type(ALU)]
3254     pub base: Src,
3255 
3256     /// The range of bits to extract. This source is interpreted as four
3257     /// separate bytes, [b0, b1, b2, b3].
3258     ///
3259     /// b0 and b1: unused
3260     /// b2: the number of bits to extract.
3261     /// b3: the offset of the first bit to extract.
3262     ///
3263     /// This matches the way the hardware works.
3264     #[src_type(ALU)]
3265     pub range: Src,
3266 
3267     /// Whether the output is signed
3268     pub signed: bool,
3269 
3270     /// Whether to reverse the bits before inserting them into `dst`.
3271     pub reverse: bool,
3272 }
3273 
3274 impl DisplayOp for OpBfe {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3275     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3276         write!(f, "bfe")?;
3277         if self.signed {
3278             write!(f, ".s")?;
3279         }
3280         if self.reverse {
3281             write!(f, ".rev")?;
3282         }
3283         write!(f, " {} {}", self.base, self.range,)
3284     }
3285 }
3286 impl_display_for_op!(OpBfe);
3287 
3288 #[repr(C)]
3289 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3290 pub struct OpFlo {
3291     #[dst_type(GPR)]
3292     pub dst: Dst,
3293 
3294     #[src_type(ALU)]
3295     pub src: Src,
3296 
3297     pub signed: bool,
3298     pub return_shift_amount: bool,
3299 }
3300 
3301 impl Foldable for OpFlo {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3302     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3303         let src = f.get_u32_src(self, &self.src);
3304         let leading = if self.signed && (src & 0x80000000) != 0 {
3305             (!src).leading_zeros()
3306         } else {
3307             src.leading_zeros()
3308         };
3309         let dst = if self.return_shift_amount {
3310             leading
3311         } else {
3312             31 - leading
3313         };
3314         f.set_u32_dst(self, &self.dst, dst);
3315     }
3316 }
3317 
3318 impl DisplayOp for OpFlo {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3319     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3320         write!(f, "flo")?;
3321         if self.return_shift_amount {
3322             write!(f, ".samt")?;
3323         }
3324         write!(f, " {}", self.src)
3325     }
3326 }
3327 impl_display_for_op!(OpFlo);
3328 
3329 #[repr(C)]
3330 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3331 pub struct OpIAbs {
3332     #[dst_type(GPR)]
3333     pub dst: Dst,
3334 
3335     #[src_type(ALU)]
3336     pub src: Src,
3337 }
3338 
3339 impl Foldable for OpIAbs {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3340     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3341         let src = f.get_u32_src(self, &self.src);
3342         let dst = (src as i32).abs() as u32;
3343         f.set_u32_dst(self, &self.dst, dst);
3344     }
3345 }
3346 
3347 impl DisplayOp for OpIAbs {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3348     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3349         write!(f, "iabs {}", self.src)
3350     }
3351 }
3352 impl_display_for_op!(OpIAbs);
3353 
3354 /// Only used on SM50
3355 #[repr(C)]
3356 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3357 pub struct OpIAdd2 {
3358     #[dst_type(GPR)]
3359     pub dst: Dst,
3360     #[dst_type(Carry)]
3361     pub carry_out: Dst,
3362 
3363     #[src_type(I32)]
3364     pub srcs: [Src; 2],
3365 }
3366 
3367 impl Foldable for OpIAdd2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3368     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3369         let srcs = [
3370             f.get_u32_src(self, &self.srcs[0]),
3371             f.get_u32_src(self, &self.srcs[1]),
3372         ];
3373 
3374         let mut sum = 0_u64;
3375         for i in 0..2 {
3376             if self.srcs[i].src_mod.is_ineg() {
3377                 // This is a very literal interpretation of 2's compliment.
3378                 // This is not -u64::from(src) or u64::from(-src).
3379                 sum += u64::from(!srcs[i]) + 1;
3380             } else {
3381                 sum += u64::from(srcs[i]);
3382             }
3383         }
3384 
3385         f.set_u32_dst(self, &self.dst, sum as u32);
3386         f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3387     }
3388 }
3389 
3390 impl DisplayOp for OpIAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3391     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3392         write!(f, "iadd2 {} {}", self.srcs[0], self.srcs[1])
3393     }
3394 }
3395 
3396 /// Only used on SM50
3397 #[repr(C)]
3398 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3399 pub struct OpIAdd2X {
3400     #[dst_type(GPR)]
3401     pub dst: Dst,
3402     #[dst_type(Carry)]
3403     pub carry_out: Dst,
3404 
3405     #[src_type(B32)]
3406     pub srcs: [Src; 2],
3407     #[src_type(Carry)]
3408     pub carry_in: Src,
3409 }
3410 
3411 impl Foldable for OpIAdd2X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3412     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3413         let srcs = [
3414             f.get_u32_bnot_src(self, &self.srcs[0]),
3415             f.get_u32_bnot_src(self, &self.srcs[1]),
3416         ];
3417         let carry_in = f.get_carry_src(self, &self.carry_in);
3418 
3419         let sum = u64::from(srcs[0]) + u64::from(srcs[1]) + u64::from(carry_in);
3420 
3421         f.set_u32_dst(self, &self.dst, sum as u32);
3422         f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3423     }
3424 }
3425 
3426 impl DisplayOp for OpIAdd2X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3427     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3428         write!(f, "iadd2.x {} {}", self.srcs[0], self.srcs[1])?;
3429         if !self.carry_in.is_zero() {
3430             write!(f, " {}", self.carry_in)?;
3431         }
3432         Ok(())
3433     }
3434 }
3435 
3436 #[repr(C)]
3437 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3438 pub struct OpIAdd3 {
3439     #[dst_type(GPR)]
3440     pub dst: Dst,
3441 
3442     #[dst_type(Pred)]
3443     pub overflow: [Dst; 2],
3444 
3445     #[src_type(I32)]
3446     pub srcs: [Src; 3],
3447 }
3448 
3449 impl Foldable for OpIAdd3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3450     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3451         let srcs = [
3452             f.get_u32_src(self, &self.srcs[0]),
3453             f.get_u32_src(self, &self.srcs[1]),
3454             f.get_u32_src(self, &self.srcs[2]),
3455         ];
3456 
3457         let mut sum = 0_u64;
3458         for i in 0..3 {
3459             if self.srcs[i].src_mod.is_ineg() {
3460                 // This is a very literal interpretation of 2's compliment.
3461                 // This is not -u64::from(src) or u64::from(-src).
3462                 sum += u64::from(!srcs[i]) + 1;
3463             } else {
3464                 sum += u64::from(srcs[i]);
3465             }
3466         }
3467 
3468         f.set_u32_dst(self, &self.dst, sum as u32);
3469         f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3470         f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3471     }
3472 }
3473 
3474 impl DisplayOp for OpIAdd3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3475     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3476         write!(
3477             f,
3478             "iadd3 {} {} {}",
3479             self.srcs[0], self.srcs[1], self.srcs[2],
3480         )
3481     }
3482 }
3483 impl_display_for_op!(OpIAdd3);
3484 
3485 #[repr(C)]
3486 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3487 pub struct OpIAdd3X {
3488     #[dst_type(GPR)]
3489     pub dst: Dst,
3490 
3491     #[dst_type(Pred)]
3492     pub overflow: [Dst; 2],
3493 
3494     #[src_type(B32)]
3495     pub srcs: [Src; 3],
3496 
3497     #[src_type(Pred)]
3498     pub carry: [Src; 2],
3499 }
3500 
3501 impl Foldable for OpIAdd3X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3502     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3503         let srcs = [
3504             f.get_u32_bnot_src(self, &self.srcs[0]),
3505             f.get_u32_bnot_src(self, &self.srcs[1]),
3506             f.get_u32_bnot_src(self, &self.srcs[2]),
3507         ];
3508         let carry = [
3509             f.get_pred_src(self, &self.carry[0]),
3510             f.get_pred_src(self, &self.carry[1]),
3511         ];
3512 
3513         let mut sum = 0_u64;
3514         for i in 0..3 {
3515             sum += u64::from(srcs[i]);
3516         }
3517 
3518         for i in 0..2 {
3519             sum += u64::from(carry[i]);
3520         }
3521 
3522         f.set_u32_dst(self, &self.dst, sum as u32);
3523         f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3524         f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3525     }
3526 }
3527 
3528 impl DisplayOp for OpIAdd3X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3529     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3530         write!(
3531             f,
3532             "iadd3.x {} {} {} {} {}",
3533             self.srcs[0],
3534             self.srcs[1],
3535             self.srcs[2],
3536             self.carry[0],
3537             self.carry[1]
3538         )
3539     }
3540 }
3541 impl_display_for_op!(OpIAdd3X);
3542 
3543 #[repr(C)]
3544 #[derive(SrcsAsSlice, DstsAsSlice)]
3545 pub struct OpIDp4 {
3546     #[dst_type(GPR)]
3547     pub dst: Dst,
3548 
3549     pub src_types: [IntType; 2],
3550 
3551     #[src_type(I32)]
3552     pub srcs: [Src; 3],
3553 }
3554 
3555 impl DisplayOp for OpIDp4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3556     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3557         write!(
3558             f,
3559             "idp4{}{} {} {} {}",
3560             self.src_types[0],
3561             self.src_types[1],
3562             self.srcs[0],
3563             self.srcs[1],
3564             self.srcs[2],
3565         )
3566     }
3567 }
3568 impl_display_for_op!(OpIDp4);
3569 
3570 #[repr(C)]
3571 #[derive(SrcsAsSlice, DstsAsSlice)]
3572 pub struct OpIMad {
3573     #[dst_type(GPR)]
3574     pub dst: Dst,
3575 
3576     #[src_type(ALU)]
3577     pub srcs: [Src; 3],
3578 
3579     pub signed: bool,
3580 }
3581 
3582 impl DisplayOp for OpIMad {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3583     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3584         write!(f, "imad {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2],)
3585     }
3586 }
3587 impl_display_for_op!(OpIMad);
3588 
3589 /// Only used on SM50
3590 #[repr(C)]
3591 #[derive(SrcsAsSlice, DstsAsSlice)]
3592 pub struct OpIMul {
3593     #[dst_type(GPR)]
3594     pub dst: Dst,
3595 
3596     #[src_type(ALU)]
3597     pub srcs: [Src; 2],
3598 
3599     pub signed: [bool; 2],
3600     pub high: bool,
3601 }
3602 
3603 impl DisplayOp for OpIMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3604     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3605         write!(f, "imul")?;
3606         if self.high {
3607             write!(f, ".hi")?;
3608         }
3609         let src_type = |signed| if signed { ".s32" } else { ".u32" };
3610         write!(
3611             f,
3612             "{}{}",
3613             src_type(self.signed[0]),
3614             src_type(self.signed[1])
3615         )?;
3616         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3617     }
3618 }
3619 
3620 #[repr(C)]
3621 #[derive(SrcsAsSlice, DstsAsSlice)]
3622 pub struct OpIMad64 {
3623     #[dst_type(Vec)]
3624     pub dst: Dst,
3625 
3626     #[src_type(ALU)]
3627     pub srcs: [Src; 3],
3628 
3629     pub signed: bool,
3630 }
3631 
3632 impl DisplayOp for OpIMad64 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3633     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3634         write!(
3635             f,
3636             "imad64 {} {} {}",
3637             self.srcs[0], self.srcs[1], self.srcs[2],
3638         )
3639     }
3640 }
3641 impl_display_for_op!(OpIMad64);
3642 
3643 #[repr(C)]
3644 #[derive(SrcsAsSlice, DstsAsSlice)]
3645 pub struct OpIMnMx {
3646     #[dst_type(GPR)]
3647     pub dst: Dst,
3648 
3649     pub cmp_type: IntCmpType,
3650 
3651     #[src_type(ALU)]
3652     pub srcs: [Src; 2],
3653 
3654     #[src_type(Pred)]
3655     pub min: Src,
3656 }
3657 
3658 impl DisplayOp for OpIMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3659     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3660         write!(
3661             f,
3662             "imnmx{} {} {} {}",
3663             self.cmp_type, self.srcs[0], self.srcs[1], self.min
3664         )
3665     }
3666 }
3667 impl_display_for_op!(OpIMnMx);
3668 
/// An `isetp` instruction: compares two 32-bit integer sources and writes a
/// predicate, optionally combined with an accumulator predicate.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpISetP {
    /// Predicate result.
    #[dst_type(Pred)]
    pub dst: Dst,

    /// How the comparison result is combined with `accum`.
    pub set_op: PredSetOp,
    /// The integer comparison to perform.
    pub cmp_op: IntCmpOp,
    /// Whether the operands are compared as signed or unsigned values.
    pub cmp_type: IntCmpType,
    /// When set and the two sources are equal, `low_cmp` is used in place of
    /// the comparison result.  Printed as `.ex`.
    pub ex: bool,

    /// The two values being compared.
    #[src_type(ALU)]
    pub srcs: [Src; 2],

    /// Accumulator predicate consumed by `set_op`.
    #[src_type(Pred)]
    pub accum: Src,

    /// Predicate substituted for the comparison result when `ex` is set and
    /// the sources are equal.
    // NOTE(review): presumably the result of comparing the low halves of a
    // wider value -- confirm against callers.
    #[src_type(Pred)]
    pub low_cmp: Src,
}
3689 
3690 impl Foldable for OpISetP {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3691     fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3692         let x = f.get_u32_src(self, &self.srcs[0]);
3693         let y = f.get_u32_src(self, &self.srcs[1]);
3694         let accum = f.get_pred_src(self, &self.accum);
3695         let low_cmp = f.get_pred_src(self, &self.low_cmp);
3696 
3697         let cmp = if self.cmp_type.is_signed() {
3698             let x = x as i32;
3699             let y = y as i32;
3700             match &self.cmp_op {
3701                 IntCmpOp::Eq => x == y,
3702                 IntCmpOp::Ne => x != y,
3703                 IntCmpOp::Lt => x < y,
3704                 IntCmpOp::Le => x <= y,
3705                 IntCmpOp::Gt => x > y,
3706                 IntCmpOp::Ge => x >= y,
3707             }
3708         } else {
3709             match &self.cmp_op {
3710                 IntCmpOp::Eq => x == y,
3711                 IntCmpOp::Ne => x != y,
3712                 IntCmpOp::Lt => x < y,
3713                 IntCmpOp::Le => x <= y,
3714                 IntCmpOp::Gt => x > y,
3715                 IntCmpOp::Ge => x >= y,
3716             }
3717         };
3718 
3719         let cmp = if self.ex && x == y {
3720             // Pre-Volta, isetp.x takes the accumulator into account.  If we
3721             // want to support this, we need to take an an accumulator into
3722             // account.  Disallow it for now.
3723             assert!(sm.sm() >= 70);
3724             low_cmp
3725         } else {
3726             cmp
3727         };
3728 
3729         let dst = self.set_op.eval(cmp, accum);
3730 
3731         f.set_pred_dst(self, &self.dst, dst);
3732     }
3733 }
3734 
3735 impl DisplayOp for OpISetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3736     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3737         write!(f, "isetp{}{}", self.cmp_op, self.cmp_type)?;
3738         if !self.set_op.is_trivial(&self.accum) {
3739             write!(f, "{}", self.set_op)?;
3740         }
3741         if self.ex {
3742             write!(f, ".ex")?;
3743         }
3744         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3745         if !self.set_op.is_trivial(&self.accum) {
3746             write!(f, " {}", self.accum)?;
3747         }
3748         if self.ex {
3749             write!(f, " {}", self.low_cmp)?;
3750         }
3751         Ok(())
3752     }
3753 }
3754 impl_display_for_op!(OpISetP);
3755 
3756 #[repr(C)]
3757 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3758 pub struct OpLop2 {
3759     #[dst_type(GPR)]
3760     pub dst: Dst,
3761 
3762     #[src_type(B32)]
3763     pub srcs: [Src; 2],
3764 
3765     pub op: LogicOp2,
3766 }
3767 
3768 impl DisplayOp for OpLop2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3769     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3770         write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
3771     }
3772 }
3773 
3774 impl Foldable for OpLop2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3775     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3776         let srcs = [
3777             f.get_u32_bnot_src(self, &self.srcs[0]),
3778             f.get_u32_bnot_src(self, &self.srcs[1]),
3779         ];
3780         let dst = match self.op {
3781             LogicOp2::And => srcs[0] & srcs[1],
3782             LogicOp2::Or => srcs[0] | srcs[1],
3783             LogicOp2::Xor => srcs[0] ^ srcs[1],
3784             LogicOp2::PassB => srcs[1],
3785         };
3786         f.set_u32_dst(self, &self.dst, dst);
3787     }
3788 }
3789 
3790 #[repr(C)]
3791 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3792 pub struct OpLop3 {
3793     #[dst_type(GPR)]
3794     pub dst: Dst,
3795 
3796     #[src_type(ALU)]
3797     pub srcs: [Src; 3],
3798 
3799     pub op: LogicOp3,
3800 }
3801 
3802 impl Foldable for OpLop3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3803     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3804         let srcs = [
3805             f.get_u32_bnot_src(self, &self.srcs[0]),
3806             f.get_u32_bnot_src(self, &self.srcs[1]),
3807             f.get_u32_bnot_src(self, &self.srcs[2]),
3808         ];
3809         let dst = self.op.eval(srcs[0], srcs[1], srcs[2]);
3810         f.set_u32_dst(self, &self.dst, dst);
3811     }
3812 }
3813 
3814 impl DisplayOp for OpLop3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3815     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3816         write!(
3817             f,
3818             "lop3.{} {} {} {}",
3819             self.op, self.srcs[0], self.srcs[1], self.srcs[2],
3820         )
3821     }
3822 }
3823 impl_display_for_op!(OpLop3);
3824 
/// Selector for a shuffle operation's mode.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ShflOp {
    /// Printed as `idx`.
    Idx,
    /// Printed as `up`.
    Up,
    /// Printed as `down`.
    Down,
    /// Printed as `bfly`.
    Bfly,
}

// Prints the lower-case mnemonic suffix associated with each variant.
impl fmt::Display for ShflOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            ShflOp::Idx => "idx",
            ShflOp::Up => "up",
            ShflOp::Down => "down",
            ShflOp::Bfly => "bfly",
        };
        f.write_str(mnemonic)
    }
}
3843 
/// A `shf` instruction: shifts the 64-bit value formed from `low` and `high`
/// and writes one 32-bit half of the result.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpShf {
    /// The selected 32-bit half of the shifted value.
    #[dst_type(GPR)]
    pub dst: Dst,

    /// Low 32 bits of the value being shifted.
    #[src_type(GPR)]
    pub low: Src,

    /// High 32 bits of the value being shifted.
    #[src_type(ALU)]
    pub high: Src,

    /// The shift amount.
    #[src_type(ALU)]
    pub shift: Src,

    /// Shift right when set, left otherwise.  Printed as `.r` / `.l`.
    pub right: bool,
    /// When set, the shift amount is masked to the bit width of `data_type`;
    /// otherwise it is clamped to that bit width.  Printed as `.w`.
    pub wrap: bool,
    /// Integer type of the operation; determines the bit width used for the
    /// shift amount and whether the shift is arithmetic.
    pub data_type: IntType,
    /// When set, the high 32 bits of the shifted value are written to `dst`.
    /// Printed as `.hi`.
    pub dst_high: bool,
}
3864 
3865 impl Foldable for OpShf {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3866     fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3867         let low = f.get_u32_src(self, &self.low);
3868         let high = f.get_u32_src(self, &self.high);
3869         let shift = f.get_u32_src(self, &self.shift);
3870 
3871         let bits: u32 = self.data_type.bits().try_into().unwrap();
3872         let shift = if self.wrap {
3873             shift & (bits - 1)
3874         } else {
3875             min(shift, bits)
3876         };
3877 
3878         let x = u64::from(low) | (u64::from(high) << 32);
3879         let shifted = if sm.sm() < 70
3880             && self.dst_high
3881             && self.data_type != IntType::I64
3882         {
3883             if self.right {
3884                 x.checked_shr(shift).unwrap_or(0) as u64
3885             } else {
3886                 x.checked_shl(shift).unwrap_or(0) as u64
3887             }
3888         } else if self.data_type.is_signed() {
3889             if self.right {
3890                 (x as i64).checked_shr(shift).unwrap_or(0) as u64
3891             } else {
3892                 (x as i64).checked_shl(shift).unwrap_or(0) as u64
3893             }
3894         } else {
3895             if self.right {
3896                 x.checked_shr(shift).unwrap_or(0) as u64
3897             } else {
3898                 x.checked_shl(shift).unwrap_or(0) as u64
3899             }
3900         };
3901 
3902         let dst = if (sm.sm() < 70 && !self.right) || self.dst_high {
3903             (shifted >> 32) as u32
3904         } else {
3905             shifted as u32
3906         };
3907 
3908         f.set_u32_dst(self, &self.dst, dst);
3909     }
3910 }
3911 
3912 impl DisplayOp for OpShf {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3913     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3914         write!(f, "shf")?;
3915         if self.right {
3916             write!(f, ".r")?;
3917         } else {
3918             write!(f, ".l")?;
3919         }
3920         if self.wrap {
3921             write!(f, ".w")?;
3922         }
3923         write!(f, "{}", self.data_type)?;
3924         if self.dst_high {
3925             write!(f, ".hi")?;
3926         }
3927         write!(f, " {} {} {}", self.low, self.high, self.shift)
3928     }
3929 }
3930 impl_display_for_op!(OpShf);
3931 
3932 /// Only used on SM50
3933 #[repr(C)]
3934 #[derive(SrcsAsSlice, DstsAsSlice)]
3935 pub struct OpShl {
3936     #[dst_type(GPR)]
3937     pub dst: Dst,
3938 
3939     #[src_type(GPR)]
3940     pub src: Src,
3941 
3942     #[src_type(ALU)]
3943     pub shift: Src,
3944 
3945     pub wrap: bool,
3946 }
3947 
3948 impl DisplayOp for OpShl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3949     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3950         write!(f, "shl")?;
3951         if self.wrap {
3952             write!(f, ".w")?;
3953         }
3954         write!(f, " {} {}", self.src, self.shift)
3955     }
3956 }
3957 
3958 /// Only used on SM50
3959 #[repr(C)]
3960 #[derive(SrcsAsSlice, DstsAsSlice)]
3961 pub struct OpShr {
3962     #[dst_type(GPR)]
3963     pub dst: Dst,
3964 
3965     #[src_type(GPR)]
3966     pub src: Src,
3967 
3968     #[src_type(ALU)]
3969     pub shift: Src,
3970 
3971     pub wrap: bool,
3972     pub signed: bool,
3973 }
3974 
3975 impl DisplayOp for OpShr {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3976     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3977         write!(f, "shr")?;
3978         if self.wrap {
3979             write!(f, ".w")?;
3980         }
3981         if !self.signed {
3982             write!(f, ".u32")?;
3983         }
3984         write!(f, " {} {}", self.src, self.shift)
3985     }
3986 }
3987 
3988 #[repr(C)]
3989 pub struct OpF2F {
3990     pub dst: Dst,
3991     pub src: Src,
3992 
3993     pub src_type: FloatType,
3994     pub dst_type: FloatType,
3995     pub rnd_mode: FRndMode,
3996     pub ftz: bool,
3997     /// For 16-bit up-conversions, take the high 16 bits of the source register.
3998     /// For 16-bit down-conversions, place the result into the upper 16 bits of
3999     /// the destination register
4000     pub high: bool,
4001     /// Round to the nearest integer rather than nearest float
4002     ///
4003     /// Not available on SM70+
4004     pub integer_rnd: bool,
4005 }
4006 
4007 impl AsSlice<Src> for OpF2F {
4008     type Attr = SrcType;
4009 
as_slice(&self) -> &[Src]4010     fn as_slice(&self) -> &[Src] {
4011         std::slice::from_ref(&self.src)
4012     }
4013 
as_mut_slice(&mut self) -> &mut [Src]4014     fn as_mut_slice(&mut self) -> &mut [Src] {
4015         std::slice::from_mut(&mut self.src)
4016     }
4017 
attrs(&self) -> SrcTypeList4018     fn attrs(&self) -> SrcTypeList {
4019         let src_type = match self.src_type {
4020             FloatType::F16 => SrcType::F16,
4021             FloatType::F32 => SrcType::F32,
4022             FloatType::F64 => SrcType::F64,
4023         };
4024         SrcTypeList::Uniform(src_type)
4025     }
4026 }
4027 
4028 impl AsSlice<Dst> for OpF2F {
4029     type Attr = DstType;
4030 
as_slice(&self) -> &[Dst]4031     fn as_slice(&self) -> &[Dst] {
4032         std::slice::from_ref(&self.dst)
4033     }
4034 
as_mut_slice(&mut self) -> &mut [Dst]4035     fn as_mut_slice(&mut self) -> &mut [Dst] {
4036         std::slice::from_mut(&mut self.dst)
4037     }
4038 
attrs(&self) -> DstTypeList4039     fn attrs(&self) -> DstTypeList {
4040         let dst_type = match self.dst_type {
4041             FloatType::F16 => DstType::F16,
4042             FloatType::F32 => DstType::F32,
4043             FloatType::F64 => DstType::F64,
4044         };
4045         DstTypeList::Uniform(dst_type)
4046     }
4047 }
4048 
4049 impl DisplayOp for OpF2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4050     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4051         write!(f, "f2f")?;
4052         if self.ftz {
4053             write!(f, ".ftz")?;
4054         }
4055         if self.integer_rnd {
4056             write!(f, ".int")?;
4057         }
4058         write!(
4059             f,
4060             "{}{}{} {}",
4061             self.dst_type, self.src_type, self.rnd_mode, self.src,
4062         )
4063     }
4064 }
4065 impl_display_for_op!(OpF2F);
4066 
4067 #[repr(C)]
4068 #[derive(DstsAsSlice, SrcsAsSlice)]
4069 pub struct OpF2FP {
4070     #[dst_type(GPR)]
4071     pub dst: Dst,
4072 
4073     #[src_type(ALU)]
4074     pub srcs: [Src; 2],
4075 
4076     pub rnd_mode: FRndMode,
4077 }
4078 
4079 impl DisplayOp for OpF2FP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4080     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4081         write!(f, "f2fp.pack_ab")?;
4082         if self.rnd_mode != FRndMode::NearestEven {
4083             write!(f, "{}", self.rnd_mode)?;
4084         }
4085         write!(f, " {}, {}", self.srcs[0], self.srcs[1],)
4086     }
4087 }
4088 impl_display_for_op!(OpF2FP);
4089 
4090 #[repr(C)]
4091 #[derive(DstsAsSlice)]
4092 pub struct OpF2I {
4093     #[dst_type(GPR)]
4094     pub dst: Dst,
4095 
4096     pub src: Src,
4097 
4098     pub src_type: FloatType,
4099     pub dst_type: IntType,
4100     pub rnd_mode: FRndMode,
4101     pub ftz: bool,
4102 }
4103 
4104 impl AsSlice<Src> for OpF2I {
4105     type Attr = SrcType;
4106 
as_slice(&self) -> &[Src]4107     fn as_slice(&self) -> &[Src] {
4108         std::slice::from_ref(&self.src)
4109     }
4110 
as_mut_slice(&mut self) -> &mut [Src]4111     fn as_mut_slice(&mut self) -> &mut [Src] {
4112         std::slice::from_mut(&mut self.src)
4113     }
4114 
attrs(&self) -> SrcTypeList4115     fn attrs(&self) -> SrcTypeList {
4116         let src_type = match self.src_type {
4117             FloatType::F16 => SrcType::F16,
4118             FloatType::F32 => SrcType::F32,
4119             FloatType::F64 => SrcType::F64,
4120         };
4121         SrcTypeList::Uniform(src_type)
4122     }
4123 }
4124 
4125 impl DisplayOp for OpF2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4126     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4127         let ftz = if self.ftz { ".ftz" } else { "" };
4128         write!(
4129             f,
4130             "f2i{}{}{}{ftz} {}",
4131             self.dst_type, self.src_type, self.rnd_mode, self.src,
4132         )
4133     }
4134 }
4135 impl_display_for_op!(OpF2I);
4136 
4137 #[repr(C)]
4138 pub struct OpI2F {
4139     pub dst: Dst,
4140     pub src: Src,
4141 
4142     pub dst_type: FloatType,
4143     pub src_type: IntType,
4144     pub rnd_mode: FRndMode,
4145 }
4146 
4147 impl AsSlice<Src> for OpI2F {
4148     type Attr = SrcType;
4149 
as_slice(&self) -> &[Src]4150     fn as_slice(&self) -> &[Src] {
4151         std::slice::from_ref(&self.src)
4152     }
4153 
as_mut_slice(&mut self) -> &mut [Src]4154     fn as_mut_slice(&mut self) -> &mut [Src] {
4155         std::slice::from_mut(&mut self.src)
4156     }
4157 
attrs(&self) -> SrcTypeList4158     fn attrs(&self) -> SrcTypeList {
4159         if self.src_type.bits() <= 32 {
4160             SrcTypeList::Uniform(SrcType::ALU)
4161         } else {
4162             SrcTypeList::Uniform(SrcType::GPR)
4163         }
4164     }
4165 }
4166 
4167 impl AsSlice<Dst> for OpI2F {
4168     type Attr = DstType;
4169 
as_slice(&self) -> &[Dst]4170     fn as_slice(&self) -> &[Dst] {
4171         std::slice::from_ref(&self.dst)
4172     }
4173 
as_mut_slice(&mut self) -> &mut [Dst]4174     fn as_mut_slice(&mut self) -> &mut [Dst] {
4175         std::slice::from_mut(&mut self.dst)
4176     }
4177 
attrs(&self) -> DstTypeList4178     fn attrs(&self) -> DstTypeList {
4179         let dst_type = match self.dst_type {
4180             FloatType::F16 => DstType::F16,
4181             FloatType::F32 => DstType::F32,
4182             FloatType::F64 => DstType::F64,
4183         };
4184         DstTypeList::Uniform(dst_type)
4185     }
4186 }
4187 
4188 impl DisplayOp for OpI2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4189     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4190         write!(
4191             f,
4192             "i2f{}{}{} {}",
4193             self.dst_type, self.src_type, self.rnd_mode, self.src,
4194         )
4195     }
4196 }
4197 impl_display_for_op!(OpI2F);
4198 
4199 /// Not used on SM70+
4200 #[repr(C)]
4201 #[derive(SrcsAsSlice, DstsAsSlice)]
4202 pub struct OpI2I {
4203     #[dst_type(GPR)]
4204     pub dst: Dst,
4205 
4206     #[src_type(ALU)]
4207     pub src: Src,
4208 
4209     pub src_type: IntType,
4210     pub dst_type: IntType,
4211 
4212     pub saturate: bool,
4213     pub abs: bool,
4214     pub neg: bool,
4215 }
4216 
4217 impl DisplayOp for OpI2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4218     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4219         write!(f, "i2i")?;
4220         if self.saturate {
4221             write!(f, ".sat ")?;
4222         }
4223         write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
4224         if self.abs {
4225             write!(f, ".abs")?;
4226         }
4227         if self.neg {
4228             write!(f, ".neg")?;
4229         }
4230         Ok(())
4231     }
4232 }
4233 impl_display_for_op!(OpI2I);
4234 
4235 #[repr(C)]
4236 #[derive(DstsAsSlice)]
4237 pub struct OpFRnd {
4238     #[dst_type(F32)]
4239     pub dst: Dst,
4240 
4241     pub src: Src,
4242 
4243     pub dst_type: FloatType,
4244     pub src_type: FloatType,
4245     pub rnd_mode: FRndMode,
4246     pub ftz: bool,
4247 }
4248 
4249 impl AsSlice<Src> for OpFRnd {
4250     type Attr = SrcType;
4251 
as_slice(&self) -> &[Src]4252     fn as_slice(&self) -> &[Src] {
4253         std::slice::from_ref(&self.src)
4254     }
4255 
as_mut_slice(&mut self) -> &mut [Src]4256     fn as_mut_slice(&mut self) -> &mut [Src] {
4257         std::slice::from_mut(&mut self.src)
4258     }
4259 
attrs(&self) -> SrcTypeList4260     fn attrs(&self) -> SrcTypeList {
4261         let src_type = match self.src_type {
4262             FloatType::F16 => SrcType::F16,
4263             FloatType::F32 => SrcType::F32,
4264             FloatType::F64 => SrcType::F64,
4265         };
4266         SrcTypeList::Uniform(src_type)
4267     }
4268 }
4269 
4270 impl DisplayOp for OpFRnd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4271     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4272         let ftz = if self.ftz { ".ftz" } else { "" };
4273         write!(
4274             f,
4275             "frnd{}{}{}{ftz} {}",
4276             self.dst_type, self.src_type, self.rnd_mode, self.src,
4277         )
4278     }
4279 }
4280 impl_display_for_op!(OpFRnd);
4281 
4282 #[repr(C)]
4283 #[derive(SrcsAsSlice, DstsAsSlice)]
4284 pub struct OpMov {
4285     #[dst_type(GPR)]
4286     pub dst: Dst,
4287 
4288     #[src_type(ALU)]
4289     pub src: Src,
4290 
4291     pub quad_lanes: u8,
4292 }
4293 
4294 impl DisplayOp for OpMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4295     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4296         if self.quad_lanes == 0xf {
4297             write!(f, "mov {}", self.src)
4298         } else {
4299             write!(f, "mov[{:#x}] {}", self.quad_lanes, self.src)
4300         }
4301     }
4302 }
4303 impl_display_for_op!(OpMov);
4304 
/// A single 4-bit byte selector.
///
/// Bit layout: bits 0-1 are the byte index, bit 2 is the source index and
/// bit 3 is the `msb` flag.
#[derive(Copy, Clone)]
pub struct PrmtSelByte(u8);

impl PrmtSelByte {
    /// Sentinel with every bit set; it does not fit in a selector nibble.
    pub const INVALID: PrmtSelByte = PrmtSelByte(u8::MAX);

    /// Builds a selector for byte `byte_idx` of source `src_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx >= 2` or `byte_idx >= 4`.
    pub fn new(src_idx: usize, byte_idx: usize, msb: bool) -> PrmtSelByte {
        assert!(src_idx < 2);
        assert!(byte_idx < 4);

        let msb_bit: u8 = if msb { 0x8 } else { 0 };
        PrmtSelByte(msb_bit | ((src_idx as u8) << 2) | (byte_idx as u8))
    }

    /// Index of the selected source (0 or 1).
    pub fn src(&self) -> usize {
        usize::from((self.0 >> 2) & 0x1)
    }

    /// Index of the selected byte within the source (0 to 3).
    pub fn byte(&self) -> usize {
        usize::from(self.0 & 0x3)
    }

    /// Whether the `msb` flag is set.
    pub fn msb(&self) -> bool {
        (self.0 & 0x8) != 0
    }

    /// Applies this selector to the source value `u`.
    ///
    /// Returns the selected byte, or, when the `msb` flag is set, `0xff` or
    /// `0x00` depending on whether that byte's top bit is set.
    pub fn fold_u32(&self, u: u32) -> u8 {
        let selected = (u >> (self.byte() * 8)) as u8;
        if self.msb() {
            // The arithmetic shift smears the sign bit across the byte.
            ((selected as i8) >> 7) as u8
        } else {
            selected
        }
    }
}
4344 
4345 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
4346 pub struct PrmtSel(pub u16);
4347 
4348 impl PrmtSel {
new(bytes: [PrmtSelByte; 4]) -> PrmtSel4349     pub fn new(bytes: [PrmtSelByte; 4]) -> PrmtSel {
4350         let mut sel = 0;
4351         for i in 0..4 {
4352             assert!(bytes[i].0 <= 0xf);
4353             sel |= u16::from(bytes[i].0) << (i * 4);
4354         }
4355         PrmtSel(sel)
4356     }
4357 
get(&self, byte_idx: usize) -> PrmtSelByte4358     pub fn get(&self, byte_idx: usize) -> PrmtSelByte {
4359         assert!(byte_idx < 4);
4360         PrmtSelByte(((self.0 >> (byte_idx * 4)) & 0xf) as u8)
4361     }
4362 }
4363 
/// Mode modifier of the `prmt` op.
///
/// `Index` is the default and prints nothing; every other mode prints as a
/// dotted suffix.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
    Index,
    Forward4Extract,
    Backward4Extract,
    Replicate8,
    EdgeClampLeft,
    EdgeClampRight,
    Replicate16,
}

impl fmt::Display for PrmtMode {
    /// Writes the mode suffix (empty for `Index`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PrmtMode::Index => Ok(()),
            PrmtMode::Forward4Extract => write!(f, ".f4e"),
            PrmtMode::Backward4Extract => write!(f, ".b4e"),
            PrmtMode::Replicate8 => write!(f, ".rc8"),
            PrmtMode::EdgeClampLeft => write!(f, ".ecl"),
            // Right clamp needs its own suffix; it used to print ".ecl",
            // which made it indistinguishable from EdgeClampLeft.
            PrmtMode::EdgeClampRight => write!(f, ".ecr"),
            PrmtMode::Replicate16 => write!(f, ".rc16"),
        }
    }
}
4389 
4390 #[repr(C)]
4391 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4392 /// Permutes `srcs` into `dst` using `selection`.
4393 pub struct OpPrmt {
4394     #[dst_type(GPR)]
4395     pub dst: Dst,
4396 
4397     #[src_type(ALU)]
4398     pub srcs: [Src; 2],
4399 
4400     #[src_type(ALU)]
4401     pub sel: Src,
4402 
4403     pub mode: PrmtMode,
4404 }
4405 
4406 impl OpPrmt {
get_sel(&self) -> Option<PrmtSel>4407     pub fn get_sel(&self) -> Option<PrmtSel> {
4408         // TODO: We could construct a PrmtSel for the other modes but we don't
4409         // use them right now because they're kinda pointless.
4410         if self.mode != PrmtMode::Index {
4411             return None;
4412         }
4413 
4414         if let Some(sel) = self.sel.as_u32() {
4415             // The top 16 bits are ignored
4416             Some(PrmtSel(sel as u16))
4417         } else {
4418             None
4419         }
4420     }
4421 
as_u32(&self) -> Option<u32>4422     pub fn as_u32(&self) -> Option<u32> {
4423         let Some(sel) = self.get_sel() else {
4424             return None;
4425         };
4426 
4427         let mut imm = 0_u32;
4428         for b in 0..4 {
4429             let sel_byte = sel.get(b);
4430             let Some(src_u32) = self.srcs[sel_byte.src()].as_u32() else {
4431                 return None;
4432             };
4433 
4434             let sb = sel_byte.fold_u32(src_u32);
4435             imm |= u32::from(sb) << (b * 8);
4436         }
4437         Some(imm)
4438     }
4439 }
4440 
4441 impl Foldable for OpPrmt {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4442     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4443         let srcs = [
4444             f.get_u32_src(self, &self.srcs[0]),
4445             f.get_u32_src(self, &self.srcs[1]),
4446         ];
4447         let sel = f.get_u32_src(self, &self.sel);
4448 
4449         assert!(self.mode == PrmtMode::Index);
4450         let sel = PrmtSel(sel as u16);
4451 
4452         let mut dst = 0_u32;
4453         for b in 0..4 {
4454             let sel_byte = sel.get(b);
4455             let src = srcs[sel_byte.src()];
4456             let sb = sel_byte.fold_u32(src);
4457             dst |= u32::from(sb) << (b * 8);
4458         }
4459 
4460         f.set_u32_dst(self, &self.dst, dst);
4461     }
4462 }
4463 
4464 impl DisplayOp for OpPrmt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4465     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4466         write!(
4467             f,
4468             "prmt{} {} [{}] {}",
4469             self.mode, self.srcs[0], self.sel, self.srcs[1],
4470         )
4471     }
4472 }
4473 impl_display_for_op!(OpPrmt);
4474 
4475 #[repr(C)]
4476 #[derive(SrcsAsSlice, DstsAsSlice)]
4477 pub struct OpSel {
4478     #[dst_type(GPR)]
4479     pub dst: Dst,
4480 
4481     #[src_type(Pred)]
4482     pub cond: Src,
4483 
4484     #[src_type(ALU)]
4485     pub srcs: [Src; 2],
4486 }
4487 
4488 impl DisplayOp for OpSel {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4489     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4490         write!(f, "sel {} {} {}", self.cond, self.srcs[0], self.srcs[1],)
4491     }
4492 }
4493 impl_display_for_op!(OpSel);
4494 
4495 #[repr(C)]
4496 #[derive(SrcsAsSlice, DstsAsSlice)]
4497 pub struct OpShfl {
4498     #[dst_type(GPR)]
4499     pub dst: Dst,
4500 
4501     #[dst_type(Pred)]
4502     pub in_bounds: Dst,
4503 
4504     #[src_type(SSA)]
4505     pub src: Src,
4506 
4507     #[src_type(ALU)]
4508     pub lane: Src,
4509 
4510     #[src_type(ALU)]
4511     pub c: Src,
4512 
4513     pub op: ShflOp,
4514 }
4515 
4516 impl DisplayOp for OpShfl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4517     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4518         write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)
4519     }
4520 }
4521 impl_display_for_op!(OpShfl);
4522 
4523 #[repr(C)]
4524 #[derive(SrcsAsSlice, DstsAsSlice)]
4525 pub struct OpPLop3 {
4526     #[dst_type(Pred)]
4527     pub dsts: [Dst; 2],
4528 
4529     #[src_type(Pred)]
4530     pub srcs: [Src; 3],
4531 
4532     pub ops: [LogicOp3; 2],
4533 }
4534 
4535 impl DisplayOp for OpPLop3 {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4536     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4537         write!(f, "{} {}", self.dsts[0], self.dsts[1])
4538     }
4539 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4540     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4541         write!(
4542             f,
4543             "plop3 {} {} {} {} {}",
4544             self.srcs[0], self.srcs[1], self.srcs[2], self.ops[0], self.ops[1],
4545         )
4546     }
4547 }
4548 impl_display_for_op!(OpPLop3);
4549 
4550 #[repr(C)]
4551 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4552 pub struct OpPSetP {
4553     #[dst_type(Pred)]
4554     pub dsts: [Dst; 2],
4555 
4556     pub ops: [PredSetOp; 2],
4557 
4558     #[src_type(Pred)]
4559     pub srcs: [Src; 3],
4560 }
4561 
4562 impl Foldable for OpPSetP {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4563     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4564         let srcs = [
4565             f.get_pred_src(self, &self.srcs[0]),
4566             f.get_pred_src(self, &self.srcs[1]),
4567             f.get_pred_src(self, &self.srcs[2]),
4568         ];
4569 
4570         let tmp = self.ops[0].eval(srcs[0], srcs[1]);
4571         let dst0 = self.ops[1].eval(srcs[2], tmp);
4572 
4573         let tmp = self.ops[0].eval(!srcs[0], srcs[1]);
4574         let dst1 = self.ops[1].eval(srcs[2], tmp);
4575 
4576         f.set_pred_dst(self, &self.dsts[0], dst0);
4577         f.set_pred_dst(self, &self.dsts[1], dst1);
4578     }
4579 }
4580 
4581 impl DisplayOp for OpPSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4582     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4583         write!(
4584             f,
4585             "psetp{}{} {} {} {}",
4586             self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
4587         )
4588     }
4589 }
4590 
4591 #[repr(C)]
4592 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4593 pub struct OpPopC {
4594     #[dst_type(GPR)]
4595     pub dst: Dst,
4596 
4597     #[src_type(B32)]
4598     pub src: Src,
4599 }
4600 
4601 impl Foldable for OpPopC {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4602     fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4603         let src = f.get_u32_bnot_src(self, &self.src);
4604         let dst = src.count_ones();
4605         f.set_u32_dst(self, &self.dst, dst);
4606     }
4607 }
4608 
4609 impl DisplayOp for OpPopC {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4610     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4611         write!(f, "popc {}", self.src,)
4612     }
4613 }
4614 impl_display_for_op!(OpPopC);
4615 
4616 #[repr(C)]
4617 #[derive(SrcsAsSlice, DstsAsSlice)]
4618 pub struct OpR2UR {
4619     #[dst_type(GPR)]
4620     pub dst: Dst,
4621 
4622     #[src_type(GPR)]
4623     pub src: Src,
4624 }
4625 
4626 impl DisplayOp for OpR2UR {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4627     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4628         write!(f, "r2ur {}", self.src)
4629     }
4630 }
4631 impl_display_for_op!(OpR2UR);
4632 
4633 #[repr(C)]
4634 #[derive(SrcsAsSlice, DstsAsSlice)]
4635 pub struct OpTex {
4636     pub dsts: [Dst; 2],
4637     pub fault: Dst,
4638 
4639     #[src_type(SSA)]
4640     pub srcs: [Src; 2],
4641 
4642     pub dim: TexDim,
4643     pub lod_mode: TexLodMode,
4644     pub z_cmpr: bool,
4645     pub offset: bool,
4646     pub mask: u8,
4647 }
4648 
4649 impl DisplayOp for OpTex {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4650     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4651         write!(f, "tex.b{}", self.dim)?;
4652         if self.lod_mode != TexLodMode::Auto {
4653             write!(f, ".{}", self.lod_mode)?;
4654         }
4655         if self.offset {
4656             write!(f, ".aoffi")?;
4657         }
4658         if self.z_cmpr {
4659             write!(f, ".dc")?;
4660         }
4661         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4662     }
4663 }
4664 impl_display_for_op!(OpTex);
4665 
4666 #[repr(C)]
4667 #[derive(SrcsAsSlice, DstsAsSlice)]
4668 pub struct OpTld {
4669     pub dsts: [Dst; 2],
4670     pub fault: Dst,
4671 
4672     #[src_type(SSA)]
4673     pub srcs: [Src; 2],
4674 
4675     pub dim: TexDim,
4676     pub is_ms: bool,
4677     pub lod_mode: TexLodMode,
4678     pub offset: bool,
4679     pub mask: u8,
4680 }
4681 
4682 impl DisplayOp for OpTld {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4683     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4684         write!(f, "tld.b{}", self.dim)?;
4685         if self.lod_mode != TexLodMode::Auto {
4686             write!(f, ".{}", self.lod_mode)?;
4687         }
4688         if self.offset {
4689             write!(f, ".aoffi")?;
4690         }
4691         if self.is_ms {
4692             write!(f, ".ms")?;
4693         }
4694         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4695     }
4696 }
4697 impl_display_for_op!(OpTld);
4698 
4699 #[repr(C)]
4700 #[derive(SrcsAsSlice, DstsAsSlice)]
4701 pub struct OpTld4 {
4702     pub dsts: [Dst; 2],
4703     pub fault: Dst,
4704 
4705     #[src_type(SSA)]
4706     pub srcs: [Src; 2],
4707 
4708     pub dim: TexDim,
4709     pub comp: u8,
4710     pub offset_mode: Tld4OffsetMode,
4711     pub z_cmpr: bool,
4712     pub mask: u8,
4713 }
4714 
4715 impl DisplayOp for OpTld4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4716     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4717         write!(f, "tld4.g.b{}", self.dim)?;
4718         if self.offset_mode != Tld4OffsetMode::None {
4719             write!(f, ".{}", self.offset_mode)?;
4720         }
4721         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4722     }
4723 }
4724 impl_display_for_op!(OpTld4);
4725 
4726 #[repr(C)]
4727 #[derive(SrcsAsSlice, DstsAsSlice)]
4728 pub struct OpTmml {
4729     pub dsts: [Dst; 2],
4730 
4731     #[src_type(SSA)]
4732     pub srcs: [Src; 2],
4733 
4734     pub dim: TexDim,
4735     pub mask: u8,
4736 }
4737 
4738 impl DisplayOp for OpTmml {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4739     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4740         write!(
4741             f,
4742             "tmml.b.lod{} {} {}",
4743             self.dim, self.srcs[0], self.srcs[1]
4744         )
4745     }
4746 }
4747 impl_display_for_op!(OpTmml);
4748 
4749 #[repr(C)]
4750 #[derive(SrcsAsSlice, DstsAsSlice)]
4751 pub struct OpTxd {
4752     pub dsts: [Dst; 2],
4753     pub fault: Dst,
4754 
4755     #[src_type(SSA)]
4756     pub srcs: [Src; 2],
4757 
4758     pub dim: TexDim,
4759     pub offset: bool,
4760     pub mask: u8,
4761 }
4762 
4763 impl DisplayOp for OpTxd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4764     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4765         write!(f, "txd.b{}", self.dim)?;
4766         if self.offset {
4767             write!(f, ".aoffi")?;
4768         }
4769         write!(f, " {} {}", self.srcs[0], self.srcs[1])
4770     }
4771 }
4772 impl_display_for_op!(OpTxd);
4773 
4774 #[repr(C)]
4775 #[derive(SrcsAsSlice, DstsAsSlice)]
4776 pub struct OpTxq {
4777     pub dsts: [Dst; 2],
4778 
4779     #[src_type(SSA)]
4780     pub src: Src,
4781 
4782     pub query: TexQuery,
4783     pub mask: u8,
4784 }
4785 
4786 impl DisplayOp for OpTxq {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4787     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4788         write!(f, "txq.b {} {}", self.src, self.query)
4789     }
4790 }
4791 impl_display_for_op!(OpTxq);
4792 
4793 #[repr(C)]
4794 #[derive(SrcsAsSlice, DstsAsSlice)]
4795 pub struct OpSuLd {
4796     pub dst: Dst,
4797     pub fault: Dst,
4798 
4799     pub image_dim: ImageDim,
4800     pub mem_order: MemOrder,
4801     pub mem_eviction_priority: MemEvictionPriority,
4802     pub mask: u8,
4803 
4804     #[src_type(GPR)]
4805     pub handle: Src,
4806 
4807     #[src_type(SSA)]
4808     pub coord: Src,
4809 }
4810 
4811 impl DisplayOp for OpSuLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4812     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4813         write!(
4814             f,
4815             "suld.p{}{}{} [{}] {}",
4816             self.image_dim,
4817             self.mem_order,
4818             self.mem_eviction_priority,
4819             self.coord,
4820             self.handle,
4821         )
4822     }
4823 }
4824 impl_display_for_op!(OpSuLd);
4825 
4826 #[repr(C)]
4827 #[derive(SrcsAsSlice, DstsAsSlice)]
4828 pub struct OpSuSt {
4829     pub image_dim: ImageDim,
4830     pub mem_order: MemOrder,
4831     pub mem_eviction_priority: MemEvictionPriority,
4832     pub mask: u8,
4833 
4834     #[src_type(GPR)]
4835     pub handle: Src,
4836 
4837     #[src_type(SSA)]
4838     pub coord: Src,
4839 
4840     #[src_type(SSA)]
4841     pub data: Src,
4842 }
4843 
4844 impl DisplayOp for OpSuSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4845     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4846         write!(
4847             f,
4848             "sust.p{}{}{} [{}] {} {}",
4849             self.image_dim,
4850             self.mem_order,
4851             self.mem_eviction_priority,
4852             self.coord,
4853             self.data,
4854             self.handle,
4855         )
4856     }
4857 }
4858 impl_display_for_op!(OpSuSt);
4859 
4860 #[repr(C)]
4861 #[derive(SrcsAsSlice, DstsAsSlice)]
4862 pub struct OpSuAtom {
4863     pub dst: Dst,
4864     pub fault: Dst,
4865 
4866     pub image_dim: ImageDim,
4867 
4868     pub atom_op: AtomOp,
4869     pub atom_type: AtomType,
4870 
4871     pub mem_order: MemOrder,
4872     pub mem_eviction_priority: MemEvictionPriority,
4873 
4874     #[src_type(GPR)]
4875     pub handle: Src,
4876 
4877     #[src_type(SSA)]
4878     pub coord: Src,
4879 
4880     #[src_type(SSA)]
4881     pub data: Src,
4882 }
4883 
4884 impl DisplayOp for OpSuAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4885     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4886         write!(
4887             f,
4888             "suatom.p{}{}{}{}{} [{}] {} {}",
4889             self.image_dim,
4890             self.atom_op,
4891             self.atom_type,
4892             self.mem_order,
4893             self.mem_eviction_priority,
4894             self.coord,
4895             self.data,
4896             self.handle,
4897         )
4898     }
4899 }
4900 impl_display_for_op!(OpSuAtom);
4901 
4902 #[repr(C)]
4903 #[derive(SrcsAsSlice, DstsAsSlice)]
4904 pub struct OpLd {
4905     pub dst: Dst,
4906 
4907     #[src_type(GPR)]
4908     pub addr: Src,
4909 
4910     pub offset: i32,
4911     pub access: MemAccess,
4912 }
4913 
4914 impl DisplayOp for OpLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4915     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4916         write!(f, "ld{} [{}", self.access, self.addr)?;
4917         if self.offset > 0 {
4918             write!(f, "+{:#x}", self.offset)?;
4919         }
4920         write!(f, "]")
4921     }
4922 }
4923 impl_display_for_op!(OpLd);
4924 
/// Mode modifier of the `ldc` op.
///
/// `Indexed` is the default and prints nothing; the other modes print as
/// `.il`, `.is` and `.isl`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LdcMode {
    Indexed,
    IndexedLinear,
    IndexedSegmented,
    IndexedSegmentedLinear,
}

impl fmt::Display for LdcMode {
    /// Writes the mode suffix (empty for `Indexed`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match *self {
            LdcMode::Indexed => "",
            LdcMode::IndexedLinear => ".il",
            LdcMode::IndexedSegmented => ".is",
            LdcMode::IndexedSegmentedLinear => ".isl",
        };
        f.write_str(suffix)
    }
}
4944 
4945 #[repr(C)]
4946 #[derive(SrcsAsSlice, DstsAsSlice)]
4947 pub struct OpLdc {
4948     pub dst: Dst,
4949 
4950     #[src_type(ALU)]
4951     pub cb: Src,
4952 
4953     #[src_type(GPR)]
4954     pub offset: Src,
4955 
4956     pub mode: LdcMode,
4957     pub mem_type: MemType,
4958 }
4959 
4960 impl DisplayOp for OpLdc {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4961     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4962         let SrcRef::CBuf(cb) = self.cb.src_ref else {
4963             panic!("Not a cbuf");
4964         };
4965         write!(f, "ldc{}{} {}[", self.mode, self.mem_type, cb.buf)?;
4966         if self.offset.is_zero() {
4967             write!(f, "+{:#x}", cb.offset)?;
4968         } else if cb.offset == 0 {
4969             write!(f, "{}", self.offset)?;
4970         } else {
4971             write!(f, "{}+{:#x}", self.offset, cb.offset)?;
4972         }
4973         write!(f, "]")
4974     }
4975 }
4976 impl_display_for_op!(OpLdc);
4977 
4978 #[repr(C)]
4979 #[derive(SrcsAsSlice, DstsAsSlice)]
4980 pub struct OpSt {
4981     #[src_type(GPR)]
4982     pub addr: Src,
4983 
4984     #[src_type(SSA)]
4985     pub data: Src,
4986 
4987     pub offset: i32,
4988     pub access: MemAccess,
4989 }
4990 
4991 impl DisplayOp for OpSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4992     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4993         write!(f, "st{} [{}", self.access, self.addr)?;
4994         if self.offset > 0 {
4995             write!(f, "+{:#x}", self.offset)?;
4996         }
4997         write!(f, "] {}", self.data)
4998     }
4999 }
5000 impl_display_for_op!(OpSt);
5001 
5002 #[repr(C)]
5003 #[derive(SrcsAsSlice, DstsAsSlice)]
5004 pub struct OpAtom {
5005     pub dst: Dst,
5006 
5007     #[src_type(GPR)]
5008     pub addr: Src,
5009 
5010     #[src_type(GPR)]
5011     pub cmpr: Src,
5012 
5013     #[src_type(SSA)]
5014     pub data: Src,
5015 
5016     pub atom_op: AtomOp,
5017     pub atom_type: AtomType,
5018 
5019     pub addr_offset: i32,
5020 
5021     pub mem_space: MemSpace,
5022     pub mem_order: MemOrder,
5023     pub mem_eviction_priority: MemEvictionPriority,
5024 }
5025 
5026 impl DisplayOp for OpAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5027     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5028         write!(
5029             f,
5030             "atom{}{}{}{}{}",
5031             self.atom_op,
5032             self.atom_type,
5033             self.mem_space,
5034             self.mem_order,
5035             self.mem_eviction_priority,
5036         )?;
5037         write!(f, " [")?;
5038         if !self.addr.is_zero() {
5039             write!(f, "{}", self.addr)?;
5040         }
5041         if self.addr_offset > 0 {
5042             if !self.addr.is_zero() {
5043                 write!(f, "+")?;
5044             }
5045             write!(f, "{:#x}", self.addr_offset)?;
5046         }
5047         write!(f, "]")?;
5048         if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
5049             write!(f, " {}", self.cmpr)?;
5050         }
5051         write!(f, " {}", self.data)
5052     }
5053 }
5054 impl_display_for_op!(OpAtom);
5055 
5056 #[repr(C)]
5057 #[derive(SrcsAsSlice, DstsAsSlice)]
5058 pub struct OpAL2P {
5059     pub dst: Dst,
5060 
5061     #[src_type(GPR)]
5062     pub offset: Src,
5063 
5064     pub access: AttrAccess,
5065 }
5066 
5067 impl DisplayOp for OpAL2P {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5068     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5069         write!(f, "al2p")?;
5070         if self.access.output {
5071             write!(f, ".o")?;
5072         }
5073         if self.access.patch {
5074             write!(f, ".p")?;
5075         }
5076         write!(f, " a[{:#x}", self.access.addr)?;
5077         if !self.offset.is_zero() {
5078             write!(f, "+{}", self.offset)?;
5079         }
5080         write!(f, "]")
5081     }
5082 }
5083 impl_display_for_op!(OpAL2P);
5084 
5085 #[repr(C)]
5086 #[derive(SrcsAsSlice, DstsAsSlice)]
5087 pub struct OpALd {
5088     pub dst: Dst,
5089 
5090     #[src_type(GPR)]
5091     pub vtx: Src,
5092 
5093     #[src_type(GPR)]
5094     pub offset: Src,
5095 
5096     pub access: AttrAccess,
5097 }
5098 
5099 impl DisplayOp for OpALd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5100     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5101         write!(f, "ald")?;
5102         if self.access.output {
5103             write!(f, ".o")?;
5104         }
5105         if self.access.patch {
5106             write!(f, ".p")?;
5107         }
5108         if self.access.phys {
5109             write!(f, ".phys")?;
5110         }
5111         write!(f, " a")?;
5112         if !self.vtx.is_zero() {
5113             write!(f, "[{}]", self.vtx)?;
5114         }
5115         write!(f, "[{:#x}", self.access.addr)?;
5116         if !self.offset.is_zero() {
5117             write!(f, "+{}", self.offset)?;
5118         }
5119         write!(f, "]")
5120     }
5121 }
5122 impl_display_for_op!(OpALd);
5123 
5124 #[repr(C)]
5125 #[derive(SrcsAsSlice, DstsAsSlice)]
5126 pub struct OpASt {
5127     #[src_type(GPR)]
5128     pub vtx: Src,
5129 
5130     #[src_type(GPR)]
5131     pub offset: Src,
5132 
5133     #[src_type(SSA)]
5134     pub data: Src,
5135 
5136     pub access: AttrAccess,
5137 }
5138 
5139 impl DisplayOp for OpASt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5140     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5141         write!(f, "ast")?;
5142         if self.access.patch {
5143             write!(f, ".p")?;
5144         }
5145         if self.access.phys {
5146             write!(f, ".phys")?;
5147         }
5148         write!(f, " a")?;
5149         if !self.vtx.is_zero() {
5150             write!(f, "[{}]", self.vtx)?;
5151         }
5152         write!(f, "[{:#x}", self.access.addr)?;
5153         if !self.offset.is_zero() {
5154             write!(f, "+{}", self.offset)?;
5155         }
5156         write!(f, "] {}", self.data)
5157     }
5158 }
5159 impl_display_for_op!(OpASt);
5160 
5161 #[repr(C)]
5162 #[derive(SrcsAsSlice, DstsAsSlice)]
5163 pub struct OpIpa {
5164     pub dst: Dst,
5165     pub addr: u16,
5166     pub freq: InterpFreq,
5167     pub loc: InterpLoc,
5168     pub inv_w: Src,
5169     pub offset: Src,
5170 }
5171 
5172 impl DisplayOp for OpIpa {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5173     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5174         write!(
5175             f,
5176             "ipa{}{} a[{:#x}] {}",
5177             self.freq, self.loc, self.addr, self.inv_w
5178         )?;
5179         if self.loc == InterpLoc::Offset {
5180             write!(f, " {}", self.offset)?;
5181         }
5182         Ok(())
5183     }
5184 }
5185 impl_display_for_op!(OpIpa);
5186 
5187 #[repr(C)]
5188 #[derive(SrcsAsSlice, DstsAsSlice)]
5189 pub struct OpLdTram {
5190     pub dst: Dst,
5191     pub addr: u16,
5192     pub use_c: bool,
5193 }
5194 
5195 impl DisplayOp for OpLdTram {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5196     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5197         write!(f, "ldtram")?;
5198         if self.use_c {
5199             write!(f, ".c")?;
5200         } else {
5201             write!(f, ".ab")?;
5202         }
5203         write!(f, " a[{:#x}]", self.addr)?;
5204         Ok(())
5205     }
5206 }
5207 impl_display_for_op!(OpLdTram);
5208 
/// The operation selector carried by `OpCCtl`.
#[allow(dead_code)]
#[derive(Copy, Clone, Debug)]
pub enum CCtlOp {
    Qry1, // Only available pre-Volta
    PF1,
    PF1_5, // Only available pre-Volta
    PF2,
    WB,
    IV,
    IVAll,
    RS,
    RSLB,   // Only available pre-Volta
    IVAllP, // Only available on Volta+
    WBAll,  // Only available on Volta+
    WBAllP, // Only available on Volta+
}

impl CCtlOp {
    /// Returns `true` for the `*All` / `*AllP` variants and `false` for
    /// every other variant.
    pub fn is_all(&self) -> bool {
        // Kept exhaustive on purpose so a new variant forces a decision.
        match self {
            Self::IVAll | Self::IVAllP | Self::WBAll | Self::WBAllP => true,
            Self::Qry1
            | Self::PF1
            | Self::PF1_5
            | Self::PF2
            | Self::WB
            | Self::IV
            | Self::RS
            | Self::RSLB => false,
        }
    }
}

impl fmt::Display for CCtlOp {
    /// Writes the lower-case mnemonic of the variant (`PF1_5` is `pf1.5`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            Self::Qry1 => "qry1",
            Self::PF1 => "pf1",
            Self::PF1_5 => "pf1.5",
            Self::PF2 => "pf2",
            Self::WB => "wb",
            Self::IV => "iv",
            Self::IVAll => "ivall",
            Self::RS => "rs",
            Self::RSLB => "rslb",
            Self::IVAllP => "ivallp",
            Self::WBAll => "wball",
            Self::WBAllP => "wballp",
        };
        f.write_str(mnemonic)
    }
}
5262 
5263 #[repr(C)]
5264 #[derive(SrcsAsSlice, DstsAsSlice)]
5265 pub struct OpCCtl {
5266     pub op: CCtlOp,
5267 
5268     pub mem_space: MemSpace,
5269 
5270     #[src_type(GPR)]
5271     pub addr: Src,
5272 
5273     pub addr_offset: i32,
5274 }
5275 
5276 impl DisplayOp for OpCCtl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5277     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5278         write!(f, "cctl{}", self.mem_space)?;
5279         if !self.op.is_all() {
5280             write!(f, " [{}", self.addr)?;
5281             if self.addr_offset > 0 {
5282                 write!(f, "+{:#x}", self.addr_offset)?;
5283             }
5284             write!(f, "]")?;
5285         }
5286         Ok(())
5287     }
5288 }
5289 impl_display_for_op!(OpCCtl);
5290 
5291 #[repr(C)]
5292 #[derive(SrcsAsSlice, DstsAsSlice)]
5293 pub struct OpMemBar {
5294     pub scope: MemScope,
5295 }
5296 
5297 impl DisplayOp for OpMemBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5298     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5299         write!(f, "membar.sc.{}", self.scope)
5300     }
5301 }
5302 impl_display_for_op!(OpMemBar);
5303 
5304 #[repr(C)]
5305 #[derive(SrcsAsSlice, DstsAsSlice)]
5306 pub struct OpBClear {
5307     pub dst: Dst,
5308 }
5309 
5310 impl DisplayOp for OpBClear {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5311     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5312         write!(f, "bclear")
5313     }
5314 }
5315 impl_display_for_op!(OpBClear);
5316 
5317 #[repr(C)]
5318 #[derive(SrcsAsSlice, DstsAsSlice)]
5319 pub struct OpBMov {
5320     pub dst: Dst,
5321     pub src: Src,
5322     pub clear: bool,
5323 }
5324 
5325 impl DisplayOp for OpBMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5326     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5327         write!(f, "bmov.32")?;
5328         if self.clear {
5329             write!(f, ".clear")?;
5330         }
5331         write!(f, " {}", self.src)
5332     }
5333 }
5334 impl_display_for_op!(OpBMov);
5335 
5336 #[repr(C)]
5337 #[derive(SrcsAsSlice, DstsAsSlice)]
5338 pub struct OpBreak {
5339     #[dst_type(Bar)]
5340     pub bar_out: Dst,
5341 
5342     #[src_type(Bar)]
5343     pub bar_in: Src,
5344 
5345     #[src_type(Pred)]
5346     pub cond: Src,
5347 }
5348 
5349 impl DisplayOp for OpBreak {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5350     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5351         write!(f, "break {} {}", self.bar_in, self.cond)
5352     }
5353 }
5354 impl_display_for_op!(OpBreak);
5355 
5356 #[repr(C)]
5357 #[derive(SrcsAsSlice, DstsAsSlice)]
5358 pub struct OpBSSy {
5359     #[dst_type(Bar)]
5360     pub bar_out: Dst,
5361 
5362     #[src_type(Pred)]
5363     pub bar_in: Src,
5364 
5365     #[src_type(Pred)]
5366     pub cond: Src,
5367 
5368     pub target: Label,
5369 }
5370 
5371 impl DisplayOp for OpBSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5372     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5373         write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
5374     }
5375 }
5376 impl_display_for_op!(OpBSSy);
5377 
5378 #[repr(C)]
5379 #[derive(SrcsAsSlice, DstsAsSlice)]
5380 pub struct OpBSync {
5381     #[src_type(Bar)]
5382     pub bar: Src,
5383 
5384     #[src_type(Pred)]
5385     pub cond: Src,
5386 }
5387 
5388 impl DisplayOp for OpBSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5389     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5390         write!(f, "bsync {} {}", self.bar, self.cond)
5391     }
5392 }
5393 impl_display_for_op!(OpBSync);
5394 
5395 #[repr(C)]
5396 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5397 pub struct OpBra {
5398     pub target: Label,
5399 }
5400 
5401 impl DisplayOp for OpBra {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5402     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5403         write!(f, "bra {}", self.target)
5404     }
5405 }
5406 impl_display_for_op!(OpBra);
5407 
5408 #[repr(C)]
5409 #[derive(SrcsAsSlice, DstsAsSlice)]
5410 pub struct OpSSy {
5411     pub target: Label,
5412 }
5413 
5414 impl DisplayOp for OpSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5415     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5416         write!(f, "ssy {}", self.target)
5417     }
5418 }
5419 impl_display_for_op!(OpSSy);
5420 
5421 #[repr(C)]
5422 #[derive(SrcsAsSlice, DstsAsSlice)]
5423 pub struct OpSync {
5424     pub target: Label,
5425 }
5426 
5427 impl DisplayOp for OpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5428     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5429         write!(f, "sync {}", self.target)
5430     }
5431 }
5432 impl_display_for_op!(OpSync);
5433 
5434 #[repr(C)]
5435 #[derive(SrcsAsSlice, DstsAsSlice)]
5436 pub struct OpBrk {
5437     pub target: Label,
5438 }
5439 
5440 impl DisplayOp for OpBrk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5441     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5442         write!(f, "brk {}", self.target)
5443     }
5444 }
5445 impl_display_for_op!(OpBrk);
5446 
5447 #[repr(C)]
5448 #[derive(SrcsAsSlice, DstsAsSlice)]
5449 pub struct OpPBk {
5450     pub target: Label,
5451 }
5452 
5453 impl DisplayOp for OpPBk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5454     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5455         write!(f, "pbk {}", self.target)
5456     }
5457 }
5458 impl_display_for_op!(OpPBk);
5459 
5460 #[repr(C)]
5461 #[derive(SrcsAsSlice, DstsAsSlice)]
5462 pub struct OpCont {
5463     pub target: Label,
5464 }
5465 
5466 impl DisplayOp for OpCont {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5467     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5468         write!(f, "cont {}", self.target)
5469     }
5470 }
5471 impl_display_for_op!(OpCont);
5472 
5473 #[repr(C)]
5474 #[derive(SrcsAsSlice, DstsAsSlice)]
5475 pub struct OpPCnt {
5476     pub target: Label,
5477 }
5478 
5479 impl DisplayOp for OpPCnt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5480     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5481         write!(f, "pcnt {}", self.target)
5482     }
5483 }
5484 impl_display_for_op!(OpPCnt);
5485 
5486 #[repr(C)]
5487 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5488 pub struct OpExit {}
5489 
5490 impl DisplayOp for OpExit {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5491     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5492         write!(f, "exit")
5493     }
5494 }
5495 impl_display_for_op!(OpExit);
5496 
5497 #[repr(C)]
5498 #[derive(SrcsAsSlice, DstsAsSlice)]
5499 pub struct OpWarpSync {
5500     pub mask: u32,
5501 }
5502 
5503 impl DisplayOp for OpWarpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5504     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5505         write!(f, "warpsync 0x{:x}", self.mask)
5506     }
5507 }
5508 impl_display_for_op!(OpWarpSync);
5509 
5510 #[repr(C)]
5511 #[derive(SrcsAsSlice, DstsAsSlice)]
5512 pub struct OpBar {}
5513 
5514 impl DisplayOp for OpBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5515     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5516         write!(f, "bar.sync")
5517     }
5518 }
5519 impl_display_for_op!(OpBar);
5520 
5521 #[repr(C)]
5522 #[derive(SrcsAsSlice, DstsAsSlice)]
5523 pub struct OpCS2R {
5524     pub dst: Dst,
5525     pub idx: u8,
5526 }
5527 
5528 impl DisplayOp for OpCS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5529     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5530         write!(f, "cs2r sr[{:#x}]", self.idx)
5531     }
5532 }
5533 impl_display_for_op!(OpCS2R);
5534 
5535 #[repr(C)]
5536 #[derive(SrcsAsSlice, DstsAsSlice)]
5537 pub struct OpIsberd {
5538     #[dst_type(GPR)]
5539     pub dst: Dst,
5540 
5541     #[src_type(SSA)]
5542     pub idx: Src,
5543 }
5544 
5545 impl DisplayOp for OpIsberd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5546     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5547         write!(f, "isberd [{}]", self.idx)
5548     }
5549 }
5550 impl_display_for_op!(OpIsberd);
5551 
5552 #[repr(C)]
5553 #[derive(SrcsAsSlice, DstsAsSlice)]
5554 pub struct OpKill {}
5555 
5556 impl DisplayOp for OpKill {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5557     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5558         write!(f, "kill")
5559     }
5560 }
5561 impl_display_for_op!(OpKill);
5562 
5563 #[repr(C)]
5564 #[derive(SrcsAsSlice, DstsAsSlice)]
5565 pub struct OpNop {
5566     pub label: Option<Label>,
5567 }
5568 
5569 impl DisplayOp for OpNop {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5570     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5571         write!(f, "nop")?;
5572         if let Some(label) = &self.label {
5573             write!(f, " {}", label)?;
5574         }
5575         Ok(())
5576     }
5577 }
5578 impl_display_for_op!(OpNop);
5579 
/// The value selector carried by `OpPixLd`.
#[allow(dead_code)]
pub enum PixVal {
    MsCount,
    CovMask,
    Covered,
    Offset,
    CentroidOffset,
    MyIndex,
    InnerCoverage,
}

impl fmt::Display for PixVal {
    /// Writes the variant as a dot-prefixed, lower-case modifier suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::MsCount => ".mscount",
            Self::CovMask => ".covmask",
            Self::Covered => ".covered",
            Self::Offset => ".offset",
            Self::CentroidOffset => ".centroid_offset",
            Self::MyIndex => ".my_index",
            Self::InnerCoverage => ".inner_coverage",
        };
        f.write_str(suffix)
    }
}
5604 
5605 #[repr(C)]
5606 #[derive(SrcsAsSlice, DstsAsSlice)]
5607 pub struct OpPixLd {
5608     pub dst: Dst,
5609     pub val: PixVal,
5610 }
5611 
5612 impl DisplayOp for OpPixLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5613     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5614         write!(f, "pixld{}", self.val)
5615     }
5616 }
5617 impl_display_for_op!(OpPixLd);
5618 
5619 #[repr(C)]
5620 #[derive(SrcsAsSlice, DstsAsSlice)]
5621 pub struct OpS2R {
5622     pub dst: Dst,
5623     pub idx: u8,
5624 }
5625 
5626 impl DisplayOp for OpS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5627     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5628         write!(f, "s2r sr[{:#x}]", self.idx)
5629     }
5630 }
5631 impl_display_for_op!(OpS2R);
5632 
/// The mode selector carried by `OpVote`.
pub enum VoteOp {
    Any,
    All,
    Eq,
}

impl fmt::Display for VoteOp {
    /// Writes the lower-case name of the variant, without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Any => "any",
            Self::All => "all",
            Self::Eq => "eq",
        };
        f.write_str(name)
    }
}
5648 
5649 #[repr(C)]
5650 #[derive(SrcsAsSlice, DstsAsSlice)]
5651 pub struct OpVote {
5652     pub op: VoteOp,
5653 
5654     #[dst_type(GPR)]
5655     pub ballot: Dst,
5656 
5657     #[dst_type(Pred)]
5658     pub vote: Dst,
5659 
5660     #[src_type(Pred)]
5661     pub pred: Src,
5662 }
5663 
5664 impl DisplayOp for OpVote {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5665     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5666         if self.ballot.is_none() && self.vote.is_none() {
5667             write!(f, "none")
5668         } else {
5669             if !self.ballot.is_none() {
5670                 write!(f, "{}", self.ballot)?;
5671             }
5672             if !self.vote.is_none() {
5673                 write!(f, "{}", self.vote)?;
5674             }
5675             Ok(())
5676         }
5677     }
5678 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5679     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5680         write!(f, "vote.{} {}", self.op, self.pred)
5681     }
5682 }
5683 impl_display_for_op!(OpVote);
5684 
5685 #[repr(C)]
5686 #[derive(SrcsAsSlice, DstsAsSlice)]
5687 pub struct OpUndef {
5688     pub dst: Dst,
5689 }
5690 
5691 impl DisplayOp for OpUndef {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5692     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5693         write!(f, "undef {}", self.dst)
5694     }
5695 }
5696 impl_display_for_op!(OpUndef);
5697 
5698 #[repr(C)]
5699 #[derive(SrcsAsSlice, DstsAsSlice)]
5700 pub struct OpSrcBar {
5701     pub src: Src,
5702 }
5703 
5704 impl DisplayOp for OpSrcBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5705     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5706         write!(f, "src_bar {}", self.src)
5707     }
5708 }
5709 impl_display_for_op!(OpSrcBar);
5710 
/// Two parallel vectors that are grown, iterated and filtered in lock-step.
///
/// Element `i` of `a` is paired with element `i` of `b`.  Most methods
/// debug-assert that both vectors have the same length.
pub struct VecPair<A, B> {
    a: Vec<A>,
    b: Vec<B>,
}

impl<A, B> VecPair<A, B> {
    /// Creates an empty pair of vectors.
    pub fn new() -> Self {
        VecPair {
            a: Vec::new(),
            b: Vec::new(),
        }
    }

    /// Appends the pair `(a, b)` at the end.
    pub fn push(&mut self, a: A, b: B) {
        debug_assert!(self.a.len() == self.b.len());
        self.a.push(a);
        self.b.push(b);
    }

    /// Moves every pair of `other` to the end of `self`, leaving `other`
    /// empty.
    pub fn append(&mut self, other: &mut VecPair<A, B>) {
        let VecPair { a, b } = other;
        self.a.append(a);
        self.b.append(b);
    }

    /// Returns the number of pairs.
    pub fn len(&self) -> usize {
        debug_assert!(self.a.len() == self.b.len());
        self.a.len()
    }

    /// Returns `true` when no pair is stored.
    pub fn is_empty(&self) -> bool {
        debug_assert!(self.a.len() == self.b.len());
        self.a.is_empty()
    }

    /// Iterates over `(&A, &B)` pairs in insertion order.
    pub fn iter(&self) -> Zip<slice::Iter<'_, A>, slice::Iter<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        let (left, right) = (self.a.iter(), self.b.iter());
        left.zip(right)
    }

    /// Iterates over `(&mut A, &mut B)` pairs in insertion order.
    pub fn iter_mut(
        &mut self,
    ) -> Zip<slice::IterMut<'_, A>, slice::IterMut<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        let (left, right) = (self.a.iter_mut(), self.b.iter_mut());
        left.zip(right)
    }
}

impl<A: Clone, B: Clone> VecPair<A, B> {
    /// Keeps only the pairs for which `f` returns `true`, preserving their
    /// relative order.
    ///
    /// `f` is called exactly once per pair, in order.
    pub fn retain(&mut self, mut f: impl FnMut(&A, &B) -> bool) {
        debug_assert!(self.a.len() == self.b.len());
        let total = self.a.len();

        // Skip the leading run of kept pairs; nothing has to move yet.
        let mut kept = 0_usize;
        while kept < total && f(&self.a[kept], &self.b[kept]) {
            kept += 1;
        }

        // `kept` now indexes the first rejected pair (or equals `total`).
        // That pair has already been tested, so resume right after it.
        //
        // Cloning could be more efficient but it's good enough for our
        // purposes since everything we're storing is small and has a
        // trivial Drop.
        for read in (kept + 1)..total {
            if f(&self.a[read], &self.b[read]) {
                self.a[kept] = self.a[read].clone();
                self.b[kept] = self.b[read].clone();
                kept += 1;
            }
        }

        // No-op when every pair was kept.
        self.a.truncate(kept);
        self.b.truncate(kept);
    }
}
5793 
/// Hands out consecutive `u32` ids, starting at zero.
pub struct PhiAllocator {
    count: u32,
}

impl PhiAllocator {
    /// Creates an allocator whose first id will be `0`.
    pub fn new() -> PhiAllocator {
        Self { count: 0 }
    }

    /// Returns the next unused id and advances the internal counter by one.
    pub fn alloc(&mut self) -> u32 {
        let next = self.count;
        self.count += 1;
        next
    }
}
5809 
5810 #[repr(C)]
5811 #[derive(DstsAsSlice)]
5812 pub struct OpPhiSrcs {
5813     pub srcs: VecPair<u32, Src>,
5814 }
5815 
5816 impl OpPhiSrcs {
new() -> OpPhiSrcs5817     pub fn new() -> OpPhiSrcs {
5818         OpPhiSrcs {
5819             srcs: VecPair::new(),
5820         }
5821     }
5822 }
5823 
5824 impl AsSlice<Src> for OpPhiSrcs {
5825     type Attr = SrcType;
5826 
as_slice(&self) -> &[Src]5827     fn as_slice(&self) -> &[Src] {
5828         &self.srcs.b
5829     }
5830 
as_mut_slice(&mut self) -> &mut [Src]5831     fn as_mut_slice(&mut self) -> &mut [Src] {
5832         &mut self.srcs.b
5833     }
5834 
attrs(&self) -> SrcTypeList5835     fn attrs(&self) -> SrcTypeList {
5836         SrcTypeList::Uniform(SrcType::GPR)
5837     }
5838 }
5839 
5840 impl DisplayOp for OpPhiSrcs {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5841     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5842         Ok(())
5843     }
5844 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5845     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5846         write!(f, "phi_src ")?;
5847         for (i, (id, src)) in self.srcs.iter().enumerate() {
5848             if i > 0 {
5849                 write!(f, ", ")?;
5850             }
5851             write!(f, "φ{} = {}", id, src)?;
5852         }
5853         Ok(())
5854     }
5855 }
5856 impl_display_for_op!(OpPhiSrcs);
5857 
5858 #[repr(C)]
5859 #[derive(SrcsAsSlice)]
5860 pub struct OpPhiDsts {
5861     pub dsts: VecPair<u32, Dst>,
5862 }
5863 
5864 impl OpPhiDsts {
new() -> OpPhiDsts5865     pub fn new() -> OpPhiDsts {
5866         OpPhiDsts {
5867             dsts: VecPair::new(),
5868         }
5869     }
5870 }
5871 
5872 impl AsSlice<Dst> for OpPhiDsts {
5873     type Attr = DstType;
5874 
as_slice(&self) -> &[Dst]5875     fn as_slice(&self) -> &[Dst] {
5876         &self.dsts.b
5877     }
5878 
as_mut_slice(&mut self) -> &mut [Dst]5879     fn as_mut_slice(&mut self) -> &mut [Dst] {
5880         &mut self.dsts.b
5881     }
5882 
attrs(&self) -> DstTypeList5883     fn attrs(&self) -> DstTypeList {
5884         DstTypeList::Uniform(DstType::Vec)
5885     }
5886 }
5887 
5888 impl DisplayOp for OpPhiDsts {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5889     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5890         Ok(())
5891     }
5892 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5893     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5894         write!(f, "phi_dst ")?;
5895         for (i, (id, dst)) in self.dsts.iter().enumerate() {
5896             if i > 0 {
5897                 write!(f, ", ")?;
5898             }
5899             write!(f, "{} = φ{}", dst, id)?;
5900         }
5901         Ok(())
5902     }
5903 }
5904 impl_display_for_op!(OpPhiDsts);
5905 
5906 #[repr(C)]
5907 #[derive(SrcsAsSlice, DstsAsSlice)]
5908 pub struct OpCopy {
5909     pub dst: Dst,
5910     pub src: Src,
5911 }
5912 
5913 impl DisplayOp for OpCopy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5914     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5915         write!(f, "copy {}", self.src)
5916     }
5917 }
5918 impl_display_for_op!(OpCopy);
5919 
5920 #[repr(C)]
5921 #[derive(SrcsAsSlice, DstsAsSlice)]
5922 /// Copies a value and pins its destination in the register file
5923 pub struct OpPin {
5924     pub dst: Dst,
5925     #[src_type(SSA)]
5926     pub src: Src,
5927 }
5928 
5929 impl DisplayOp for OpPin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5930     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5931         write!(f, "pin {}", self.src)
5932     }
5933 }
5934 impl_display_for_op!(OpPin);
5935 
5936 #[repr(C)]
5937 #[derive(SrcsAsSlice, DstsAsSlice)]
5938 /// Copies a pinned value to an unpinned value
5939 pub struct OpUnpin {
5940     pub dst: Dst,
5941     #[src_type(SSA)]
5942     pub src: Src,
5943 }
5944 
5945 impl DisplayOp for OpUnpin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5946     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5947         write!(f, "unpin {}", self.src)
5948     }
5949 }
5950 impl_display_for_op!(OpUnpin);
5951 
5952 #[repr(C)]
5953 #[derive(SrcsAsSlice, DstsAsSlice)]
5954 pub struct OpSwap {
5955     pub dsts: [Dst; 2],
5956     pub srcs: [Src; 2],
5957 }
5958 
5959 impl DisplayOp for OpSwap {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5960     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5961         write!(f, "swap {} {}", self.srcs[0], self.srcs[1])
5962     }
5963 }
5964 impl_display_for_op!(OpSwap);
5965 
5966 #[repr(C)]
5967 pub struct OpParCopy {
5968     pub dsts_srcs: VecPair<Dst, Src>,
5969     pub tmp: Option<RegRef>,
5970 }
5971 
5972 impl OpParCopy {
new() -> OpParCopy5973     pub fn new() -> OpParCopy {
5974         OpParCopy {
5975             dsts_srcs: VecPair::new(),
5976             tmp: None,
5977         }
5978     }
5979 
is_empty(&self) -> bool5980     pub fn is_empty(&self) -> bool {
5981         self.dsts_srcs.is_empty()
5982     }
5983 
push(&mut self, dst: Dst, src: Src)5984     pub fn push(&mut self, dst: Dst, src: Src) {
5985         self.dsts_srcs.push(dst, src);
5986     }
5987 }
5988 
5989 impl AsSlice<Src> for OpParCopy {
5990     type Attr = SrcType;
5991 
as_slice(&self) -> &[Src]5992     fn as_slice(&self) -> &[Src] {
5993         &self.dsts_srcs.b
5994     }
5995 
as_mut_slice(&mut self) -> &mut [Src]5996     fn as_mut_slice(&mut self) -> &mut [Src] {
5997         &mut self.dsts_srcs.b
5998     }
5999 
attrs(&self) -> SrcTypeList6000     fn attrs(&self) -> SrcTypeList {
6001         SrcTypeList::Uniform(SrcType::GPR)
6002     }
6003 }
6004 
6005 impl AsSlice<Dst> for OpParCopy {
6006     type Attr = DstType;
6007 
as_slice(&self) -> &[Dst]6008     fn as_slice(&self) -> &[Dst] {
6009         &self.dsts_srcs.a
6010     }
6011 
as_mut_slice(&mut self) -> &mut [Dst]6012     fn as_mut_slice(&mut self) -> &mut [Dst] {
6013         &mut self.dsts_srcs.a
6014     }
6015 
attrs(&self) -> DstTypeList6016     fn attrs(&self) -> DstTypeList {
6017         DstTypeList::Uniform(DstType::Vec)
6018     }
6019 }
6020 
6021 impl DisplayOp for OpParCopy {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result6022     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
6023         Ok(())
6024     }
6025 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6026     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6027         write!(f, "par_copy")?;
6028         for (i, (dst, src)) in self.dsts_srcs.iter().enumerate() {
6029             if i > 0 {
6030                 write!(f, ",")?;
6031             }
6032             write!(f, " {} = {}", dst, src)?;
6033         }
6034         Ok(())
6035     }
6036 }
6037 impl_display_for_op!(OpParCopy);
6038 
6039 #[repr(C)]
6040 #[derive(DstsAsSlice)]
6041 pub struct OpRegOut {
6042     pub srcs: Vec<Src>,
6043 }
6044 
6045 impl AsSlice<Src> for OpRegOut {
6046     type Attr = SrcType;
6047 
as_slice(&self) -> &[Src]6048     fn as_slice(&self) -> &[Src] {
6049         &self.srcs
6050     }
6051 
as_mut_slice(&mut self) -> &mut [Src]6052     fn as_mut_slice(&mut self) -> &mut [Src] {
6053         &mut self.srcs
6054     }
6055 
attrs(&self) -> SrcTypeList6056     fn attrs(&self) -> SrcTypeList {
6057         SrcTypeList::Uniform(SrcType::GPR)
6058     }
6059 }
6060 
6061 impl DisplayOp for OpRegOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6062     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6063         write!(f, "reg_out {{")?;
6064         for (i, src) in self.srcs.iter().enumerate() {
6065             if i > 0 {
6066                 write!(f, ",")?;
6067             }
6068             write!(f, " {}", src)?;
6069         }
6070         write!(f, " }}")
6071     }
6072 }
6073 impl_display_for_op!(OpRegOut);
6074 
/// The mode selector carried by `OpOut`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OutType {
    Emit,
    Cut,
    EmitThenCut,
}

impl fmt::Display for OutType {
    /// Writes the variant in lower-case snake case, without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Emit => "emit",
            Self::Cut => "cut",
            Self::EmitThenCut => "emit_then_cut",
        };
        f.write_str(name)
    }
}
6091 
6092 #[repr(C)]
6093 #[derive(SrcsAsSlice, DstsAsSlice)]
6094 pub struct OpOut {
6095     pub dst: Dst,
6096 
6097     #[src_type(SSA)]
6098     pub handle: Src,
6099 
6100     #[src_type(ALU)]
6101     pub stream: Src,
6102 
6103     pub out_type: OutType,
6104 }
6105 
6106 impl DisplayOp for OpOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6107     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6108         write!(f, "out.{} {} {}", self.out_type, self.handle, self.stream)
6109     }
6110 }
6111 impl_display_for_op!(OpOut);
6112 
6113 #[repr(C)]
6114 #[derive(SrcsAsSlice, DstsAsSlice)]
6115 pub struct OpOutFinal {
6116     #[src_type(SSA)]
6117     pub handle: Src,
6118 }
6119 
6120 impl DisplayOp for OpOutFinal {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6121     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6122         write!(f, "out.final {{ {} }}", self.handle)
6123     }
6124 }
6125 impl_display_for_op!(OpOutFinal);
6126 
6127 /// Describes an annotation on an instruction.
6128 #[repr(C)]
6129 #[derive(SrcsAsSlice, DstsAsSlice)]
6130 pub struct OpAnnotate {
6131     /// The annotation
6132     pub annotation: String,
6133 }
6134 
6135 impl DisplayOp for OpAnnotate {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6136     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6137         write!(f, "// {}", self.annotation)
6138     }
6139 }
6140 
6141 impl fmt::Display for OpAnnotate {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6142     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6143         self.fmt_op(f)
6144     }
6145 }
6146 
6147 #[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
6148 pub enum Op {
6149     FAdd(OpFAdd),
6150     FFma(OpFFma),
6151     FMnMx(OpFMnMx),
6152     FMul(OpFMul),
6153     Rro(OpRro),
6154     MuFu(OpMuFu),
6155     FSet(OpFSet),
6156     FSetP(OpFSetP),
6157     FSwzAdd(OpFSwzAdd),
6158     DAdd(OpDAdd),
6159     DFma(OpDFma),
6160     DMnMx(OpDMnMx),
6161     DMul(OpDMul),
6162     DSetP(OpDSetP),
6163     HAdd2(OpHAdd2),
6164     HFma2(OpHFma2),
6165     HMul2(OpHMul2),
6166     HSet2(OpHSet2),
6167     HSetP2(OpHSetP2),
6168     HMnMx2(OpHMnMx2),
6169     BMsk(OpBMsk),
6170     BRev(OpBRev),
6171     Bfe(OpBfe),
6172     Flo(OpFlo),
6173     IAbs(OpIAbs),
6174     IAdd2(OpIAdd2),
6175     IAdd2X(OpIAdd2X),
6176     IAdd3(OpIAdd3),
6177     IAdd3X(OpIAdd3X),
6178     IDp4(OpIDp4),
6179     IMad(OpIMad),
6180     IMad64(OpIMad64),
6181     IMul(OpIMul),
6182     IMnMx(OpIMnMx),
6183     ISetP(OpISetP),
6184     Lop2(OpLop2),
6185     Lop3(OpLop3),
6186     PopC(OpPopC),
6187     Shf(OpShf),
6188     Shl(OpShl),
6189     Shr(OpShr),
6190     F2F(OpF2F),
6191     F2FP(OpF2FP),
6192     F2I(OpF2I),
6193     I2F(OpI2F),
6194     I2I(OpI2I),
6195     FRnd(OpFRnd),
6196     Mov(OpMov),
6197     Prmt(OpPrmt),
6198     Sel(OpSel),
6199     Shfl(OpShfl),
6200     PLop3(OpPLop3),
6201     PSetP(OpPSetP),
6202     R2UR(OpR2UR),
6203     Tex(OpTex),
6204     Tld(OpTld),
6205     Tld4(OpTld4),
6206     Tmml(OpTmml),
6207     Txd(OpTxd),
6208     Txq(OpTxq),
6209     SuLd(OpSuLd),
6210     SuSt(OpSuSt),
6211     SuAtom(OpSuAtom),
6212     Ld(OpLd),
6213     Ldc(OpLdc),
6214     St(OpSt),
6215     Atom(OpAtom),
6216     AL2P(OpAL2P),
6217     ALd(OpALd),
6218     ASt(OpASt),
6219     Ipa(OpIpa),
6220     LdTram(OpLdTram),
6221     CCtl(OpCCtl),
6222     MemBar(OpMemBar),
6223     BClear(OpBClear),
6224     BMov(OpBMov),
6225     Break(OpBreak),
6226     BSSy(OpBSSy),
6227     BSync(OpBSync),
6228     Bra(OpBra),
6229     SSy(OpSSy),
6230     Sync(OpSync),
6231     Brk(OpBrk),
6232     PBk(OpPBk),
6233     Cont(OpCont),
6234     PCnt(OpPCnt),
6235     Exit(OpExit),
6236     WarpSync(OpWarpSync),
6237     Bar(OpBar),
6238     CS2R(OpCS2R),
6239     Isberd(OpIsberd),
6240     Kill(OpKill),
6241     Nop(OpNop),
6242     PixLd(OpPixLd),
6243     S2R(OpS2R),
6244     Vote(OpVote),
6245     Undef(OpUndef),
6246     SrcBar(OpSrcBar),
6247     PhiSrcs(OpPhiSrcs),
6248     PhiDsts(OpPhiDsts),
6249     Copy(OpCopy),
6250     Pin(OpPin),
6251     Unpin(OpUnpin),
6252     Swap(OpSwap),
6253     ParCopy(OpParCopy),
6254     RegOut(OpRegOut),
6255     Out(OpOut),
6256     OutFinal(OpOutFinal),
6257     Annotate(OpAnnotate),
6258 }
6259 impl_display_for_op!(Op);
6260 
6261 impl Op {
is_branch(&self) -> bool6262     pub fn is_branch(&self) -> bool {
6263         match self {
6264             Op::Bra(_)
6265             | Op::Sync(_)
6266             | Op::Brk(_)
6267             | Op::Cont(_)
6268             | Op::Exit(_) => true,
6269             _ => false,
6270         }
6271     }
6272 }
6273 
6274 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
6275 pub enum PredRef {
6276     None,
6277     SSA(SSAValue),
6278     Reg(RegRef),
6279 }
6280 
6281 impl PredRef {
6282     #[allow(dead_code)]
as_reg(&self) -> Option<&RegRef>6283     pub fn as_reg(&self) -> Option<&RegRef> {
6284         match self {
6285             PredRef::Reg(r) => Some(r),
6286             _ => None,
6287         }
6288     }
6289 
6290     #[allow(dead_code)]
as_ssa(&self) -> Option<&SSAValue>6291     pub fn as_ssa(&self) -> Option<&SSAValue> {
6292         match self {
6293             PredRef::SSA(r) => Some(r),
6294             _ => None,
6295         }
6296     }
6297 
is_none(&self) -> bool6298     pub fn is_none(&self) -> bool {
6299         matches!(self, PredRef::None)
6300     }
6301 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6302     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6303         match self {
6304             PredRef::None | PredRef::Reg(_) => &[],
6305             PredRef::SSA(ssa) => slice::from_ref(ssa),
6306         }
6307         .iter()
6308     }
6309 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6310     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6311         match self {
6312             PredRef::None | PredRef::Reg(_) => &mut [],
6313             PredRef::SSA(ssa) => slice::from_mut(ssa),
6314         }
6315         .iter_mut()
6316     }
6317 }
6318 
6319 impl From<RegRef> for PredRef {
from(reg: RegRef) -> PredRef6320     fn from(reg: RegRef) -> PredRef {
6321         PredRef::Reg(reg)
6322     }
6323 }
6324 
6325 impl From<SSAValue> for PredRef {
from(ssa: SSAValue) -> PredRef6326     fn from(ssa: SSAValue) -> PredRef {
6327         PredRef::SSA(ssa)
6328     }
6329 }
6330 
6331 impl fmt::Display for PredRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6332     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6333         match self {
6334             PredRef::None => write!(f, "pT"),
6335             PredRef::SSA(ssa) => ssa.fmt_plain(f),
6336             PredRef::Reg(reg) => reg.fmt(f),
6337         }
6338     }
6339 }
6340 
6341 #[derive(Clone, Copy)]
6342 pub struct Pred {
6343     pub pred_ref: PredRef,
6344     pub pred_inv: bool,
6345 }
6346 
6347 impl Pred {
is_true(&self) -> bool6348     pub fn is_true(&self) -> bool {
6349         self.pred_ref.is_none() && !self.pred_inv
6350     }
6351 
is_false(&self) -> bool6352     pub fn is_false(&self) -> bool {
6353         self.pred_ref.is_none() && self.pred_inv
6354     }
6355 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6356     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6357         self.pred_ref.iter_ssa()
6358     }
6359 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6360     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6361         self.pred_ref.iter_ssa_mut()
6362     }
6363 
bnot(self) -> Self6364     pub fn bnot(self) -> Self {
6365         Pred {
6366             pred_ref: self.pred_ref,
6367             pred_inv: !self.pred_inv,
6368         }
6369     }
6370 }
6371 
6372 impl From<bool> for Pred {
from(b: bool) -> Self6373     fn from(b: bool) -> Self {
6374         Pred {
6375             pred_ref: PredRef::None,
6376             pred_inv: !b,
6377         }
6378     }
6379 }
6380 
6381 impl<T: Into<PredRef>> From<T> for Pred {
from(p: T) -> Self6382     fn from(p: T) -> Self {
6383         Pred {
6384             pred_ref: p.into(),
6385             pred_inv: false,
6386         }
6387     }
6388 }
6389 
6390 impl fmt::Display for Pred {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6391     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6392         if self.pred_inv {
6393             write!(f, "!")?;
6394         }
6395         self.pred_ref.fmt(f)
6396     }
6397 }
6398 
/// Lower bound for an instruction delay.
pub const MIN_INSTR_DELAY: u8 = 1;
/// Upper bound accepted by [`InstrDeps::set_delay`].
pub const MAX_INSTR_DELAY: u8 = 15;

/// Scheduling/dependency annotations attached to an instruction.
///
/// The read and write barrier slots are stored as `i8`, with `-1` meaning
/// "no barrier"; use [`InstrDeps::rd_bar`] and [`InstrDeps::wr_bar`] to
/// read them as `Option<u8>`. Barrier indices are restricted to `0..6`.
pub struct InstrDeps {
    /// Delay value; `0` means "not set" and is not printed.
    pub delay: u8,
    /// Yield flag, printed as `yld`.
    pub yld: bool,
    /// Write barrier index, or `-1` when unset.
    wr_bar: i8,
    /// Read barrier index, or `-1` when unset.
    rd_bar: i8,
    /// Bit mask of barriers to wait on (bit `i` = barrier `i`).
    pub wt_bar_mask: u8,
    /// Bit mask of reuse flags.
    pub reuse_mask: u8,
}

impl InstrDeps {
    /// Creates an empty set of dependencies: no delay, no yield, no
    /// barriers and no reuse flags.
    pub fn new() -> InstrDeps {
        InstrDeps {
            delay: 0,
            yld: false,
            wr_bar: -1,
            rd_bar: -1,
            wt_bar_mask: 0,
            reuse_mask: 0,
        }
    }

    /// Returns the read barrier index, or `None` when unset.
    pub fn rd_bar(&self) -> Option<u8> {
        // A negative value fails the conversion, which maps to `None`.
        self.rd_bar.try_into().ok()
    }

    /// Returns the write barrier index, or `None` when unset.
    pub fn wr_bar(&self) -> Option<u8> {
        // A negative value fails the conversion, which maps to `None`.
        self.wr_bar.try_into().ok()
    }

    /// Sets the delay.
    ///
    /// # Panics
    ///
    /// Panics if `delay` exceeds [`MAX_INSTR_DELAY`].
    pub fn set_delay(&mut self, delay: u8) {
        assert!(delay <= MAX_INSTR_DELAY);
        self.delay = delay;
    }

    /// Sets or clears the yield flag.
    pub fn set_yield(&mut self, yld: bool) {
        self.yld = yld;
    }

    /// Sets the read barrier index.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not in `0..6`.
    pub fn set_rd_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.rd_bar = idx.try_into().unwrap();
    }

    /// Sets the write barrier index.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not in `0..6`.
    pub fn set_wr_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.wr_bar = idx.try_into().unwrap();
    }

    /// Adds barrier `idx` to the wait mask.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not in `0..6`.
    pub fn add_wt_bar(&mut self, idx: u8) {
        // Validate before shifting: without overflow checks `1_u8 << idx`
        // wraps the shift amount for `idx >= 8`, which would slip past the
        // mask check below and silently set the wrong barrier bit.
        assert!(idx < 6);
        self.add_wt_bar_mask(1 << idx);
    }

    /// ORs `bar_mask` into the wait mask.
    ///
    /// # Panics
    ///
    /// Panics if `bar_mask` has any bit set above bit 5.
    pub fn add_wt_bar_mask(&mut self, bar_mask: u8) {
        assert!(bar_mask < 1 << 6);
        self.wt_bar_mask |= bar_mask;
    }

    /// Sets bit `idx` of the reuse mask.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not in `0..6`.
    #[allow(dead_code)]
    pub fn add_reuse(&mut self, idx: u8) {
        assert!(idx < 6);
        self.reuse_mask |= 1_u8 << idx;
    }
}

impl fmt::Display for InstrDeps {
    /// Writes each field that is set, each prefixed with a space, in the
    /// order delay, wait mask, read barrier, write barrier, reuse mask,
    /// yield. Writes nothing when no field is set.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.delay > 0 {
            write!(f, " delay={}", self.delay)?;
        }
        if self.wt_bar_mask != 0 {
            write!(f, " wt={:06b}", self.wt_bar_mask)?;
        }
        if self.rd_bar >= 0 {
            write!(f, " rd:{}", self.rd_bar)?;
        }
        if self.wr_bar >= 0 {
            write!(f, " wr:{}", self.wr_bar)?;
        }
        if self.reuse_mask != 0 {
            write!(f, " reuse={:06b}", self.reuse_mask)?;
        }
        if self.yld {
            write!(f, " yld")?;
        }
        Ok(())
    }
}
6497 
6498 pub struct Instr {
6499     pub pred: Pred,
6500     pub op: Op,
6501     pub deps: InstrDeps,
6502 }
6503 
6504 impl Instr {
new(op: impl Into<Op>) -> Instr6505     pub fn new(op: impl Into<Op>) -> Instr {
6506         Instr {
6507             op: op.into(),
6508             pred: true.into(),
6509             deps: InstrDeps::new(),
6510         }
6511     }
6512 
new_boxed(op: impl Into<Op>) -> Box<Self>6513     pub fn new_boxed(op: impl Into<Op>) -> Box<Self> {
6514         Box::new(Instr::new(op))
6515     }
6516 
dsts(&self) -> &[Dst]6517     pub fn dsts(&self) -> &[Dst] {
6518         self.op.dsts_as_slice()
6519     }
6520 
dsts_mut(&mut self) -> &mut [Dst]6521     pub fn dsts_mut(&mut self) -> &mut [Dst] {
6522         self.op.dsts_as_mut_slice()
6523     }
6524 
srcs(&self) -> &[Src]6525     pub fn srcs(&self) -> &[Src] {
6526         self.op.srcs_as_slice()
6527     }
6528 
srcs_mut(&mut self) -> &mut [Src]6529     pub fn srcs_mut(&mut self) -> &mut [Src] {
6530         self.op.srcs_as_mut_slice()
6531     }
6532 
src_types(&self) -> SrcTypeList6533     pub fn src_types(&self) -> SrcTypeList {
6534         self.op.src_types()
6535     }
6536 
for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue))6537     pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
6538         for ssa in self.pred.iter_ssa() {
6539             f(ssa);
6540         }
6541         for src in self.srcs() {
6542             for ssa in src.iter_ssa() {
6543                 f(ssa);
6544             }
6545         }
6546     }
6547 
for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6548     pub fn for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6549         for ssa in self.pred.iter_ssa_mut() {
6550             f(ssa);
6551         }
6552         for src in self.srcs_mut() {
6553             for ssa in src.iter_ssa_mut() {
6554                 f(ssa);
6555             }
6556         }
6557     }
6558 
for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue))6559     pub fn for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue)) {
6560         for dst in self.dsts() {
6561             for ssa in dst.iter_ssa() {
6562                 f(ssa);
6563             }
6564         }
6565     }
6566 
for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6567     pub fn for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6568         for dst in self.dsts_mut() {
6569             for ssa in dst.iter_ssa_mut() {
6570                 f(ssa);
6571             }
6572         }
6573     }
6574 
is_branch(&self) -> bool6575     pub fn is_branch(&self) -> bool {
6576         self.op.is_branch()
6577     }
6578 
uses_global_mem(&self) -> bool6579     pub fn uses_global_mem(&self) -> bool {
6580         match &self.op {
6581             Op::Atom(op) => op.mem_space != MemSpace::Local,
6582             Op::Ld(op) => op.access.space != MemSpace::Local,
6583             Op::St(op) => op.access.space != MemSpace::Local,
6584             Op::SuAtom(_) | Op::SuLd(_) | Op::SuSt(_) => true,
6585             _ => false,
6586         }
6587     }
6588 
writes_global_mem(&self) -> bool6589     pub fn writes_global_mem(&self) -> bool {
6590         match &self.op {
6591             Op::Atom(op) => matches!(op.mem_space, MemSpace::Global(_)),
6592             Op::St(op) => matches!(op.access.space, MemSpace::Global(_)),
6593             Op::SuAtom(_) | Op::SuSt(_) => true,
6594             _ => false,
6595         }
6596     }
6597 
can_eliminate(&self) -> bool6598     pub fn can_eliminate(&self) -> bool {
6599         match &self.op {
6600             Op::ASt(_)
6601             | Op::SuSt(_)
6602             | Op::SuAtom(_)
6603             | Op::St(_)
6604             | Op::Atom(_)
6605             | Op::CCtl(_)
6606             | Op::MemBar(_)
6607             | Op::Kill(_)
6608             | Op::Nop(_)
6609             | Op::BSync(_)
6610             | Op::Bra(_)
6611             | Op::SSy(_)
6612             | Op::Sync(_)
6613             | Op::Brk(_)
6614             | Op::PBk(_)
6615             | Op::Cont(_)
6616             | Op::PCnt(_)
6617             | Op::Exit(_)
6618             | Op::WarpSync(_)
6619             | Op::Bar(_)
6620             | Op::RegOut(_)
6621             | Op::Out(_)
6622             | Op::OutFinal(_)
6623             | Op::Annotate(_) => false,
6624             Op::BMov(op) => !op.clear,
6625             _ => true,
6626         }
6627     }
6628 
is_uniform(&self) -> bool6629     pub fn is_uniform(&self) -> bool {
6630         match &self.op {
6631             Op::PhiDsts(_) => false,
6632             op => op.is_uniform(),
6633         }
6634     }
6635 
has_fixed_latency(&self, sm: u8) -> bool6636     pub fn has_fixed_latency(&self, sm: u8) -> bool {
6637         match &self.op {
6638             // Float ALU
6639             Op::F2FP(_)
6640             | Op::FAdd(_)
6641             | Op::FFma(_)
6642             | Op::FMnMx(_)
6643             | Op::FMul(_)
6644             | Op::FSet(_)
6645             | Op::FSetP(_)
6646             | Op::HAdd2(_)
6647             | Op::HFma2(_)
6648             | Op::HMul2(_)
6649             | Op::HSet2(_)
6650             | Op::HSetP2(_)
6651             | Op::HMnMx2(_)
6652             | Op::FSwzAdd(_) => true,
6653 
6654             // Multi-function unit is variable latency
6655             Op::Rro(_) | Op::MuFu(_) => false,
6656 
6657             // Double-precision float ALU
6658             Op::DAdd(_)
6659             | Op::DFma(_)
6660             | Op::DMnMx(_)
6661             | Op::DMul(_)
6662             | Op::DSetP(_) => false,
6663 
6664             // Integer ALU
6665             Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false,
6666             Op::IMad(_) | Op::IMul(_) => sm >= 70,
6667             Op::BMsk(_)
6668             | Op::IAbs(_)
6669             | Op::IAdd2(_)
6670             | Op::IAdd2X(_)
6671             | Op::IAdd3(_)
6672             | Op::IAdd3X(_)
6673             | Op::IDp4(_)
6674             | Op::IMad64(_)
6675             | Op::IMnMx(_)
6676             | Op::ISetP(_)
6677             | Op::Lop2(_)
6678             | Op::Lop3(_)
6679             | Op::Shf(_)
6680             | Op::Shl(_)
6681             | Op::Shr(_)
6682             | Op::Bfe(_) => true,
6683 
6684             // Conversions are variable latency?!?
6685             Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
6686                 false
6687             }
6688 
6689             // Move ops
6690             Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
6691             Op::Shfl(_) => false,
6692 
6693             // Predicate ops
6694             Op::PLop3(_) | Op::PSetP(_) => true,
6695 
6696             // Uniform ops
6697             Op::R2UR(_) => false,
6698 
6699             // Texture ops
6700             Op::Tex(_)
6701             | Op::Tld(_)
6702             | Op::Tld4(_)
6703             | Op::Tmml(_)
6704             | Op::Txd(_)
6705             | Op::Txq(_) => false,
6706 
6707             // Surface ops
6708             Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
6709 
6710             // Memory ops
6711             Op::Ld(_)
6712             | Op::Ldc(_)
6713             | Op::St(_)
6714             | Op::Atom(_)
6715             | Op::AL2P(_)
6716             | Op::ALd(_)
6717             | Op::ASt(_)
6718             | Op::Ipa(_)
6719             | Op::CCtl(_)
6720             | Op::LdTram(_)
6721             | Op::MemBar(_) => false,
6722 
6723             // Control-flow ops
6724             Op::BClear(_) | Op::Break(_) | Op::BSSy(_) | Op::BSync(_) => true,
6725             Op::SSy(_)
6726             | Op::Sync(_)
6727             | Op::Brk(_)
6728             | Op::PBk(_)
6729             | Op::Cont(_)
6730             | Op::PCnt(_) => true,
6731             Op::Bra(_) | Op::Exit(_) => true,
6732             Op::WarpSync(_) => false,
6733 
6734             // The barrier half is HW scoreboarded by the GPR isn't.  When
6735             // moving from a GPR to a barrier, we still need a token for WaR
6736             // hazards.
6737             Op::BMov(_) => false,
6738 
6739             // Geometry ops
6740             Op::Out(_) | Op::OutFinal(_) => false,
6741 
6742             // Miscellaneous ops
6743             Op::Bar(_)
6744             | Op::CS2R(_)
6745             | Op::Isberd(_)
6746             | Op::Kill(_)
6747             | Op::PixLd(_)
6748             | Op::S2R(_) => false,
6749             Op::Nop(_) | Op::Vote(_) => true,
6750 
6751             // Virtual ops
6752             Op::Undef(_)
6753             | Op::SrcBar(_)
6754             | Op::PhiSrcs(_)
6755             | Op::PhiDsts(_)
6756             | Op::Copy(_)
6757             | Op::Pin(_)
6758             | Op::Unpin(_)
6759             | Op::Swap(_)
6760             | Op::ParCopy(_)
6761             | Op::RegOut(_)
6762             | Op::Annotate(_) => {
6763                 panic!("Not a hardware opcode")
6764             }
6765         }
6766     }
6767 
needs_yield(&self) -> bool6768     pub fn needs_yield(&self) -> bool {
6769         matches!(&self.op, Op::Bar(_) | Op::BSync(_))
6770     }
6771 
fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6772     fn fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6773         if !self.pred.is_true() {
6774             write!(f, "@{} ", self.pred)?;
6775         }
6776         Ok(())
6777     }
6778 }
6779 
6780 impl fmt::Display for Instr {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6781     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6782         write!(f, "{} {}{}", Fmt(|f| self.fmt_pred(f)), self.op, self.deps)
6783     }
6784 }
6785 
6786 impl<T: Into<Op>> From<T> for Instr {
from(value: T) -> Self6787     fn from(value: T) -> Self {
6788         Self::new(value)
6789     }
6790 }
6791 
6792 pub type MappedInstrs = SmallVec<Box<Instr>>;
6793 
6794 pub struct BasicBlock {
6795     pub label: Label,
6796 
6797     /// Whether or not this block is uniform
6798     ///
6799     /// If true, then all non-exited lanes in a warp which execute this block
6800     /// are guaranteed to execute it together
6801     pub uniform: bool,
6802 
6803     pub instrs: Vec<Box<Instr>>,
6804 }
6805 
6806 impl BasicBlock {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>) -> MappedInstrs, )6807     pub fn map_instrs(
6808         &mut self,
6809         mut map: impl FnMut(Box<Instr>) -> MappedInstrs,
6810     ) {
6811         let mut instrs = Vec::new();
6812         for i in self.instrs.drain(..) {
6813             match map(i) {
6814                 MappedInstrs::None => (),
6815                 MappedInstrs::One(i) => {
6816                     instrs.push(i);
6817                 }
6818                 MappedInstrs::Many(mut v) => {
6819                     instrs.append(&mut v);
6820                 }
6821             }
6822         }
6823         self.instrs = instrs;
6824     }
6825 
phi_dsts_ip(&self) -> Option<usize>6826     pub fn phi_dsts_ip(&self) -> Option<usize> {
6827         for (ip, instr) in self.instrs.iter().enumerate() {
6828             match &instr.op {
6829                 Op::Annotate(_) => (),
6830                 Op::PhiDsts(_) => return Some(ip),
6831                 _ => break,
6832             }
6833         }
6834         None
6835     }
6836 
phi_dsts(&self) -> Option<&OpPhiDsts>6837     pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
6838         self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op {
6839             Op::PhiDsts(phi) => phi,
6840             _ => panic!("Expected to find the phi"),
6841         })
6842     }
6843 
6844     #[allow(dead_code)]
phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts>6845     pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
6846         self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op {
6847             Op::PhiDsts(phi) => phi,
6848             _ => panic!("Expected to find the phi"),
6849         })
6850     }
6851 
phi_srcs_ip(&self) -> Option<usize>6852     pub fn phi_srcs_ip(&self) -> Option<usize> {
6853         for (ip, instr) in self.instrs.iter().enumerate().rev() {
6854             match &instr.op {
6855                 Op::Annotate(_) => (),
6856                 Op::PhiSrcs(_) => return Some(ip),
6857                 _ if instr.is_branch() => (),
6858                 _ => break,
6859             }
6860         }
6861         None
6862     }
phi_srcs(&self) -> Option<&OpPhiSrcs>6863     pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
6864         self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op {
6865             Op::PhiSrcs(phi) => phi,
6866             _ => panic!("Expected to find the phi"),
6867         })
6868     }
6869 
phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs>6870     pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
6871         self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op {
6872             Op::PhiSrcs(phi) => phi,
6873             _ => panic!("Expected to find the phi"),
6874         })
6875     }
6876 
branch(&self) -> Option<&Instr>6877     pub fn branch(&self) -> Option<&Instr> {
6878         if let Some(i) = self.instrs.last() {
6879             if i.is_branch() {
6880                 Some(i)
6881             } else {
6882                 None
6883             }
6884         } else {
6885             None
6886         }
6887     }
6888 
branch_ip(&self) -> Option<usize>6889     pub fn branch_ip(&self) -> Option<usize> {
6890         if let Some(i) = self.instrs.last() {
6891             if i.is_branch() {
6892                 Some(self.instrs.len() - 1)
6893             } else {
6894                 None
6895             }
6896         } else {
6897             None
6898         }
6899     }
6900 
6901     #[allow(dead_code)]
branch_mut(&mut self) -> Option<&mut Instr>6902     pub fn branch_mut(&mut self) -> Option<&mut Instr> {
6903         if let Some(i) = self.instrs.last_mut() {
6904             if i.is_branch() {
6905                 Some(i)
6906             } else {
6907                 None
6908             }
6909         } else {
6910             None
6911         }
6912     }
6913 
falls_through(&self) -> bool6914     pub fn falls_through(&self) -> bool {
6915         if let Some(i) = self.branch() {
6916             !i.pred.is_true()
6917         } else {
6918             true
6919         }
6920     }
6921 }
6922 
6923 pub struct Function {
6924     pub ssa_alloc: SSAValueAllocator,
6925     pub phi_alloc: PhiAllocator,
6926     pub blocks: CFG<BasicBlock>,
6927 }
6928 
6929 impl Function {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )6930     pub fn map_instrs(
6931         &mut self,
6932         mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
6933     ) {
6934         let alloc = &mut self.ssa_alloc;
6935         for b in &mut self.blocks {
6936             b.map_instrs(|i| map(i, alloc));
6937         }
6938     }
6939 }
6940 
6941 impl fmt::Display for Function {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6942     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6943         let mut pred_width = 0;
6944         let mut dsts_width = 0;
6945         let mut op_width = 0;
6946 
6947         let mut blocks = Vec::new();
6948         for b in &self.blocks {
6949             let mut instrs = Vec::new();
6950             for i in &b.instrs {
6951                 let mut pred = String::new();
6952                 write!(pred, "{}", Fmt(|f| i.fmt_pred(f)))?;
6953                 let mut dsts = String::new();
6954                 write!(dsts, "{}", Fmt(|f| i.op.fmt_dsts(f)))?;
6955                 let mut op = String::new();
6956                 write!(op, "{}", Fmt(|f| i.op.fmt_op(f)))?;
6957                 let mut deps = String::new();
6958                 write!(deps, "{}", i.deps)?;
6959 
6960                 pred_width = max(pred_width, pred.len());
6961                 dsts_width = max(dsts_width, dsts.len());
6962                 op_width = max(op_width, op.len());
6963                 let is_annotation = matches!(i.op, Op::Annotate(_));
6964 
6965                 instrs.push((pred, dsts, op, deps, is_annotation));
6966             }
6967             blocks.push(instrs);
6968         }
6969 
6970         for (i, mut b) in blocks.drain(..).enumerate() {
6971             let u = if self.blocks[i].uniform { ".u" } else { "" };
6972             write!(f, "block{u} {} {} [", i, self.blocks[i].label)?;
6973             for (pi, p) in self.blocks.pred_indices(i).iter().enumerate() {
6974                 if pi > 0 {
6975                     write!(f, ", ")?;
6976                 }
6977                 write!(f, "{}", p)?;
6978             }
6979             write!(f, "] -> {{\n")?;
6980 
6981             for (pred, dsts, op, deps, is_annotation) in b.drain(..) {
6982                 let eq_sym = if dsts.is_empty() { " " } else { "=" };
6983                 if is_annotation {
6984                     write!(f, "\n{}\n", op)?;
6985                 } else if deps.is_empty() {
6986                     write!(
6987                         f,
6988                         "{:<pred_width$} {:<dsts_width$} {} {}\n",
6989                         pred, dsts, eq_sym, op,
6990                     )?;
6991                 } else {
6992                     write!(
6993                         f,
6994                         "{:<pred_width$} {:<dsts_width$} {} \
6995                          {:<op_width$} //{}\n",
6996                         pred, dsts, eq_sym, op, deps,
6997                     )?;
6998                 }
6999             }
7000 
7001             write!(f, "}} -> [")?;
7002             for (si, s) in self.blocks.succ_indices(i).iter().enumerate() {
7003                 if si > 0 {
7004                     write!(f, ", ")?;
7005                 }
7006                 write!(f, "{}", s)?;
7007             }
7008             write!(f, "]\n")?;
7009         }
7010         Ok(())
7011     }
7012 }
7013 
/// Stage-specific information for a compute shader.
#[derive(Debug)]
pub struct ComputeShaderInfo {
    /// Local size, one entry per dimension.
    pub local_size: [u16; 3],
    /// Shared memory size. NOTE(review): units are not visible here.
    pub smem_size: u16,
}

/// Stage-specific information for a fragment shader.
#[derive(Debug)]
pub struct FragmentShaderInfo {
    /// Whether the shader uses kill.
    pub uses_kill: bool,
    /// Whether the shader does interlock.
    pub does_interlock: bool,
    /// Whether post-depth coverage is requested.
    pub post_depth_coverage: bool,
    /// Whether early fragment tests are requested.
    pub early_fragment_tests: bool,
    /// Whether the shader uses sample shading.
    pub uses_sample_shading: bool,
}
7028 
7029 #[derive(Debug)]
7030 pub struct GeometryShaderInfo {
7031     pub passthrough_enable: bool,
7032     pub stream_out_mask: u8,
7033     pub threads_per_input_primitive: u8,
7034     pub output_topology: OutputTopology,
7035     pub max_output_vertex_count: u16,
7036 }
7037 
7038 impl Default for GeometryShaderInfo {
default() -> Self7039     fn default() -> Self {
7040         Self {
7041             passthrough_enable: false,
7042             stream_out_mask: 0,
7043             threads_per_input_primitive: 0,
7044             output_topology: OutputTopology::LineStrip,
7045             max_output_vertex_count: 0,
7046         }
7047     }
7048 }
7049 
/// Stage-specific information for a tessellation-init shader.
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
    /// Number of per-patch attributes.
    pub per_patch_attribute_count: u8,
    /// Number of threads per patch.
    pub threads_per_patch: u8,
}
7055 
7056 #[repr(u8)]
7057 #[derive(Clone, Copy, Debug)]
7058 pub enum TessellationDomain {
7059     Isoline = NAK_TS_DOMAIN_ISOLINE,
7060     Triangle = NAK_TS_DOMAIN_TRIANGLE,
7061     Quad = NAK_TS_DOMAIN_QUAD,
7062 }
7063 
7064 #[repr(u8)]
7065 #[derive(Clone, Copy, Debug)]
7066 pub enum TessellationSpacing {
7067     Integer = NAK_TS_SPACING_INTEGER,
7068     FractionalOdd = NAK_TS_SPACING_FRACT_ODD,
7069     FractionalEven = NAK_TS_SPACING_FRACT_EVEN,
7070 }
7071 
7072 #[repr(u8)]
7073 #[derive(Clone, Copy, Debug)]
7074 pub enum TessellationPrimitives {
7075     Points = NAK_TS_PRIMS_POINTS,
7076     Lines = NAK_TS_PRIMS_LINES,
7077     TrianglesCW = NAK_TS_PRIMS_TRIANGLES_CW,
7078     TrianglesCCW = NAK_TS_PRIMS_TRIANGLES_CCW,
7079 }
7080 
7081 #[derive(Debug)]
7082 pub struct TessellationShaderInfo {
7083     pub domain: TessellationDomain,
7084     pub spacing: TessellationSpacing,
7085     pub primitives: TessellationPrimitives,
7086 }
7087 
/// Stage-specific shader information, stored in [`ShaderInfo::stage`].
///
/// Every variant except `Vertex` wraps a dedicated info struct; `Vertex`
/// carries no additional data.
#[derive(Debug)]
pub enum ShaderStageInfo {
    Compute(ComputeShaderInfo),
    Vertex,
    Fragment(FragmentShaderInfo),
    Geometry(GeometryShaderInfo),
    TessellationInit(TessellationInitShaderInfo),
    Tessellation(TessellationShaderInfo),
}
7097 
/// A pair of bitmasks recording which system-value dwords are used.
///
/// The bits are set by `VtgIoInfo::mark_attrs` and
/// `FragmentIoInfo::mark_attr_read`, which turn a byte address into a bit
/// index by dividing its offset within the address window by 4.
#[derive(Debug, Default)]
pub struct SysValInfo {
    /// Bit `addr / 4` is set for each marked address below `0x080`.
    pub ab: u32,
    /// Bit `(addr - 0x2c0) / 4` is set for each marked address in
    /// `0x2c0..0x300`.
    pub c: u16,
}
7103 
/// Attribute and system-value I/O bookkeeping carried by
/// [`ShaderIoInfo::Vtg`].
///
/// The `*_in` fields are updated by `mark_attrs_read` and the `*_out` fields
/// by `mark_attrs_written`; both record one bit per dword of address space.
#[derive(Debug)]
pub struct VtgIoInfo {
    /// Dwords marked as read below `0x080` and in `0x2c0..0x300`.
    pub sysvals_in: SysValInfo,
    /// Dwords marked as read in `0x3a0..0x3c0`, one bit each.
    pub sysvals_in_d: u8,
    /// Written counterpart of `sysvals_in`.
    pub sysvals_out: SysValInfo,
    /// Written counterpart of `sysvals_in_d`.
    pub sysvals_out_d: u8,
    /// Dwords marked as read in `0x080..0x280`: 128 bits spread over four
    /// words and accessed through a bit view.
    pub attr_in: [u32; 4],
    /// Written counterpart of `attr_in`.
    pub attr_out: [u32; 4],
    /// `mark_store_req` lowers this to the smallest dword index
    /// (`start / 4`) it is given.
    pub store_req_start: u8,
    /// `mark_store_req` raises this to the largest dword index
    /// (`(end - 1) / 4`) it is given.
    pub store_req_end: u8,
    /// NOTE(review): presumably a per-clip-distance enable mask — confirm.
    pub clip_enable: u8,
    /// NOTE(review): presumably a per-cull-distance enable mask — confirm.
    pub cull_enable: u8,
    /// Optional boxed `nak_xfb_info`.
    /// NOTE(review): presumably transform-feedback state — confirm.
    pub xfb: Option<Box<nak_xfb_info>>,
}
7118 
7119 impl VtgIoInfo {
mark_attrs(&mut self, addrs: Range<u16>, written: bool)7120     fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
7121         let sysvals = if written {
7122             &mut self.sysvals_out
7123         } else {
7124             &mut self.sysvals_in
7125         };
7126 
7127         let sysvals_d = if written {
7128             &mut self.sysvals_out_d
7129         } else {
7130             &mut self.sysvals_in_d
7131         };
7132 
7133         let mut attr = BitMutView::new(if written {
7134             &mut self.attr_out
7135         } else {
7136             &mut self.attr_in
7137         });
7138 
7139         let mut addrs = addrs;
7140         addrs.start &= !3;
7141         for addr in addrs.step_by(4) {
7142             if addr < 0x080 {
7143                 sysvals.ab |= 1 << (addr / 4);
7144             } else if addr < 0x280 {
7145                 let attr_idx = (addr - 0x080) as usize / 4;
7146                 attr.set_bit(attr_idx, true);
7147             } else if addr < 0x2c0 {
7148                 panic!("FF color I/O not supported");
7149             } else if addr < 0x300 {
7150                 sysvals.c |= 1 << ((addr - 0x2c0) / 4);
7151             } else if addr >= 0x3a0 && addr < 0x3c0 {
7152                 *sysvals_d |= 1 << ((addr - 0x3a0) / 4);
7153             }
7154         }
7155     }
7156 
mark_attrs_read(&mut self, addrs: Range<u16>)7157     pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
7158         self.mark_attrs(addrs, false);
7159     }
7160 
mark_attrs_written(&mut self, addrs: Range<u16>)7161     pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
7162         self.mark_attrs(addrs, true);
7163     }
7164 
attr_written(&self, addr: u16) -> bool7165     pub fn attr_written(&self, addr: u16) -> bool {
7166         if addr < 0x080 {
7167             self.sysvals_out.ab & (1 << (addr / 4)) != 0
7168         } else if addr < 0x280 {
7169             let attr_idx = (addr - 0x080) as usize / 4;
7170             BitView::new(&self.attr_out).get_bit(attr_idx)
7171         } else if addr < 0x2c0 {
7172             panic!("FF color I/O not supported");
7173         } else if addr < 0x300 {
7174             self.sysvals_out.c & (1 << ((addr - 0x2c0) / 4)) != 0
7175         } else if addr >= 0x3a0 && addr < 0x3c0 {
7176             self.sysvals_out_d & (1 << ((addr - 0x3a0) / 4)) != 0
7177         } else {
7178             panic!("Unknown I/O address");
7179         }
7180     }
7181 
mark_store_req(&mut self, addrs: Range<u16>)7182     pub fn mark_store_req(&mut self, addrs: Range<u16>) {
7183         let start = (addrs.start / 4).try_into().unwrap();
7184         let end = ((addrs.end - 1) / 4).try_into().unwrap();
7185         self.store_req_start = min(self.store_req_start, start);
7186         self.store_req_end = max(self.store_req_end, end);
7187     }
7188 }
7189 
/// Fragment-stage I/O bookkeeping carried by [`ShaderIoInfo::Fragment`].
#[derive(Debug)]
pub struct FragmentIoInfo {
    /// Dwords marked by `mark_attr_read` below `0x080` and in
    /// `0x2c0..0x300`.
    pub sysvals_in: SysValInfo,
    /// `PixelImap` recorded by `mark_attr_read` for each dword in
    /// `0x3a0..0x3c0`.
    pub sysvals_in_d: [PixelImap; 8],
    /// `PixelImap` recorded by `mark_attr_read` for each dword in
    /// `0x080..0x280`.
    pub attr_in: [PixelImap; 128],
    /// One bit per dword in `0x080..0x280`, set by
    /// `mark_barycentric_attr_in`.
    pub barycentric_attr_in: [u32; 4],

    // NOTE(review): the remaining fields are not touched in this part of the
    // file; their meaning is suggested by their names only — confirm against
    // the code that sets them.
    pub reads_sample_mask: bool,
    pub writes_color: u32,
    pub writes_sample_mask: bool,
    pub writes_depth: bool,
}
7202 
7203 impl FragmentIoInfo {
mark_attr_read(&mut self, addr: u16, interp: PixelImap)7204     pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
7205         if addr < 0x080 {
7206             self.sysvals_in.ab |= 1 << (addr / 4);
7207         } else if addr < 0x280 {
7208             let attr_idx = (addr - 0x080) as usize / 4;
7209             self.attr_in[attr_idx] = interp;
7210         } else if addr < 0x2c0 {
7211             panic!("FF color I/O not supported");
7212         } else if addr < 0x300 {
7213             self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4);
7214         } else if addr >= 0x3a0 && addr < 0x3c0 {
7215             let attr_idx = (addr - 0x3a0) as usize / 4;
7216             self.sysvals_in_d[attr_idx] = interp;
7217         }
7218     }
7219 
mark_barycentric_attr_in(&mut self, addr: u16)7220     pub fn mark_barycentric_attr_in(&mut self, addr: u16) {
7221         assert!(addr >= 0x80 && addr < 0x280);
7222 
7223         let mut attr = BitMutView::new(&mut self.barycentric_attr_in);
7224 
7225         let attr_idx = (addr - 0x080) as usize / 4;
7226         attr.set_bit(attr_idx, true);
7227     }
7228 }
7229 
/// I/O bookkeeping for a shader, stored in [`ShaderInfo::io`].
///
/// `Vtg` and `Fragment` wrap the corresponding info structs; `None` carries
/// no I/O information.
/// NOTE(review): `Vtg` presumably covers the vertex, tessellation and
/// geometry stages — confirm.
#[derive(Debug)]
pub enum ShaderIoInfo {
    None,
    Vtg(VtgIoInfo),
    Fragment(FragmentIoInfo),
}
7236 
/// Summary information about a shader, stored in [`Shader::info`].
///
/// `num_instrs`, `uses_global_mem` and `writes_global_mem` are recomputed by
/// `Shader::gather_info`.  The other fields are not written in this part of
/// the file; the notes on them are based on their names only.
#[derive(Debug)]
pub struct ShaderInfo {
    /// NOTE(review): presumably the number of GPRs the shader uses — confirm.
    pub num_gprs: u8,
    /// NOTE(review): presumably the number of control barriers used —
    /// confirm.
    pub num_control_barriers: u8,
    /// Total instruction count over all functions and blocks, as counted by
    /// `Shader::gather_info`.
    pub num_instrs: u32,
    /// NOTE(review): presumably the shader-local memory size — confirm units.
    pub slm_size: u32,
    /// NOTE(review): presumably the maximum call/return stack depth; see
    /// `ShaderModel::crs_size` — confirm.
    pub max_crs_depth: u32,
    /// True if any instruction reports `uses_global_mem()`.
    pub uses_global_mem: bool,
    /// True if any instruction reports `writes_global_mem()`.
    pub writes_global_mem: bool,
    /// NOTE(review): presumably set when 64-bit float ops are present —
    /// confirm.
    pub uses_fp64: bool,
    /// Stage-specific information.
    pub stage: ShaderStageInfo,
    /// Attribute / system-value I/O bookkeeping.
    pub io: ShaderIoInfo,
}
7250 
/// Target-specific behaviour, used by the compiler through a
/// `&dyn ShaderModel` (see the `sm` field of `Shader`).
///
/// NOTE(review): implementations live outside this part of the file; the
/// per-method notes below are derived from the signatures and names only.
pub trait ShaderModel {
    /// Returns the shader-model identifier as a `u8`.
    /// NOTE(review): presumably the SM version number — confirm.
    fn sm(&self) -> u8;
    /// Returns a register count for the given register file.
    fn num_regs(&self, file: RegFile) -> u32;
    /// Returns a GPR count.
    /// NOTE(review): presumably GPRs the hardware reserves for itself —
    /// confirm.
    fn hw_reserved_gprs(&self) -> u32;
    /// Returns a size derived from `max_crs_depth`.
    /// NOTE(review): units and meaning of "CRS" are not shown here — confirm.
    fn crs_size(&self, max_crs_depth: u32) -> u32;

    /// Reports whether `op` may be uniform on this shader model.
    fn op_can_be_uniform(&self, op: &Op) -> bool;

    /// Legalizes `op` in place, with access to the legalize builder `b`.
    fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op);
    /// Encodes the shader `s` into a vector of 32-bit words.
    fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32>;
}
7262 
/// Returns the per-thread GPR limit implied by a compute workgroup size.
///
/// For compute shaders, large values of `local_size` impose an additional
/// limit on the number of GPRs per thread: the workgroup's thread count is
/// rounded up to a multiple of 4 warps (128 threads), a budget of 65536
/// registers is divided by that count, and the quotient is rounded down to a
/// multiple of 8 and capped at 255.
///
/// The thread count is computed in `u64`, so dimensions whose product (or
/// its rounded-up value) exceeds `u16::MAX` cannot overflow; such oversized
/// workgroups simply yield a very small, possibly zero, limit.
///
/// # Panics
///
/// Panics with a division by zero if any dimension of `local_size` is zero.
pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 {
    /// Rounds `x` down to a multiple of `y`.
    fn prev_multiple_of(x: u32, y: u32) -> u32 {
        (x / y) * y
    }

    // Widen before multiplying: the product of three `u16` values can exceed
    // both `u16::MAX` and `u32::MAX`.
    let threads: u64 = local_size.iter().map(|&dim| u64::from(dim)).product();
    // Warps are allocated in multiples of 4
    // Multiply that by 32 threads/warp
    let threads = threads.next_multiple_of(4 * 32);
    let total_regs: u64 = 65536;

    // `threads` is at least 128 here, so the quotient is at most 512 and the
    // narrowing cast cannot truncate.
    let out = (total_regs / threads) as u32;
    // GPRs are allocated in multiples of 8
    let out = prev_multiple_of(out, 8);
    min(out, 255)
}
7281 
/// A shader: its functions, the gathered [`ShaderInfo`] and the shader model
/// it is tied to.
pub struct Shader<'a> {
    /// Shader model the shader is associated with, borrowed for `'a`.
    pub sm: &'a dyn ShaderModel,
    /// Summary information; partly recomputed by `gather_info`.
    pub info: ShaderInfo,
    /// The functions making up the shader, visited in order by
    /// `for_each_instr`, `map_instrs` and the `Display` impl.
    pub functions: Vec<Function>,
}
7287 
7288 impl Shader<'_> {
for_each_instr(&self, f: &mut impl FnMut(&Instr))7289     pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
7290         for func in &self.functions {
7291             for b in &func.blocks {
7292                 for i in &b.instrs {
7293                     f(i);
7294                 }
7295             }
7296         }
7297     }
7298 
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )7299     pub fn map_instrs(
7300         &mut self,
7301         mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
7302     ) {
7303         for f in &mut self.functions {
7304             f.map_instrs(&mut map);
7305         }
7306     }
7307 
7308     /// Remove all annotations, presumably before encoding the shader.
remove_annotations(&mut self)7309     pub fn remove_annotations(&mut self) {
7310         self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
7311             if matches!(instr.op, Op::Annotate(_)) {
7312                 MappedInstrs::None
7313             } else {
7314                 MappedInstrs::One(instr)
7315             }
7316         })
7317     }
7318 
gather_info(&mut self)7319     pub fn gather_info(&mut self) {
7320         let mut num_instrs = 0;
7321         let mut uses_global_mem = false;
7322         let mut writes_global_mem = false;
7323 
7324         self.for_each_instr(&mut |instr| {
7325             num_instrs += 1;
7326 
7327             if !uses_global_mem {
7328                 uses_global_mem = instr.uses_global_mem();
7329             }
7330 
7331             if !writes_global_mem {
7332                 writes_global_mem = instr.writes_global_mem();
7333             }
7334         });
7335 
7336         self.info.num_instrs = num_instrs;
7337         self.info.uses_global_mem = uses_global_mem;
7338         self.info.writes_global_mem = writes_global_mem;
7339     }
7340 }
7341 
7342 impl fmt::Display for Shader<'_> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result7343     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
7344         for func in &self.functions {
7345             write!(f, "{}", func)?;
7346         }
7347         Ok(())
7348     }
7349 }
7350