• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT
3 
4 extern crate bitview;
5 extern crate nak_ir_proc;
6 
7 use bitview::BitMutView;
8 
9 use crate::api::{GetDebugFlags, DEBUG};
10 pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
11 use crate::cfg::CFG;
12 use crate::sph::{OutputTopology, PixelImap};
13 use nak_ir_proc::*;
14 use std::cmp::{max, min};
15 use std::fmt;
16 use std::fmt::Write;
17 use std::iter::Zip;
18 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
19 use std::slice;
20 
/// A label, identified by a numeric index
///
/// The index is private; labels are created by `LabelAllocator::alloc()`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct Label {
    idx: u32,
}

impl fmt::Display for Label {
    /// Formats the label as the letter `L` followed by its index.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Label { idx } = self;
        write!(f, "L{}", idx)
    }
}
31 
32 pub struct LabelAllocator {
33     count: u32,
34 }
35 
36 impl LabelAllocator {
new() -> LabelAllocator37     pub fn new() -> LabelAllocator {
38         LabelAllocator { count: 0 }
39     }
40 
alloc(&mut self) -> Label41     pub fn alloc(&mut self) -> Label {
42         let idx = self.count;
43         self.count += 1;
44         Label { idx: idx }
45     }
46 }
47 
/// Represents a register file
///
/// The explicit discriminants are the numeric values used by the `u8`
/// conversions and by the packed encodings of `SSAValue` and `RegRef`.
#[repr(u8)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum RegFile {
    /// The general-purpose register file
    ///
    /// General-purpose registers are 32 bits per SIMT channel.
    GPR = 0,

    /// The general-purpose uniform register file
    ///
    /// General-purpose uniform registers are 32 bits each and uniform across a
    /// wave.
    UGPR = 1,

    /// The predicate register file
    ///
    /// Predicate registers are 1 bit per SIMT channel.
    Pred = 2,

    /// The uniform predicate register file
    ///
    /// Uniform predicate registers are 1 bit and uniform across a wave.
    UPred = 3,

    /// The carry flag register file
    ///
    /// Only one carry flag register exists in hardware, but representing it as
    /// a reg file simplifies dependency tracking.
    ///
    /// This is used only on SM50.
    Carry = 4,

    /// The barrier register file
    ///
    /// This is a lane mask used for wave re-convergence instructions.
    Bar = 5,

    /// The memory register file
    ///
    /// This is a virtual register file for things which will get spilled to
    /// local memory.  Each memory location is 32 bits per SIMT channel.
    Mem = 6,
}
92 
/// The number of `RegFile` variants; used as the length of per-file arrays.
const NUM_REG_FILES: usize = 7;
94 
95 impl RegFile {
96     /// Returns true if the register file is uniform across a wave
is_uniform(&self) -> bool97     pub fn is_uniform(&self) -> bool {
98         match self {
99             RegFile::GPR
100             | RegFile::Pred
101             | RegFile::Carry
102             | RegFile::Bar
103             | RegFile::Mem => false,
104             RegFile::UGPR | RegFile::UPred => true,
105         }
106     }
107 
108     /// Returns true if the register file is general-purpose
is_gpr(&self) -> bool109     pub fn is_gpr(&self) -> bool {
110         match self {
111             RegFile::GPR | RegFile::UGPR => true,
112             RegFile::Pred
113             | RegFile::UPred
114             | RegFile::Carry
115             | RegFile::Bar
116             | RegFile::Mem => false,
117         }
118     }
119 
120     /// Returns true if the register file is a predicate register file
is_predicate(&self) -> bool121     pub fn is_predicate(&self) -> bool {
122         match self {
123             RegFile::GPR
124             | RegFile::UGPR
125             | RegFile::Carry
126             | RegFile::Bar
127             | RegFile::Mem => false,
128             RegFile::Pred | RegFile::UPred => true,
129         }
130     }
131 
num_regs(&self, sm: u8) -> u32132     pub fn num_regs(&self, sm: u8) -> u32 {
133         match self {
134             RegFile::GPR => {
135                 if DEBUG.spill() {
136                     // We need at least 16 registers to satisfy RA constraints
137                     // for texture ops and another 2 for parallel copy lowering
138                     18
139                 } else if sm >= 70 {
140                     // Volta+ has a maximum of 253 registers.  Presumably
141                     // because two registers get burned for UGPRs? Unclear
142                     // on why we need it on Volta though.
143                     253
144                 } else {
145                     255
146                 }
147             }
148             RegFile::UGPR => {
149                 if sm >= 75 {
150                     63
151                 } else {
152                     0
153                 }
154             }
155             RegFile::Pred => 7,
156             RegFile::UPred => {
157                 if sm >= 75 {
158                     7
159                 } else {
160                     0
161                 }
162             }
163             RegFile::Carry => {
164                 if sm >= 70 {
165                     0
166                 } else {
167                     1
168                 }
169             }
170             RegFile::Bar => {
171                 if sm >= 70 {
172                     16
173                 } else {
174                     0
175                 }
176             }
177             RegFile::Mem => 1 << 24,
178         }
179     }
180 
fmt_prefix(&self) -> &'static str181     fn fmt_prefix(&self) -> &'static str {
182         match self {
183             RegFile::GPR => "r",
184             RegFile::UGPR => "ur",
185             RegFile::Pred => "p",
186             RegFile::UPred => "up",
187             RegFile::Carry => "c",
188             RegFile::Bar => "b",
189             RegFile::Mem => "m",
190         }
191     }
192 }
193 
194 impl fmt::Display for RegFile {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result195     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
196         match self {
197             RegFile::GPR => write!(f, "GPR"),
198             RegFile::UGPR => write!(f, "UGPR"),
199             RegFile::Pred => write!(f, "Pred"),
200             RegFile::UPred => write!(f, "UPred"),
201             RegFile::Carry => write!(f, "Carry"),
202             RegFile::Bar => write!(f, "Bar"),
203             RegFile::Mem => write!(f, "Mem"),
204         }
205     }
206 }
207 
208 impl From<RegFile> for u8 {
from(value: RegFile) -> u8209     fn from(value: RegFile) -> u8 {
210         value as u8
211     }
212 }
213 
214 impl TryFrom<u32> for RegFile {
215     type Error = &'static str;
216 
try_from(value: u32) -> Result<Self, Self::Error>217     fn try_from(value: u32) -> Result<Self, Self::Error> {
218         match value {
219             0 => Ok(RegFile::GPR),
220             1 => Ok(RegFile::UGPR),
221             2 => Ok(RegFile::Pred),
222             3 => Ok(RegFile::UPred),
223             4 => Ok(RegFile::Carry),
224             5 => Ok(RegFile::Bar),
225             6 => Ok(RegFile::Mem),
226             _ => Err("Invalid register file number"),
227         }
228     }
229 }
230 
231 impl TryFrom<u16> for RegFile {
232     type Error = &'static str;
233 
try_from(value: u16) -> Result<Self, Self::Error>234     fn try_from(value: u16) -> Result<Self, Self::Error> {
235         RegFile::try_from(u32::from(value))
236     }
237 }
238 
239 impl TryFrom<u8> for RegFile {
240     type Error = &'static str;
241 
try_from(value: u8) -> Result<Self, Self::Error>242     fn try_from(value: u8) -> Result<Self, Self::Error> {
243         RegFile::try_from(u32::from(value))
244     }
245 }
246 
247 /// A trait for things which have an associated register file
248 pub trait HasRegFile {
file(&self) -> RegFile249     fn file(&self) -> RegFile;
250 
is_uniform(&self) -> bool251     fn is_uniform(&self) -> bool {
252         self.file().is_uniform()
253     }
254 
is_gpr(&self) -> bool255     fn is_gpr(&self) -> bool {
256         self.file().is_gpr()
257     }
258 
is_predicate(&self) -> bool259     fn is_predicate(&self) -> bool {
260         self.file().is_predicate()
261     }
262 }
263 
264 #[derive(Clone)]
265 pub struct RegFileSet {
266     bits: u8,
267 }
268 
269 impl RegFileSet {
new() -> RegFileSet270     pub fn new() -> RegFileSet {
271         RegFileSet { bits: 0 }
272     }
273 
len(&self) -> usize274     pub fn len(&self) -> usize {
275         self.bits.count_ones() as usize
276     }
277 
contains(&self, file: RegFile) -> bool278     pub fn contains(&self, file: RegFile) -> bool {
279         self.bits & (1 << (file as u8)) != 0
280     }
281 
insert(&mut self, file: RegFile) -> bool282     pub fn insert(&mut self, file: RegFile) -> bool {
283         let has_file = self.contains(file);
284         self.bits |= 1 << (file as u8);
285         !has_file
286     }
287 
is_empty(&self) -> bool288     pub fn is_empty(&self) -> bool {
289         self.bits == 0
290     }
291 
292     #[allow(dead_code)]
iter(&self) -> RegFileSet293     pub fn iter(&self) -> RegFileSet {
294         self.clone()
295     }
296 
remove(&mut self, file: RegFile) -> bool297     pub fn remove(&mut self, file: RegFile) -> bool {
298         let has_file = self.contains(file);
299         self.bits &= !(1 << (file as u8));
300         has_file
301     }
302 }
303 
304 impl FromIterator<RegFile> for RegFileSet {
from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self305     fn from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self {
306         let mut set = RegFileSet::new();
307         for file in iter {
308             set.insert(file);
309         }
310         set
311     }
312 }
313 
314 impl Iterator for RegFileSet {
315     type Item = RegFile;
316 
next(&mut self) -> Option<RegFile>317     fn next(&mut self) -> Option<RegFile> {
318         if self.is_empty() {
319             None
320         } else {
321             let file = self.bits.trailing_zeros().try_into().unwrap();
322             self.remove(file);
323             Some(file)
324         }
325     }
326 
size_hint(&self) -> (usize, Option<usize>)327     fn size_hint(&self) -> (usize, Option<usize>) {
328         let len = self.len();
329         (len, Some(len))
330     }
331 }
332 
333 #[derive(Clone, Copy)]
334 pub struct PerRegFile<T> {
335     per_file: [T; NUM_REG_FILES],
336 }
337 
338 impl<T> PerRegFile<T> {
new_with<F: Fn(RegFile) -> T>(f: F) -> Self339     pub fn new_with<F: Fn(RegFile) -> T>(f: F) -> Self {
340         PerRegFile {
341             per_file: [
342                 f(RegFile::GPR),
343                 f(RegFile::UGPR),
344                 f(RegFile::Pred),
345                 f(RegFile::UPred),
346                 f(RegFile::Carry),
347                 f(RegFile::Bar),
348                 f(RegFile::Mem),
349             ],
350         }
351     }
352 
353     #[allow(dead_code)]
values(&self) -> slice::Iter<T>354     pub fn values(&self) -> slice::Iter<T> {
355         self.per_file.iter()
356     }
357 
358     #[allow(dead_code)]
values_mut(&mut self) -> slice::IterMut<T>359     pub fn values_mut(&mut self) -> slice::IterMut<T> {
360         self.per_file.iter_mut()
361     }
362 }
363 
364 impl<T: Default> Default for PerRegFile<T> {
default() -> Self365     fn default() -> Self {
366         PerRegFile {
367             per_file: Default::default(),
368         }
369     }
370 }
371 
372 impl<T> Index<RegFile> for PerRegFile<T> {
373     type Output = T;
374 
index(&self, idx: RegFile) -> &T375     fn index(&self, idx: RegFile) -> &T {
376         &self.per_file[idx as u8 as usize]
377     }
378 }
379 
380 impl<T> IndexMut<RegFile> for PerRegFile<T> {
index_mut(&mut self, idx: RegFile) -> &mut T381     fn index_mut(&mut self, idx: RegFile) -> &mut T {
382         &mut self.per_file[idx as u8 as usize]
383     }
384 }
385 
386 /// An SSA value
387 ///
388 /// Each SSA in NAK represents a single 32-bit or 1-bit (if a predicate) value
389 /// which must either be spilled to memory or allocated space in the specified
390 /// register file.  Whenever more data is required such as a 64-bit memory
391 /// address, double-precision float, or a vec4 texture result, multiple SSA
392 /// values are used.
393 ///
394 /// Each SSA value logically contains two things: an index and a register file.
395 /// It is required that each index refers to a unique SSA value, regardless of
396 /// register file.  This way the index can be used to index tightly-packed data
397 /// structures such as bitsets without having to determine separate ranges for
398 /// each register file.
399 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
400 pub struct SSAValue {
401     packed: u32,
402 }
403 
404 impl SSAValue {
405     /// A special SSA value which is always invalid
406     pub const NONE: Self = SSAValue { packed: 0 };
407 
408     /// Returns an SSA value with the given register file and index
new(file: RegFile, idx: u32) -> SSAValue409     pub fn new(file: RegFile, idx: u32) -> SSAValue {
410         assert!(idx > 0 && idx < (1 << 29) - 2);
411         let mut packed = idx;
412         assert!(u8::from(file) < 8);
413         packed |= u32::from(u8::from(file)) << 29;
414         SSAValue { packed: packed }
415     }
416 
417     /// Returns the index of this SSA value
idx(&self) -> u32418     pub fn idx(&self) -> u32 {
419         self.packed & 0x1fffffff
420     }
421 
422     /// Returns true if this SSA value is equal to SSAValue::NONE
423     #[allow(dead_code)]
is_none(&self) -> bool424     pub fn is_none(&self) -> bool {
425         self.packed == 0
426     }
427 
fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result428     fn fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
429         write!(f, "{}{}", self.file().fmt_prefix(), self.idx())
430     }
431 }
432 
433 impl HasRegFile for SSAValue {
434     /// Returns the register file of this SSA value
file(&self) -> RegFile435     fn file(&self) -> RegFile {
436         RegFile::try_from(self.packed >> 29).unwrap()
437     }
438 }
439 
440 impl fmt::Display for SSAValue {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result441     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
442         write!(f, "%")?;
443         self.fmt_plain(f)
444     }
445 }
446 
447 /// A reference to one or more SSA values
448 ///
449 /// Because each SSA value represents a single 1 or 32-bit scalar, we need a way
450 /// to reference multiple SSA values for instructions which read or write
451 /// multiple registers in the same source.  When the register allocator runs,
452 /// all the SSA values in a given SSA ref will be placed in consecutive
453 /// registers, with the base register aligned to the number of values, aligned
454 /// to the next power of two.
455 ///
456 /// An SSA reference can reference between 1 and 4 SSA values.  It dereferences
457 /// to a slice for easy access to individual SSA values.  The structure is
458 /// designed so that is always 16B, regardless of how many SSA values are
459 /// referenced so it's easy and fairly cheap to copy around and embed in other
460 /// structures.
461 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
462 pub struct SSARef {
463     v: [SSAValue; 4],
464 }
465 
466 impl SSARef {
467     /// Returns a new SSA reference
468     #[inline]
new(comps: &[SSAValue]) -> SSARef469     fn new(comps: &[SSAValue]) -> SSARef {
470         assert!(comps.len() > 0 && comps.len() <= 4);
471         let mut r = SSARef {
472             v: [SSAValue::NONE; 4],
473         };
474         for i in 0..comps.len() {
475             r.v[i] = comps[i];
476         }
477         if comps.len() < 4 {
478             r.v[3].packed = (comps.len() as u32).wrapping_neg();
479         }
480         r
481     }
482 
483     /// Returns the number of components in this SSA reference
comps(&self) -> u8484     pub fn comps(&self) -> u8 {
485         if self.v[3].packed >= u32::MAX - 2 {
486             self.v[3].packed.wrapping_neg() as u8
487         } else {
488             4
489         }
490     }
491 }
492 
493 impl HasRegFile for SSARef {
file(&self) -> RegFile494     fn file(&self) -> RegFile {
495         let comps = usize::from(self.comps());
496         for i in 1..comps {
497             assert!(self.v[i].file() == self.v[0].file());
498         }
499         self.v[0].file()
500     }
501 }
502 
503 impl Deref for SSARef {
504     type Target = [SSAValue];
505 
deref(&self) -> &[SSAValue]506     fn deref(&self) -> &[SSAValue] {
507         let comps = usize::from(self.comps());
508         &self.v[..comps]
509     }
510 }
511 
512 impl DerefMut for SSARef {
deref_mut(&mut self) -> &mut [SSAValue]513     fn deref_mut(&mut self) -> &mut [SSAValue] {
514         let comps = usize::from(self.comps());
515         &mut self.v[..comps]
516     }
517 }
518 
519 impl TryFrom<&[SSAValue]> for SSARef {
520     type Error = &'static str;
521 
try_from(comps: &[SSAValue]) -> Result<Self, Self::Error>522     fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
523         if comps.len() == 0 {
524             Err("Empty vector")
525         } else if comps.len() > 4 {
526             Err("Too many vector components")
527         } else {
528             Ok(SSARef::new(comps))
529         }
530     }
531 }
532 
533 impl TryFrom<Vec<SSAValue>> for SSARef {
534     type Error = &'static str;
535 
try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error>536     fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
537         SSARef::try_from(&comps[..])
538     }
539 }
540 
541 macro_rules! impl_ssa_ref_from_arr {
542     ($n: expr) => {
543         impl From<[SSAValue; $n]> for SSARef {
544             fn from(comps: [SSAValue; $n]) -> Self {
545                 SSARef::new(&comps[..])
546             }
547         }
548     };
549 }
550 impl_ssa_ref_from_arr!(1);
551 impl_ssa_ref_from_arr!(2);
552 impl_ssa_ref_from_arr!(3);
553 impl_ssa_ref_from_arr!(4);
554 
555 impl From<SSAValue> for SSARef {
from(val: SSAValue) -> Self556     fn from(val: SSAValue) -> Self {
557         [val].into()
558     }
559 }
560 
561 impl fmt::Display for SSARef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result562     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
563         if self.comps() == 1 {
564             write!(f, "{}", self[0])
565         } else {
566             write!(f, "{{")?;
567             for (i, v) in self.iter().enumerate() {
568                 if i != 0 {
569                     write!(f, " ")?;
570                 }
571                 write!(f, "{}", v)?;
572             }
573             write!(f, "}}")
574         }
575     }
576 }
577 
578 pub struct SSAValueAllocator {
579     count: u32,
580 }
581 
582 impl SSAValueAllocator {
new() -> SSAValueAllocator583     pub fn new() -> SSAValueAllocator {
584         SSAValueAllocator { count: 0 }
585     }
586 
max_idx(&self) -> u32587     pub fn max_idx(&self) -> u32 {
588         self.count
589     }
590 
alloc(&mut self, file: RegFile) -> SSAValue591     pub fn alloc(&mut self, file: RegFile) -> SSAValue {
592         self.count += 1;
593         SSAValue::new(file, self.count)
594     }
595 
alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef596     pub fn alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef {
597         assert!(comps >= 1 && comps <= 4);
598         let mut vec = [SSAValue::NONE; 4];
599         for c in 0..comps {
600             vec[usize::from(c)] = self.alloc(file);
601         }
602         vec[0..usize::from(comps)].try_into().unwrap()
603     }
604 }
605 
606 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
607 pub struct RegRef {
608     packed: u32,
609 }
610 
611 impl RegRef {
zero_idx(file: RegFile) -> u32612     fn zero_idx(file: RegFile) -> u32 {
613         match file {
614             RegFile::GPR => 255,
615             RegFile::UGPR => 63,
616             RegFile::Pred => 7,
617             RegFile::UPred => 7,
618             RegFile::Carry => panic!("Carry has no zero index"),
619             RegFile::Bar => panic!("Bar has no zero index"),
620             RegFile::Mem => panic!("Mem has no zero index"),
621         }
622     }
623 
new(file: RegFile, base_idx: u32, comps: u8) -> RegRef624     pub fn new(file: RegFile, base_idx: u32, comps: u8) -> RegRef {
625         assert!(base_idx < (1 << 26));
626         let mut packed = base_idx;
627         assert!(comps > 0 && comps <= 8);
628         packed |= u32::from(comps - 1) << 26;
629         assert!(u8::from(file) < 8);
630         packed |= u32::from(u8::from(file)) << 29;
631         RegRef { packed: packed }
632     }
633 
zero(file: RegFile, comps: u8) -> RegRef634     pub fn zero(file: RegFile, comps: u8) -> RegRef {
635         RegRef::new(file, RegRef::zero_idx(file), comps)
636     }
637 
base_idx(&self) -> u32638     pub fn base_idx(&self) -> u32 {
639         self.packed & 0x03ffffff
640     }
641 
idx_range(&self) -> Range<u32>642     pub fn idx_range(&self) -> Range<u32> {
643         let start = self.base_idx();
644         let end = start + u32::from(self.comps());
645         start..end
646     }
647 
comps(&self) -> u8648     pub fn comps(&self) -> u8 {
649         (((self.packed >> 26) & 0x7) + 1).try_into().unwrap()
650     }
651 
comp(&self, c: u8) -> RegRef652     pub fn comp(&self, c: u8) -> RegRef {
653         assert!(c < self.comps());
654         RegRef::new(self.file(), self.base_idx() + u32::from(c), 1)
655     }
656 }
657 
658 impl HasRegFile for RegRef {
file(&self) -> RegFile659     fn file(&self) -> RegFile {
660         ((self.packed >> 29) & 0x7).try_into().unwrap()
661     }
662 }
663 
664 impl fmt::Display for RegRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result665     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
666         write!(f, "{}{}", self.file().fmt_prefix(), self.base_idx())?;
667         if self.comps() > 1 {
668             write!(f, "..{}", self.idx_range().end)?;
669         }
670         Ok(())
671     }
672 }
673 
674 #[derive(Clone, Copy)]
675 pub enum Dst {
676     None,
677     SSA(SSARef),
678     Reg(RegRef),
679 }
680 
681 impl Dst {
is_none(&self) -> bool682     pub fn is_none(&self) -> bool {
683         matches!(self, Dst::None)
684     }
685 
as_reg(&self) -> Option<&RegRef>686     pub fn as_reg(&self) -> Option<&RegRef> {
687         match self {
688             Dst::Reg(r) => Some(r),
689             _ => None,
690         }
691     }
692 
as_ssa(&self) -> Option<&SSARef>693     pub fn as_ssa(&self) -> Option<&SSARef> {
694         match self {
695             Dst::SSA(r) => Some(r),
696             _ => None,
697         }
698     }
699 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>700     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
701         match self {
702             Dst::None | Dst::Reg(_) => &[],
703             Dst::SSA(ssa) => ssa.deref(),
704         }
705         .iter()
706     }
707 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>708     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
709         match self {
710             Dst::None | Dst::Reg(_) => &mut [],
711             Dst::SSA(ssa) => ssa.deref_mut(),
712         }
713         .iter_mut()
714     }
715 }
716 
717 impl From<RegRef> for Dst {
from(reg: RegRef) -> Dst718     fn from(reg: RegRef) -> Dst {
719         Dst::Reg(reg)
720     }
721 }
722 
723 impl<T: Into<SSARef>> From<T> for Dst {
from(ssa: T) -> Dst724     fn from(ssa: T) -> Dst {
725         Dst::SSA(ssa.into())
726     }
727 }
728 
729 impl fmt::Display for Dst {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result730     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
731         match self {
732             Dst::None => write!(f, "null")?,
733             Dst::SSA(v) => v.fmt(f)?,
734             Dst::Reg(r) => r.fmt(f)?,
735         }
736         Ok(())
737     }
738 }
739 
740 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
741 pub enum CBuf {
742     Binding(u8),
743 
744     #[allow(dead_code)]
745     BindlessSSA(SSAValue),
746 
747     #[allow(dead_code)]
748     BindlessGPR(RegRef),
749 }
750 
751 impl fmt::Display for CBuf {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result752     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
753         match self {
754             CBuf::Binding(idx) => write!(f, "c[{:#x}]", idx),
755             CBuf::BindlessSSA(v) => write!(f, "cx[{}]", v),
756             CBuf::BindlessGPR(r) => write!(f, "cx[{}]", r),
757         }
758     }
759 }
760 
761 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
762 pub struct CBufRef {
763     pub buf: CBuf,
764     pub offset: u16,
765 }
766 
767 impl CBufRef {
offset(self, offset: u16) -> CBufRef768     pub fn offset(self, offset: u16) -> CBufRef {
769         CBufRef {
770             buf: self.buf,
771             offset: self.offset + offset,
772         }
773     }
774 }
775 
776 impl fmt::Display for CBufRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result777     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
778         write!(f, "{}[{:#x}]", self.buf, self.offset)
779     }
780 }
781 
782 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
783 pub enum SrcRef {
784     Zero,
785     True,
786     False,
787     Imm32(u32),
788     CBuf(CBufRef),
789     SSA(SSARef),
790     Reg(RegRef),
791 }
792 
793 impl SrcRef {
is_alu(&self) -> bool794     pub fn is_alu(&self) -> bool {
795         match self {
796             SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => true,
797             SrcRef::SSA(ssa) => ssa.is_gpr(),
798             SrcRef::Reg(reg) => reg.is_gpr(),
799             SrcRef::True | SrcRef::False => false,
800         }
801     }
802 
is_predicate(&self) -> bool803     pub fn is_predicate(&self) -> bool {
804         match self {
805             SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
806             SrcRef::True | SrcRef::False => true,
807             SrcRef::SSA(ssa) => ssa.is_predicate(),
808             SrcRef::Reg(reg) => reg.is_predicate(),
809         }
810     }
811 
is_barrier(&self) -> bool812     pub fn is_barrier(&self) -> bool {
813         match self {
814             SrcRef::SSA(ssa) => ssa.file() == RegFile::Bar,
815             SrcRef::Reg(reg) => reg.file() == RegFile::Bar,
816             _ => false,
817         }
818     }
819 
as_reg(&self) -> Option<&RegRef>820     pub fn as_reg(&self) -> Option<&RegRef> {
821         match self {
822             SrcRef::Reg(r) => Some(r),
823             _ => None,
824         }
825     }
826 
as_ssa(&self) -> Option<&SSARef>827     pub fn as_ssa(&self) -> Option<&SSARef> {
828         match self {
829             SrcRef::SSA(r) => Some(r),
830             _ => None,
831         }
832     }
833 
get_reg(&self) -> Option<&RegRef>834     pub fn get_reg(&self) -> Option<&RegRef> {
835         match self {
836             SrcRef::Zero
837             | SrcRef::True
838             | SrcRef::False
839             | SrcRef::Imm32(_)
840             | SrcRef::SSA(_) => None,
841             SrcRef::CBuf(cb) => match &cb.buf {
842                 CBuf::Binding(_) | CBuf::BindlessSSA(_) => None,
843                 CBuf::BindlessGPR(reg) => Some(reg),
844             },
845             SrcRef::Reg(reg) => Some(reg),
846         }
847     }
848 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>849     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
850         match self {
851             SrcRef::Zero
852             | SrcRef::True
853             | SrcRef::False
854             | SrcRef::Imm32(_)
855             | SrcRef::Reg(_) => &[],
856             SrcRef::CBuf(cb) => match &cb.buf {
857                 CBuf::Binding(_) | CBuf::BindlessGPR(_) => &[],
858                 CBuf::BindlessSSA(ssa) => slice::from_ref(ssa),
859             },
860             SrcRef::SSA(ssa) => ssa.deref(),
861         }
862         .iter()
863     }
864 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>865     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
866         match self {
867             SrcRef::Zero
868             | SrcRef::True
869             | SrcRef::False
870             | SrcRef::Imm32(_)
871             | SrcRef::Reg(_) => &mut [],
872             SrcRef::CBuf(cb) => match &mut cb.buf {
873                 CBuf::Binding(_) | CBuf::BindlessGPR(_) => &mut [],
874                 CBuf::BindlessSSA(ssa) => slice::from_mut(ssa),
875             },
876             SrcRef::SSA(ssa) => ssa.deref_mut(),
877         }
878         .iter_mut()
879     }
880 }
881 
882 impl From<bool> for SrcRef {
from(b: bool) -> SrcRef883     fn from(b: bool) -> SrcRef {
884         if b {
885             SrcRef::True
886         } else {
887             SrcRef::False
888         }
889     }
890 }
891 
892 impl From<u32> for SrcRef {
from(u: u32) -> SrcRef893     fn from(u: u32) -> SrcRef {
894         if u == 0 {
895             SrcRef::Zero
896         } else {
897             SrcRef::Imm32(u)
898         }
899     }
900 }
901 
902 impl From<f32> for SrcRef {
from(f: f32) -> SrcRef903     fn from(f: f32) -> SrcRef {
904         f.to_bits().into()
905     }
906 }
907 
908 impl From<CBufRef> for SrcRef {
from(cb: CBufRef) -> SrcRef909     fn from(cb: CBufRef) -> SrcRef {
910         SrcRef::CBuf(cb)
911     }
912 }
913 
914 impl From<RegRef> for SrcRef {
from(reg: RegRef) -> SrcRef915     fn from(reg: RegRef) -> SrcRef {
916         SrcRef::Reg(reg)
917     }
918 }
919 
920 impl<T: Into<SSARef>> From<T> for SrcRef {
from(ssa: T) -> SrcRef921     fn from(ssa: T) -> SrcRef {
922         SrcRef::SSA(ssa.into())
923     }
924 }
925 
926 impl fmt::Display for SrcRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result927     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
928         match self {
929             SrcRef::Zero => write!(f, "rZ"),
930             SrcRef::True => write!(f, "pT"),
931             SrcRef::False => write!(f, "pF"),
932             SrcRef::Imm32(u) => write!(f, "{:#x}", u),
933             SrcRef::CBuf(c) => c.fmt(f),
934             SrcRef::SSA(v) => v.fmt(f),
935             SrcRef::Reg(r) => r.fmt(f),
936         }
937     }
938 }
939 
/// A modifier applied to an instruction source
///
/// The variants fall into three families: float (`FAbs`, `FNeg`, `FNegAbs`),
/// integer (`INeg`) and bitwise (`BNot`).  `None` belongs to all of them.
/// Most methods panic when called on a modifier from the wrong family.
#[derive(Clone, Copy, PartialEq)]
pub enum SrcMod {
    None,
    FAbs,
    FNeg,
    FNegAbs,
    INeg,
    BNot,
}

impl SrcMod {
    /// Returns true if this is `SrcMod::None`.
    pub fn is_none(&self) -> bool {
        *self == SrcMod::None
    }

    /// Returns true if the modifier takes a float absolute value.
    ///
    /// Panics on `INeg` and `BNot`.
    pub fn has_fabs(&self) -> bool {
        match self {
            SrcMod::FAbs | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FNeg => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if the modifier negates a float.
    ///
    /// Panics on `INeg` and `BNot`.
    pub fn has_fneg(&self) -> bool {
        match self {
            SrcMod::FNeg | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FAbs => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true for `INeg` and false for `None`.
    ///
    /// Panics on every other modifier.
    pub fn is_ineg(&self) -> bool {
        match self {
            SrcMod::INeg => true,
            SrcMod::None => false,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => {
                panic!("Not an integer modifier")
            }
        }
    }

    /// Returns true for `BNot` and false for `None`.
    ///
    /// Panics on every other modifier.
    pub fn is_bnot(&self) -> bool {
        match self {
            SrcMod::BNot => true,
            SrcMod::None => false,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg => {
                panic!("Not a bitwise modifier")
            }
        }
    }

    /// Returns the modifier obtained by taking the absolute value on top of
    /// this one.  The result is always `FAbs`, since any earlier negation is
    /// discarded.
    ///
    /// Panics on `INeg` and `BNot`.
    pub fn fabs(self) -> SrcMod {
        match self {
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
            SrcMod::None | SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs => {
                SrcMod::FAbs
            }
        }
    }

    /// Returns the modifier obtained by negating on top of this one, which
    /// toggles the negation and leaves any absolute value in place.
    ///
    /// Panics on `INeg` and `BNot`.
    pub fn fneg(self) -> SrcMod {
        match self {
            SrcMod::None => SrcMod::FNeg,
            SrcMod::FNeg => SrcMod::None,
            SrcMod::FAbs => SrcMod::FNegAbs,
            SrcMod::FNegAbs => SrcMod::FAbs,
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
        }
    }

    /// Toggles integer negation.
    ///
    /// Panics on anything other than `None` and `INeg`.
    pub fn ineg(self) -> SrcMod {
        match self {
            SrcMod::None => SrcMod::INeg,
            SrcMod::INeg => SrcMod::None,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => {
                panic!("Not an integer source modifier")
            }
        }
    }

    /// Toggles bitwise negation.
    ///
    /// Panics on anything other than `None` and `BNot`.
    pub fn bnot(self) -> SrcMod {
        match self {
            SrcMod::None => SrcMod::BNot,
            SrcMod::BNot => SrcMod::None,
            SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg => {
                panic!("Not a boolean source modifier")
            }
        }
    }

    /// Returns the modifier equivalent to applying `other` on top of this
    /// one.
    ///
    /// Panics if the two modifiers come from different families.
    pub fn modify(self, other: SrcMod) -> SrcMod {
        match other {
            SrcMod::None => self,
            SrcMod::FAbs => self.fabs(),
            SrcMod::FNeg => self.fneg(),
            SrcMod::FNegAbs => {
                let abs = self.fabs();
                abs.fneg()
            }
            SrcMod::INeg => self.ineg(),
            SrcMod::BNot => self.bnot(),
        }
    }
}
1033 
/// The kind of source an instruction slot expects
///
/// `Src::supports_type()` defines exactly which modifier and reference
/// combinations each type accepts.
#[repr(u8)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum SrcType {
    /// An unmodified SSA value or register
    SSA,
    /// An unmodified zero, SSA value, or register
    GPR,
    /// An unmodified ALU source
    ALU,
    /// An ALU source which may carry float modifiers
    F32,
    /// An ALU source which may carry float modifiers
    F64,
    /// An ALU source which may carry an integer negation
    I32,
    /// An ALU source which may carry a bitwise not
    B32,
    /// A predicate source which may carry a boolean not
    Pred,
    /// An unmodified barrier source
    Bar,
}
1047 
1048 #[derive(Clone, Copy, PartialEq)]
1049 pub struct Src {
1050     pub src_ref: SrcRef,
1051     pub src_mod: SrcMod,
1052 }
1053 
1054 impl Src {
new_zero() -> Src1055     pub fn new_zero() -> Src {
1056         SrcRef::Zero.into()
1057     }
1058 
new_imm_u32(u: u32) -> Src1059     pub fn new_imm_u32(u: u32) -> Src {
1060         u.into()
1061     }
1062 
new_imm_bool(b: bool) -> Src1063     pub fn new_imm_bool(b: bool) -> Src {
1064         b.into()
1065     }
1066 
fabs(&self) -> Src1067     pub fn fabs(&self) -> Src {
1068         Src {
1069             src_ref: self.src_ref,
1070             src_mod: self.src_mod.fabs(),
1071         }
1072     }
1073 
fneg(&self) -> Src1074     pub fn fneg(&self) -> Src {
1075         Src {
1076             src_ref: self.src_ref,
1077             src_mod: self.src_mod.fneg(),
1078         }
1079     }
1080 
ineg(&self) -> Src1081     pub fn ineg(&self) -> Src {
1082         Src {
1083             src_ref: self.src_ref,
1084             src_mod: self.src_mod.ineg(),
1085         }
1086     }
1087 
bnot(&self) -> Src1088     pub fn bnot(&self) -> Src {
1089         Src {
1090             src_ref: self.src_ref,
1091             src_mod: self.src_mod.bnot(),
1092         }
1093     }
1094 
as_ssa(&self) -> Option<&SSARef>1095     pub fn as_ssa(&self) -> Option<&SSARef> {
1096         if self.src_mod.is_none() {
1097             self.src_ref.as_ssa()
1098         } else {
1099             None
1100         }
1101     }
1102 
as_bool(&self) -> Option<bool>1103     pub fn as_bool(&self) -> Option<bool> {
1104         match self.src_ref {
1105             SrcRef::True => Some(!self.src_mod.is_bnot()),
1106             SrcRef::False => Some(self.src_mod.is_bnot()),
1107             SrcRef::SSA(vec) => {
1108                 assert!(vec.is_predicate() && vec.comps() == 1);
1109                 None
1110             }
1111             SrcRef::Reg(reg) => {
1112                 assert!(reg.is_predicate() && reg.comps() == 1);
1113                 None
1114             }
1115             _ => panic!("Not a boolean source"),
1116         }
1117     }
1118 
as_u32(&self) -> Option<u32>1119     pub fn as_u32(&self) -> Option<u32> {
1120         if self.src_mod.is_none() {
1121             match self.src_ref {
1122                 SrcRef::Zero => Some(0),
1123                 SrcRef::Imm32(u) => Some(u),
1124                 SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
1125                 _ => panic!("Invalid integer source"),
1126             }
1127         } else {
1128             None
1129         }
1130     }
1131 
as_imm_not_i20(&self) -> Option<u32>1132     pub fn as_imm_not_i20(&self) -> Option<u32> {
1133         match self.src_ref {
1134             SrcRef::Imm32(i) => {
1135                 assert!(self.src_mod.is_none());
1136                 let top = i & 0xfff80000;
1137                 if top == 0 || top == 0xfff80000 {
1138                     None
1139                 } else {
1140                     Some(i)
1141                 }
1142             }
1143             _ => None,
1144         }
1145     }
1146 
as_imm_not_f20(&self) -> Option<u32>1147     pub fn as_imm_not_f20(&self) -> Option<u32> {
1148         match self.src_ref {
1149             SrcRef::Imm32(i) => {
1150                 assert!(self.src_mod.is_none());
1151                 if (i & 0xfff) == 0 {
1152                     None
1153                 } else {
1154                     Some(i)
1155                 }
1156             }
1157             _ => None,
1158         }
1159     }
1160 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>1161     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
1162         self.src_ref.iter_ssa()
1163     }
1164 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>1165     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
1166         self.src_ref.iter_ssa_mut()
1167     }
1168 
1169     #[allow(dead_code)]
is_uniform(&self) -> bool1170     pub fn is_uniform(&self) -> bool {
1171         match self.src_ref {
1172             SrcRef::Zero
1173             | SrcRef::True
1174             | SrcRef::False
1175             | SrcRef::Imm32(_)
1176             | SrcRef::CBuf(_) => true,
1177             SrcRef::SSA(ssa) => ssa.is_uniform(),
1178             SrcRef::Reg(reg) => reg.is_uniform(),
1179         }
1180     }
1181 
is_predicate(&self) -> bool1182     pub fn is_predicate(&self) -> bool {
1183         self.src_ref.is_predicate()
1184     }
1185 
is_zero(&self) -> bool1186     pub fn is_zero(&self) -> bool {
1187         match self.src_ref {
1188             SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod {
1189                 SrcMod::None | SrcMod::FAbs | SrcMod::INeg => true,
1190                 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false,
1191             },
1192             _ => false,
1193         }
1194     }
1195 
is_fneg_zero(&self, src_type: SrcType) -> bool1196     pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
1197         match self.src_ref {
1198             SrcRef::Zero | SrcRef::Imm32(0) => {
1199                 matches!(self.src_mod, SrcMod::FNeg | SrcMod::FNegAbs)
1200             }
1201             SrcRef::Imm32(0x80000000) => {
1202                 src_type == SrcType::F32 && self.src_mod.is_none()
1203             }
1204             _ => false,
1205         }
1206     }
1207 
1208     #[allow(dead_code)]
supports_type(&self, src_type: &SrcType) -> bool1209     pub fn supports_type(&self, src_type: &SrcType) -> bool {
1210         match src_type {
1211             SrcType::SSA => {
1212                 if !self.src_mod.is_none() {
1213                     return false;
1214                 }
1215 
1216                 matches!(self.src_ref, SrcRef::SSA(_) | SrcRef::Reg(_))
1217             }
1218             SrcType::GPR => {
1219                 if !self.src_mod.is_none() {
1220                     return false;
1221                 }
1222 
1223                 matches!(
1224                     self.src_ref,
1225                     SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_)
1226                 )
1227             }
1228             SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
1229             SrcType::F32 | SrcType::F64 => {
1230                 match self.src_mod {
1231                     SrcMod::None
1232                     | SrcMod::FAbs
1233                     | SrcMod::FNeg
1234                     | SrcMod::FNegAbs => (),
1235                     _ => return false,
1236                 }
1237 
1238                 self.src_ref.is_alu()
1239             }
1240             SrcType::I32 => {
1241                 match self.src_mod {
1242                     SrcMod::None | SrcMod::INeg => (),
1243                     _ => return false,
1244                 }
1245 
1246                 self.src_ref.is_alu()
1247             }
1248             SrcType::B32 => {
1249                 match self.src_mod {
1250                     SrcMod::None | SrcMod::BNot => (),
1251                     _ => return false,
1252                 }
1253 
1254                 self.src_ref.is_alu()
1255             }
1256             SrcType::Pred => {
1257                 match self.src_mod {
1258                     SrcMod::None | SrcMod::BNot => (),
1259                     _ => return false,
1260                 }
1261 
1262                 self.src_ref.is_predicate()
1263             }
1264             SrcType::Bar => self.src_mod.is_none() && self.src_ref.is_barrier(),
1265         }
1266     }
1267 }
1268 
1269 impl<T: Into<SrcRef>> From<T> for Src {
from(value: T) -> Src1270     fn from(value: T) -> Src {
1271         Src {
1272             src_ref: value.into(),
1273             src_mod: SrcMod::None,
1274         }
1275     }
1276 }
1277 
1278 impl fmt::Display for Src {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1279     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1280         match self.src_mod {
1281             SrcMod::None => write!(f, "{}", self.src_ref),
1282             SrcMod::FAbs => write!(f, "|{}|", self.src_ref),
1283             SrcMod::FNeg => write!(f, "-{}", self.src_ref),
1284             SrcMod::FNegAbs => write!(f, "-|{}|", self.src_ref),
1285             SrcMod::INeg => write!(f, "-{}", self.src_ref),
1286             SrcMod::BNot => write!(f, "!{}", self.src_ref),
1287         }
1288     }
1289 }
1290 
1291 impl SrcType {
1292     const DEFAULT: SrcType = SrcType::GPR;
1293 }
1294 
1295 pub enum SrcTypeList {
1296     Array(&'static [SrcType]),
1297     Uniform(SrcType),
1298 }
1299 
1300 impl Index<usize> for SrcTypeList {
1301     type Output = SrcType;
1302 
index(&self, idx: usize) -> &SrcType1303     fn index(&self, idx: usize) -> &SrcType {
1304         match self {
1305             SrcTypeList::Array(arr) => &arr[idx],
1306             SrcTypeList::Uniform(typ) => typ,
1307         }
1308     }
1309 }
1310 
1311 pub trait SrcsAsSlice {
srcs_as_slice(&self) -> &[Src]1312     fn srcs_as_slice(&self) -> &[Src];
srcs_as_mut_slice(&mut self) -> &mut [Src]1313     fn srcs_as_mut_slice(&mut self) -> &mut [Src];
src_types(&self) -> SrcTypeList1314     fn src_types(&self) -> SrcTypeList;
1315 }
1316 
1317 pub trait DstsAsSlice {
dsts_as_slice(&self) -> &[Dst]1318     fn dsts_as_slice(&self) -> &[Dst];
dsts_as_mut_slice(&mut self) -> &mut [Dst]1319     fn dsts_as_mut_slice(&mut self) -> &mut [Dst];
1320 }
1321 
fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result1322 fn fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result {
1323     if dsts.is_empty() {
1324         return Ok(());
1325     }
1326 
1327     // Figure out the last non-null dst
1328     //
1329     // Note: By making the top inclusive and starting at 0, we ensure that
1330     // at least one dst always gets printed.
1331     let mut last_dst = 0;
1332     for (i, dst) in dsts.iter().enumerate() {
1333         if !dst.is_none() {
1334             last_dst = i;
1335         }
1336     }
1337 
1338     for i in 0..(last_dst + 1) {
1339         if i != 0 {
1340             write!(f, " ")?;
1341         }
1342         write!(f, "{}", &dsts[i])?;
1343     }
1344     Ok(())
1345 }
1346 
1347 pub trait DisplayOp: DstsAsSlice {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1348     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1349         fmt_dst_slice(f, self.dsts_as_slice())
1350     }
1351 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1352     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
1353 }
1354 
/// Adapter which makes a formatting closure usable as a `Display` value
///
/// Hack struct so we can re-use Formatters.  Shamelessly stolen from
/// <https://users.rust-lang.org/t/reusing-an-fmt-formatter/8531/4>
pub struct Fmt<F>(pub F)
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result;

impl<F> fmt::Display for Fmt<F>
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result,
{
    /// Formats by calling the wrapped closure with `f`
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        (self.0)(f)
    }
}
1369 
/// Implements `fmt::Display` for an op type in terms of its `fmt_dsts()`
/// and `fmt_op()` methods
///
/// The output is `<dsts> = <op>`, or just `<op>` when `fmt_dsts()` writes
/// nothing.  `std::fmt::Write` must be in scope where the macro is used.
macro_rules! impl_display_for_op {
    ($op: ident) => {
        impl fmt::Display for $op {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Render the dsts into a scratch string first so the
                // " = " separator can be skipped when there are none.
                let mut dsts = String::new();
                write!(dsts, "{}", Fmt(|df| self.fmt_dsts(df)))?;
                if !dsts.is_empty() {
                    write!(f, "{} = ", dsts)?;
                }
                self.fmt_op(f)
            }
        }
    };
}
1384 
1385 #[allow(dead_code)]
1386 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1387 pub enum PredSetOp {
1388     And,
1389     Or,
1390     Xor,
1391 }
1392 
1393 impl PredSetOp {
is_trivial(&self, accum: &Src) -> bool1394     pub fn is_trivial(&self, accum: &Src) -> bool {
1395         if let Some(b) = accum.as_bool() {
1396             match self {
1397                 PredSetOp::And => b,
1398                 PredSetOp::Or => !b,
1399                 PredSetOp::Xor => !b,
1400             }
1401         } else {
1402             false
1403         }
1404     }
1405 }
1406 
1407 impl fmt::Display for PredSetOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1408     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1409         match self {
1410             PredSetOp::And => write!(f, ".and"),
1411             PredSetOp::Or => write!(f, ".or"),
1412             PredSetOp::Xor => write!(f, ".xor"),
1413         }
1414     }
1415 }
1416 
/// A floating-point comparison
///
/// The `Ord*` and `Unord*` variants are two parallel families of binary
/// comparisons; `IsNum` and `IsNan` are unary tests.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatCmpOp {
    OrdEq,
    OrdNe,
    OrdLt,
    OrdLe,
    OrdGt,
    OrdGe,
    UnordEq,
    UnordNe,
    UnordLt,
    UnordLe,
    UnordGt,
    UnordGe,
    IsNum,
    IsNan,
}

impl FloatCmpOp {
    /// Returns the comparison which gives the same result with its two
    /// operands swapped
    ///
    /// # Panics
    ///
    /// Panics for the unary tests `IsNum` and `IsNan`.
    pub fn flip(self) -> FloatCmpOp {
        match self {
            FloatCmpOp::OrdEq
            | FloatCmpOp::OrdNe
            | FloatCmpOp::UnordEq
            | FloatCmpOp::UnordNe => self,
            FloatCmpOp::OrdLt => FloatCmpOp::OrdGt,
            FloatCmpOp::OrdLe => FloatCmpOp::OrdGe,
            FloatCmpOp::OrdGt => FloatCmpOp::OrdLt,
            FloatCmpOp::OrdGe => FloatCmpOp::OrdLe,
            FloatCmpOp::UnordLt => FloatCmpOp::UnordGt,
            FloatCmpOp::UnordLe => FloatCmpOp::UnordGe,
            FloatCmpOp::UnordGt => FloatCmpOp::UnordLt,
            FloatCmpOp::UnordGe => FloatCmpOp::UnordLe,
            FloatCmpOp::IsNum | FloatCmpOp::IsNan => panic!("Cannot flip unop"),
        }
    }
}

impl fmt::Display for FloatCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            FloatCmpOp::OrdEq => ".eq",
            FloatCmpOp::OrdNe => ".ne",
            FloatCmpOp::OrdLt => ".lt",
            FloatCmpOp::OrdLe => ".le",
            FloatCmpOp::OrdGt => ".gt",
            FloatCmpOp::OrdGe => ".ge",
            FloatCmpOp::UnordEq => ".equ",
            FloatCmpOp::UnordNe => ".neu",
            FloatCmpOp::UnordLt => ".ltu",
            FloatCmpOp::UnordLe => ".leu",
            FloatCmpOp::UnordGt => ".gtu",
            FloatCmpOp::UnordGe => ".geu",
            FloatCmpOp::IsNum => ".num",
            FloatCmpOp::IsNan => ".nan",
        })
    }
}
1474 
/// An integer comparison
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}

impl IntCmpOp {
    /// Returns the comparison which gives the same result with its two
    /// operands swapped
    pub fn flip(self) -> IntCmpOp {
        match self {
            IntCmpOp::Eq | IntCmpOp::Ne => self,
            IntCmpOp::Lt => IntCmpOp::Gt,
            IntCmpOp::Le => IntCmpOp::Ge,
            IntCmpOp::Gt => IntCmpOp::Lt,
            IntCmpOp::Ge => IntCmpOp::Le,
        }
    }
}

impl fmt::Display for IntCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            IntCmpOp::Eq => ".eq",
            IntCmpOp::Ne => ".ne",
            IntCmpOp::Lt => ".lt",
            IntCmpOp::Le => ".le",
            IntCmpOp::Gt => ".gt",
            IntCmpOp::Ge => ".ge",
        })
    }
}
1509 
/// The signedness used by an integer comparison
pub enum IntCmpType {
    /// Unsigned 32-bit
    U32,
    /// Signed 32-bit
    I32,
}

impl fmt::Display for IntCmpType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            IntCmpType::U32 => ".u32",
            IntCmpType::I32 => ".i32",
        })
    }
}
1523 
/// A two-source logic operation
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LogicOp2 {
    And,
    Or,
    Xor,
    /// Passes the second source through unchanged
    PassB,
}

impl fmt::Display for LogicOp2 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            LogicOp2::And => "and",
            LogicOp2::Or => "or",
            LogicOp2::Xor => "xor",
            LogicOp2::PassB => "pass_b",
        })
    }
}
1542 
1543 impl LogicOp2 {
to_lut(self) -> LogicOp31544     pub fn to_lut(self) -> LogicOp3 {
1545         match self {
1546             LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
1547             LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
1548             LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
1549             LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
1550         }
1551     }
1552 }
1553 
/// A three-source logic operation described by an 8-entry truth table
///
/// Bit `i` of `lut` is the result for the inputs `x = bit 2 of i`,
/// `y = bit 1 of i` and `z = bit 0 of i`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
    pub lut: u8,
}

impl LogicOp3 {
    /// For each source, the truth-table rows in which that source is 1
    pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];

    /// Builds the look-up table of `f` by evaluating it once on the three
    /// source masks
    ///
    /// `f` must be a bitwise function for the result to be meaningful.
    #[inline]
    pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp3 {
        let [x, y, z] = LogicOp3::SRC_MASKS;
        LogicOp3 { lut: f(x, y, z) }
    }

    /// Builds the table which yields `val` whatever the sources are
    pub fn new_const(val: bool) -> LogicOp3 {
        LogicOp3 {
            lut: if val { !0 } else { 0 },
        }
    }

    /// Returns true if the result depends on source `src_idx`
    ///
    /// Panics if `src_idx` is not 0, 1 or 2.
    pub fn src_used(&self, src_idx: usize) -> bool {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        // Compare the rows where the source is 0 against the matching
        // rows where it is 1; any difference means the source matters.
        (self.lut & !mask) != ((self.lut >> shift) & !mask)
    }

    /// Specializes the table for source `src_idx` being the constant `val`
    ///
    /// Afterwards the result no longer depends on that source.  Panics if
    /// `src_idx` is not 0, 1 or 2.
    pub fn fix_src(&mut self, src_idx: usize, val: bool) {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        self.lut = if val {
            // Copy the source-is-1 rows over the source-is-0 rows
            let t_bits = self.lut & mask;
            t_bits | (t_bits >> shift)
        } else {
            // Copy the source-is-0 rows over the source-is-1 rows
            let f_bits = self.lut & !mask;
            (f_bits << shift) | f_bits
        };
    }

    /// Rewrites the table so that it expects source `src_idx` inverted
    ///
    /// Panics if `src_idx` is not 0, 1 or 2.
    pub fn invert_src(&mut self, src_idx: usize) {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        let t_bits = self.lut & mask;
        let f_bits = self.lut & !mask;
        // Swap the source-is-1 rows with the source-is-0 rows
        self.lut = (f_bits << shift) | (t_bits >> shift);
    }

    /// Evaluates the operation bitwise on `x`, `y` and `z`
    ///
    /// Works on any type with bitwise and, or and not, including `bool`.
    pub fn eval<
        T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
    >(
        &self,
        x: T,
        y: T,
        z: T,
    ) -> T {
        // Selects `v` when the row has that source set and `!v` otherwise
        let pick = |v: T, set: bool| if set { v } else { !v };

        let mut res = x & !x; // zero
        for row in 0..8u8 {
            if (self.lut & (1 << row)) != 0 {
                let term = pick(x, (row & 4) != 0)
                    & pick(y, (row & 2) != 0)
                    & pick(z, (row & 1) != 0);
                res = res | term;
            }
        }
        res
    }
}

impl fmt::Display for LogicOp3 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "LUT[{:#x}]", self.lut)
    }
}
1647 
/// A floating-point type, identified by its width
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatType {
    F16,
    F32,
    F64,
}

impl FloatType {
    /// Returns the float type which is `bits` bits wide
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 16, 32 or 64.
    pub fn from_bits(bits: usize) -> FloatType {
        match bits {
            16 => FloatType::F16,
            32 => FloatType::F32,
            64 => FloatType::F64,
            _ => panic!("Invalid float type size"),
        }
    }

    /// Returns the width of the type in bits
    pub fn bits(&self) -> usize {
        match self {
            FloatType::F16 => 16,
            FloatType::F32 => 32,
            FloatType::F64 => 64,
        }
    }
}

impl fmt::Display for FloatType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            FloatType::F16 => ".f16",
            FloatType::F32 => ".f32",
            FloatType::F64 => ".f64",
        })
    }
}
1683 
/// A floating-point rounding mode
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FRndMode {
    /// Printed as `.re`
    NearestEven,
    /// Printed as `.rm`
    NegInf,
    /// Printed as `.rp`
    PosInf,
    /// Printed as `.rz`
    Zero,
}

impl fmt::Display for FRndMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            FRndMode::NearestEven => ".re",
            FRndMode::NegInf => ".rm",
            FRndMode::PosInf => ".rp",
            FRndMode::Zero => ".rz",
        })
    }
}
1703 
/// The dimensionality of a texture
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDim {
    _1D,
    Array1D,
    _2D,
    Array2D,
    _3D,
    Cube,
    ArrayCube,
}

impl fmt::Display for TexDim {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            TexDim::_1D => ".1d",
            TexDim::Array1D => ".a1d",
            TexDim::_2D => ".2d",
            TexDim::Array2D => ".a2d",
            TexDim::_3D => ".3d",
            TexDim::Cube => ".cube",
            TexDim::ArrayCube => ".acube",
        })
    }
}
1728 
/// How the level of detail is selected for a texture operation
///
/// Unlike most modifiers in this file, these print without a leading dot.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexLodMode {
    Auto,
    Zero,
    Bias,
    Lod,
    Clamp,
    BiasClamp,
}

impl fmt::Display for TexLodMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            TexLodMode::Auto => "la",
            TexLodMode::Zero => "lz",
            TexLodMode::Bias => "lb",
            TexLodMode::Lod => "ll",
            TexLodMode::Clamp => "lc",
            TexLodMode::BiasClamp => "lb.lc",
        })
    }
}
1751 
/// The texel offset mode of a `tld4` operation
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum Tld4OffsetMode {
    /// Printed as `no_off`
    None,
    /// Printed as `aoffi`
    AddOffI,
    /// Printed as `ptp`
    PerPx,
}

impl fmt::Display for Tld4OffsetMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Tld4OffsetMode::None => "no_off",
            Tld4OffsetMode::AddOffI => "aoffi",
            Tld4OffsetMode::PerPx => "ptp",
        })
    }
}
1768 
/// The property requested by a texture query
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexQuery {
    Dimension,
    TextureType,
    SamplerPos,
}

impl fmt::Display for TexQuery {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            TexQuery::Dimension => "dimension",
            TexQuery::TextureType => "texture_type",
            TexQuery::SamplerPos => "sampler_pos",
        })
    }
}
1786 
/// The dimensionality of an image
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ImageDim {
    _1D,
    _1DBuffer,
    _1DArray,
    _2D,
    _2DArray,
    _3D,
}

impl ImageDim {
    /// Returns the number of coordinate components used to address the
    /// image
    pub fn coord_comps(&self) -> u8 {
        match self {
            ImageDim::_1D | ImageDim::_1DBuffer => 1,
            ImageDim::_1DArray | ImageDim::_2D => 2,
            ImageDim::_2DArray | ImageDim::_3D => 3,
        }
    }
}

impl fmt::Display for ImageDim {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            ImageDim::_1D => ".1d",
            ImageDim::_1DBuffer => ".buf",
            ImageDim::_1DArray => ".a1d",
            ImageDim::_2D => ".2d",
            ImageDim::_2DArray => ".a2d",
            ImageDim::_3D => ".3d",
        })
    }
}
1822 
/// An integer type, identified by its width and signedness
pub enum IntType {
    U8,
    I8,
    U16,
    I16,
    U32,
    I32,
    U64,
    I64,
}

impl IntType {
    /// Returns the integer type with the given width and signedness
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 8, 16, 32 or 64.
    pub fn from_bits(bits: usize, is_signed: bool) -> IntType {
        match (bits, is_signed) {
            (8, false) => IntType::U8,
            (8, true) => IntType::I8,
            (16, false) => IntType::U16,
            (16, true) => IntType::I16,
            (32, false) => IntType::U32,
            (32, true) => IntType::I32,
            (64, false) => IntType::U64,
            (64, true) => IntType::I64,
            _ => panic!("Invalid integer type size"),
        }
    }

    /// Returns true for the signed types
    pub fn is_signed(&self) -> bool {
        match self {
            IntType::U8 | IntType::U16 | IntType::U32 | IntType::U64 => false,
            IntType::I8 | IntType::I16 | IntType::I32 | IntType::I64 => true,
        }
    }

    /// Returns the width of the type in bits
    pub fn bits(&self) -> usize {
        match self {
            IntType::U8 | IntType::I8 => 8,
            IntType::U16 | IntType::I16 => 16,
            IntType::U32 | IntType::I32 => 32,
            IntType::U64 | IntType::I64 => 64,
        }
    }
}

impl fmt::Display for IntType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            IntType::U8 => ".u8",
            IntType::I8 => ".i8",
            IntType::U16 => ".u16",
            IntType::I16 => ".i16",
            IntType::U32 => ".u32",
            IntType::I32 => ".i32",
            IntType::U64 => ".u64",
            IntType::I64 => ".i64",
        })
    }
}
1900 
/// The width of a memory address
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemAddrType {
    /// Printed as `.a32`
    A32,
    /// Printed as `.a64`
    A64,
}

impl fmt::Display for MemAddrType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            MemAddrType::A32 => ".a32",
            MemAddrType::A64 => ".a64",
        })
    }
}
1915 
/// The data type of a memory load or store
///
/// Only the 8- and 16-bit types carry a signedness.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemType {
    U8,
    I8,
    U16,
    I16,
    B32,
    B64,
    B128,
}

impl MemType {
    /// Returns the memory type for an access of `size` bytes
    ///
    /// `is_signed` only matters for 1- and 2-byte accesses.
    ///
    /// # Panics
    ///
    /// Panics if `size` is not 1, 2, 4, 8 or 16.
    pub fn from_size(size: u8, is_signed: bool) -> MemType {
        match (size, is_signed) {
            (1, false) => MemType::U8,
            (1, true) => MemType::I8,
            (2, false) => MemType::U16,
            (2, true) => MemType::I16,
            (4, _) => MemType::B32,
            (8, _) => MemType::B64,
            (16, _) => MemType::B128,
            _ => panic!("Invalid memory load/store size"),
        }
    }
}

impl fmt::Display for MemType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            MemType::U8 => ".u8",
            MemType::I8 => ".i8",
            MemType::U16 => ".u16",
            MemType::I16 => ".i16",
            MemType::B32 => ".b32",
            MemType::B64 => ".b64",
            MemType::B128 => ".b128",
        })
    }
}
1965 
1966 #[allow(dead_code)]
1967 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1968 pub enum MemOrder {
1969     Constant,
1970     Weak,
1971     Strong(MemScope),
1972 }
1973 
1974 impl fmt::Display for MemOrder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1975     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1976         match self {
1977             MemOrder::Constant => write!(f, ".constant"),
1978             MemOrder::Weak => write!(f, ".weak"),
1979             MemOrder::Strong(scope) => write!(f, ".strong{}", scope),
1980         }
1981     }
1982 }
1983 
/// Scope attached to a strongly-ordered memory access.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemScope {
    /// Printed as `.cta`.
    CTA,
    /// Printed as `.gpu`.
    GPU,
    /// Printed as `.sys`.
    System,
}

impl fmt::Display for MemScope {
    /// Writes the dotted scope suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemScope::CTA => ".cta",
            MemScope::GPU => ".gpu",
            MemScope::System => ".sys",
        };
        f.write_str(suffix)
    }
}
2001 
2002 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2003 pub enum MemSpace {
2004     Global(MemAddrType),
2005     Local,
2006     Shared,
2007 }
2008 
2009 impl MemSpace {
addr_type(&self) -> MemAddrType2010     pub fn addr_type(&self) -> MemAddrType {
2011         match self {
2012             MemSpace::Global(t) => *t,
2013             MemSpace::Local => MemAddrType::A32,
2014             MemSpace::Shared => MemAddrType::A32,
2015         }
2016     }
2017 }
2018 
2019 impl fmt::Display for MemSpace {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2020     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2021         match self {
2022             MemSpace::Global(t) => write!(f, ".global{t}"),
2023             MemSpace::Local => write!(f, ".local"),
2024             MemSpace::Shared => write!(f, ".shared"),
2025         }
2026     }
2027 }
2028 
/// Eviction priority hint attached to a memory access.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    /// Printed as `.ef`.
    First,
    /// Prints nothing.
    Normal,
    /// Printed as `.el`.
    Last,
    /// Printed as `.lu`.
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    /// Writes the dotted priority suffix; `Normal` produces no output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemEvictionPriority::First => ".ef",
            // The normal priority is left implicit.
            MemEvictionPriority::Normal => return Ok(()),
            MemEvictionPriority::Last => ".el",
            MemEvictionPriority::Unchanged => ".lu",
        };
        f.write_str(suffix)
    }
}
2048 
2049 #[derive(Clone)]
2050 pub struct MemAccess {
2051     pub mem_type: MemType,
2052     pub space: MemSpace,
2053     pub order: MemOrder,
2054     pub eviction_priority: MemEvictionPriority,
2055 }
2056 
2057 impl fmt::Display for MemAccess {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2058     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2059         write!(
2060             f,
2061             "{}{}{}{}",
2062             self.space, self.order, self.eviction_priority, self.mem_type,
2063         )
2064     }
2065 }
2066 
/// Data type of an atomic operation.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomType {
    F16x2,
    U32,
    I32,
    F32,
    U64,
    I64,
    F64,
}

impl AtomType {
    /// Returns the float atomic type with the given bit width.
    ///
    /// # Panics
    ///
    /// Panics for 16 bits (not yet supported) and for any width other
    /// than 32 or 64.
    pub fn F(bits: u8) -> AtomType {
        match bits {
            32 => AtomType::F32,
            64 => AtomType::F64,
            16 => panic!("16-bit float atomics not yet supported"),
            _ => panic!("Invalid float atomic type"),
        }
    }

    /// Returns the unsigned integer atomic type with the given bit width.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is neither 32 nor 64.
    pub fn U(bits: u8) -> AtomType {
        match bits {
            32 => AtomType::U32,
            64 => AtomType::U64,
            _ => panic!("Invalid uint atomic type"),
        }
    }

    /// Returns the signed integer atomic type with the given bit width.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is neither 32 nor 64.
    pub fn I(bits: u8) -> AtomType {
        match bits {
            32 => AtomType::I32,
            64 => AtomType::I64,
            _ => panic!("Invalid int atomic type"),
        }
    }
}

impl fmt::Display for AtomType {
    /// Writes the dotted type suffix, e.g. `.f16x2` or `.u64`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomType::F16x2 => ".f16x2",
            AtomType::U32 => ".u32",
            AtomType::I32 => ".i32",
            AtomType::F32 => ".f32",
            AtomType::U64 => ".u64",
            AtomType::I64 => ".i64",
            AtomType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2119 
/// Operation performed by an atomic instruction.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp {
    Add,
    Min,
    Max,
    Inc,
    Dec,
    And,
    Or,
    Xor,
    Exch,
    CmpExch,
}

impl fmt::Display for AtomOp {
    /// Writes the dotted operation suffix, e.g. `.add` or `.cmpexch`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomOp::Add => ".add",
            AtomOp::Min => ".min",
            AtomOp::Max => ".max",
            AtomOp::Inc => ".inc",
            AtomOp::Dec => ".dec",
            AtomOp::And => ".and",
            AtomOp::Or => ".or",
            AtomOp::Xor => ".xor",
            AtomOp::Exch => ".exch",
            AtomOp::CmpExch => ".cmpexch",
        };
        f.write_str(suffix)
    }
}
2151 
/// Interpolation frequency selector.
///
/// NOTE(review): the meaning of each variant is not visible from this
/// definition; see the users of this type.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpFreq {
    Pass,
    PassMulW,
    Constant,
    State,
}
2160 
/// Interpolation location selector.
///
/// NOTE(review): the meaning of each variant is not visible from this
/// definition; see the users of this type.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpLoc {
    Default,
    Centroid,
    Offset,
}
2168 
/// Parameters describing an attribute access.
///
/// NOTE(review): field semantics are not visible from this definition.
/// Presumably `addr` is the attribute address and `comps` the number of
/// components accessed — confirm against the users of this type.
pub struct AttrAccess {
    pub addr: u16,
    pub comps: u8,
    pub patch: bool,
    pub output: bool,
    pub phys: bool,
}
2176 
2177 #[repr(C)]
2178 #[derive(SrcsAsSlice, DstsAsSlice)]
2179 pub struct OpFAdd {
2180     pub dst: Dst,
2181 
2182     #[src_type(F32)]
2183     pub srcs: [Src; 2],
2184 
2185     pub saturate: bool,
2186     pub rnd_mode: FRndMode,
2187     pub ftz: bool,
2188 }
2189 
2190 impl DisplayOp for OpFAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2191     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2192         let sat = if self.saturate { ".sat" } else { "" };
2193         write!(f, "fadd{sat}")?;
2194         if self.rnd_mode != FRndMode::NearestEven {
2195             write!(f, "{}", self.rnd_mode)?;
2196         }
2197         if self.ftz {
2198             write!(f, ".ftz")?;
2199         }
2200         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2201     }
2202 }
2203 impl_display_for_op!(OpFAdd);
2204 
2205 #[repr(C)]
2206 #[derive(SrcsAsSlice, DstsAsSlice)]
2207 pub struct OpFFma {
2208     pub dst: Dst,
2209 
2210     #[src_type(F32)]
2211     pub srcs: [Src; 3],
2212 
2213     pub saturate: bool,
2214     pub rnd_mode: FRndMode,
2215     pub ftz: bool,
2216     pub dnz: bool,
2217 }
2218 
2219 impl DisplayOp for OpFFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2220     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2221         let sat = if self.saturate { ".sat" } else { "" };
2222         write!(f, "ffma{sat}")?;
2223         if self.rnd_mode != FRndMode::NearestEven {
2224             write!(f, "{}", self.rnd_mode)?;
2225         }
2226         if self.dnz {
2227             write!(f, ".dnz")?;
2228         } else if self.ftz {
2229             write!(f, ".ftz")?;
2230         }
2231         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2232     }
2233 }
2234 impl_display_for_op!(OpFFma);
2235 
2236 #[repr(C)]
2237 #[derive(SrcsAsSlice, DstsAsSlice)]
2238 pub struct OpFMnMx {
2239     pub dst: Dst,
2240 
2241     #[src_type(F32)]
2242     pub srcs: [Src; 2],
2243 
2244     #[src_type(Pred)]
2245     pub min: Src,
2246 
2247     pub ftz: bool,
2248 }
2249 
2250 impl DisplayOp for OpFMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2251     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2252         let ftz = if self.ftz { ".ftz" } else { "" };
2253         write!(
2254             f,
2255             "fmnmx{ftz} {} {} {}",
2256             self.srcs[0], self.srcs[1], self.min
2257         )
2258     }
2259 }
2260 impl_display_for_op!(OpFMnMx);
2261 
2262 #[repr(C)]
2263 #[derive(SrcsAsSlice, DstsAsSlice)]
2264 pub struct OpFMul {
2265     pub dst: Dst,
2266 
2267     #[src_type(F32)]
2268     pub srcs: [Src; 2],
2269 
2270     pub saturate: bool,
2271     pub rnd_mode: FRndMode,
2272     pub ftz: bool,
2273     pub dnz: bool,
2274 }
2275 
2276 impl DisplayOp for OpFMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2277     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2278         let sat = if self.saturate { ".sat" } else { "" };
2279         write!(f, "fmul{sat}")?;
2280         if self.rnd_mode != FRndMode::NearestEven {
2281             write!(f, "{}", self.rnd_mode)?;
2282         }
2283         if self.dnz {
2284             write!(f, ".dnz")?;
2285         } else if self.ftz {
2286             write!(f, ".ftz")?;
2287         }
2288         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2289     }
2290 }
2291 impl_display_for_op!(OpFMul);
2292 
2293 #[repr(C)]
2294 #[derive(SrcsAsSlice, DstsAsSlice)]
2295 pub struct OpFSet {
2296     pub dst: Dst,
2297     pub cmp_op: FloatCmpOp,
2298 
2299     #[src_type(F32)]
2300     pub srcs: [Src; 2],
2301 
2302     pub ftz: bool,
2303 }
2304 
2305 impl DisplayOp for OpFSet {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2306     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2307         let ftz = if self.ftz { ".ftz" } else { "" };
2308         write!(
2309             f,
2310             "fset{}{ftz} {} {}",
2311             self.cmp_op, self.srcs[0], self.srcs[1]
2312         )
2313     }
2314 }
2315 impl_display_for_op!(OpFSet);
2316 
2317 #[repr(C)]
2318 #[derive(SrcsAsSlice, DstsAsSlice)]
2319 pub struct OpFSetP {
2320     pub dst: Dst,
2321 
2322     pub set_op: PredSetOp,
2323     pub cmp_op: FloatCmpOp,
2324 
2325     #[src_type(F32)]
2326     pub srcs: [Src; 2],
2327 
2328     #[src_type(Pred)]
2329     pub accum: Src,
2330 
2331     pub ftz: bool,
2332 }
2333 
2334 impl DisplayOp for OpFSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2335     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2336         let ftz = if self.ftz { ".ftz" } else { "" };
2337         write!(f, "fsetp{}{ftz}", self.cmp_op)?;
2338         if !self.set_op.is_trivial(&self.accum) {
2339             write!(f, "{}", self.set_op)?;
2340         }
2341         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2342         if !self.set_op.is_trivial(&self.accum) {
2343             write!(f, " {}", self.accum)?;
2344         }
2345         Ok(())
2346     }
2347 }
2348 impl_display_for_op!(OpFSetP);
2349 
/// Per-entry operation selector used in `OpFSwzAdd::ops`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    /// Printed as `add`.
    Add,
    /// Printed as `subr`.
    SubRight,
    /// Printed as `sub`.
    SubLeft,
    /// Printed as `mov2`.
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    /// Writes the short operation name (no leading dot).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            FSwzAddOp::Add => "add",
            FSwzAddOp::SubRight => "subr",
            FSwzAddOp::SubLeft => "sub",
            FSwzAddOp::MoveLeft => "mov2",
        };
        f.write_str(name)
    }
}
2369 
2370 #[repr(C)]
2371 #[derive(SrcsAsSlice, DstsAsSlice)]
2372 pub struct OpFSwzAdd {
2373     pub dst: Dst,
2374 
2375     #[src_type(GPR)]
2376     pub srcs: [Src; 2],
2377 
2378     pub rnd_mode: FRndMode,
2379     pub ftz: bool,
2380 
2381     pub ops: [FSwzAddOp; 4],
2382 }
2383 
2384 impl DisplayOp for OpFSwzAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2385     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2386         write!(f, "fswzadd",)?;
2387         if self.rnd_mode != FRndMode::NearestEven {
2388             write!(f, "{}", self.rnd_mode)?;
2389         }
2390         if self.ftz {
2391             write!(f, ".ftz")?;
2392         }
2393         write!(
2394             f,
2395             " {} {} [{}, {}, {}, {}]",
2396             self.srcs[0],
2397             self.srcs[1],
2398             self.ops[0],
2399             self.ops[1],
2400             self.ops[2],
2401             self.ops[3],
2402         )
2403     }
2404 }
2405 impl_display_for_op!(OpFSwzAdd);
2406 
/// Operation selector for `OpRro`.
///
/// Derives the same `Clone, Copy, Eq, PartialEq` set as the sibling
/// `MuFuOp` selector so values can be copied and compared.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum RroOp {
    /// Printed as `.sincos`.
    SinCos,
    /// Printed as `.exp2`.
    Exp2,
}

impl fmt::Display for RroOp {
    /// Writes the dotted operation suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            RroOp::SinCos => write!(f, ".sincos"),
            RroOp::Exp2 => write!(f, ".exp2"),
        }
    }
}
2420 
2421 /// MuFu range reduction operator
2422 ///
2423 /// Not available on SM70+
2424 #[repr(C)]
2425 #[derive(SrcsAsSlice, DstsAsSlice)]
2426 pub struct OpRro {
2427     pub dst: Dst,
2428     pub op: RroOp,
2429 
2430     #[src_type(F32)]
2431     pub src: Src,
2432 }
2433 
2434 impl DisplayOp for OpRro {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2435     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2436         write!(f, "rro{} {}", self.op, self.src)
2437     }
2438 }
2439 impl_display_for_op!(OpRro);
2440 
/// Function selector for `OpMuFu`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum MuFuOp {
    Cos,
    Sin,
    Exp2,
    Log2,
    Rcp,
    Rsq,
    Rcp64H,
    Rsq64H,
    Sqrt,
    Tanh,
}

impl fmt::Display for MuFuOp {
    /// Writes the lowercase function name (no leading dot).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            MuFuOp::Cos => "cos",
            MuFuOp::Sin => "sin",
            MuFuOp::Exp2 => "exp2",
            MuFuOp::Log2 => "log2",
            MuFuOp::Rcp => "rcp",
            MuFuOp::Rsq => "rsq",
            MuFuOp::Rcp64H => "rcp64h",
            MuFuOp::Rsq64H => "rsq64h",
            MuFuOp::Sqrt => "sqrt",
            MuFuOp::Tanh => "tanh",
        };
        f.write_str(name)
    }
}
2472 
2473 #[repr(C)]
2474 #[derive(SrcsAsSlice, DstsAsSlice)]
2475 pub struct OpMuFu {
2476     pub dst: Dst,
2477     pub op: MuFuOp,
2478 
2479     #[src_type(F32)]
2480     pub src: Src,
2481 }
2482 
2483 impl DisplayOp for OpMuFu {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2484     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2485         write!(f, "mufu.{} {}", self.op, self.src)
2486     }
2487 }
2488 impl_display_for_op!(OpMuFu);
2489 
2490 #[repr(C)]
2491 #[derive(SrcsAsSlice, DstsAsSlice)]
2492 pub struct OpDAdd {
2493     pub dst: Dst,
2494 
2495     #[src_type(F64)]
2496     pub srcs: [Src; 2],
2497 
2498     pub rnd_mode: FRndMode,
2499 }
2500 
2501 impl DisplayOp for OpDAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2502     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2503         write!(f, "dadd")?;
2504         if self.rnd_mode != FRndMode::NearestEven {
2505             write!(f, "{}", self.rnd_mode)?;
2506         }
2507         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2508     }
2509 }
2510 impl_display_for_op!(OpDAdd);
2511 
2512 #[repr(C)]
2513 #[derive(SrcsAsSlice, DstsAsSlice)]
2514 pub struct OpDMul {
2515     pub dst: Dst,
2516 
2517     #[src_type(F64)]
2518     pub srcs: [Src; 2],
2519 
2520     pub rnd_mode: FRndMode,
2521 }
2522 
2523 impl DisplayOp for OpDMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2524     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2525         write!(f, "dmul")?;
2526         if self.rnd_mode != FRndMode::NearestEven {
2527             write!(f, "{}", self.rnd_mode)?;
2528         }
2529         write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2530     }
2531 }
2532 impl_display_for_op!(OpDMul);
2533 
2534 #[repr(C)]
2535 #[derive(SrcsAsSlice, DstsAsSlice)]
2536 pub struct OpDFma {
2537     pub dst: Dst,
2538 
2539     #[src_type(F64)]
2540     pub srcs: [Src; 3],
2541 
2542     pub rnd_mode: FRndMode,
2543 }
2544 
2545 impl DisplayOp for OpDFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2546     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2547         write!(f, "dfma")?;
2548         if self.rnd_mode != FRndMode::NearestEven {
2549             write!(f, "{}", self.rnd_mode)?;
2550         }
2551         write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2552     }
2553 }
2554 impl_display_for_op!(OpDFma);
2555 
2556 #[repr(C)]
2557 #[derive(SrcsAsSlice, DstsAsSlice)]
2558 pub struct OpDMnMx {
2559     pub dst: Dst,
2560 
2561     #[src_type(F64)]
2562     pub srcs: [Src; 2],
2563 
2564     #[src_type(Pred)]
2565     pub min: Src,
2566 }
2567 
2568 impl DisplayOp for OpDMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2569     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2570         write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
2571     }
2572 }
2573 impl_display_for_op!(OpDMnMx);
2574 
2575 #[repr(C)]
2576 #[derive(SrcsAsSlice, DstsAsSlice)]
2577 pub struct OpDSetP {
2578     pub dst: Dst,
2579 
2580     pub set_op: PredSetOp,
2581     pub cmp_op: FloatCmpOp,
2582 
2583     #[src_type(F64)]
2584     pub srcs: [Src; 2],
2585 
2586     #[src_type(Pred)]
2587     pub accum: Src,
2588 }
2589 
2590 impl DisplayOp for OpDSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2591     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2592         write!(f, "dsetp{}", self.cmp_op)?;
2593         if !self.set_op.is_trivial(&self.accum) {
2594             write!(f, "{}", self.set_op)?;
2595         }
2596         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2597         if !self.set_op.is_trivial(&self.accum) {
2598             write!(f, " {}", self.accum)?;
2599         }
2600         Ok(())
2601     }
2602 }
2603 impl_display_for_op!(OpDSetP);
2604 
2605 #[repr(C)]
2606 #[derive(SrcsAsSlice, DstsAsSlice)]
2607 pub struct OpBMsk {
2608     pub dst: Dst,
2609 
2610     #[src_type(ALU)]
2611     pub pos: Src,
2612 
2613     #[src_type(ALU)]
2614     pub width: Src,
2615 
2616     pub wrap: bool,
2617 }
2618 
2619 impl DisplayOp for OpBMsk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2620     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2621         let wrap = if self.wrap { ".wrap" } else { ".clamp" };
2622         write!(f, "bmsk{} {} {}", wrap, self.pos, self.width)
2623     }
2624 }
2625 impl_display_for_op!(OpBMsk);
2626 
2627 #[repr(C)]
2628 #[derive(SrcsAsSlice, DstsAsSlice)]
2629 pub struct OpBRev {
2630     pub dst: Dst,
2631 
2632     #[src_type(ALU)]
2633     pub src: Src,
2634 }
2635 
2636 impl DisplayOp for OpBRev {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2637     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2638         write!(f, "brev {}", self.src)
2639     }
2640 }
2641 impl_display_for_op!(OpBRev);
2642 
2643 /// Bitfield extract. Extracts all bits from `base` starting at `offset` into
2644 /// `dst`.
2645 #[repr(C)]
2646 #[derive(SrcsAsSlice, DstsAsSlice)]
2647 pub struct OpBfe {
2648     /// Where to insert the bits.
2649     pub dst: Dst,
2650 
2651     /// The source of bits to extract.
2652     #[src_type(ALU)]
2653     pub base: Src,
2654 
2655     /// The range of bits to extract. This source is interpreted as four
2656     /// separate bytes, [b0, b1, b2, b3].
2657     ///
2658     /// b0 and b1: unused
2659     /// b2: the number of bits to extract.
2660     /// b3: the offset of the first bit to extract.
2661     ///
2662     /// This matches the way the hardware works.
2663     #[src_type(ALU)]
2664     pub range: Src,
2665 
2666     /// Whether the output is signed
2667     pub signed: bool,
2668 
2669     /// Whether to reverse the bits before inserting them into `dst`.
2670     pub reverse: bool,
2671 }
2672 
2673 impl DisplayOp for OpBfe {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2674     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2675         write!(f, "bfe")?;
2676         if self.signed {
2677             write!(f, ".s")?;
2678         }
2679         if self.reverse {
2680             write!(f, ".rev")?;
2681         }
2682         write!(f, " {} {}", self.base, self.range,)
2683     }
2684 }
2685 impl_display_for_op!(OpBfe);
2686 
2687 #[repr(C)]
2688 #[derive(SrcsAsSlice, DstsAsSlice)]
2689 pub struct OpFlo {
2690     pub dst: Dst,
2691 
2692     #[src_type(ALU)]
2693     pub src: Src,
2694 
2695     pub signed: bool,
2696     pub return_shift_amount: bool,
2697 }
2698 
2699 impl DisplayOp for OpFlo {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2700     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2701         write!(f, "flo")?;
2702         if self.return_shift_amount {
2703             write!(f, ".samt")?;
2704         }
2705         write!(f, " {}", self.src)
2706     }
2707 }
2708 impl_display_for_op!(OpFlo);
2709 
2710 #[repr(C)]
2711 #[derive(SrcsAsSlice, DstsAsSlice)]
2712 pub struct OpIAbs {
2713     pub dst: Dst,
2714 
2715     #[src_type(ALU)]
2716     pub src: Src,
2717 }
2718 
2719 impl DisplayOp for OpIAbs {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2720     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2721         write!(f, "iabs {}", self.src)
2722     }
2723 }
2724 impl_display_for_op!(OpIAbs);
2725 
2726 #[repr(C)]
2727 #[derive(SrcsAsSlice, DstsAsSlice)]
2728 pub struct OpINeg {
2729     pub dst: Dst,
2730 
2731     #[src_type(ALU)]
2732     pub src: Src,
2733 }
2734 
2735 impl DisplayOp for OpINeg {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2736     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2737         write!(f, "ineg {}", self.src)
2738     }
2739 }
2740 impl_display_for_op!(OpINeg);
2741 
2742 /// Only used on SM50
2743 #[repr(C)]
2744 #[derive(SrcsAsSlice, DstsAsSlice)]
2745 pub struct OpIAdd2 {
2746     pub dst: Dst,
2747     pub carry_out: Dst,
2748 
2749     #[src_type(ALU)]
2750     pub srcs: [Src; 2],
2751     pub carry_in: Src,
2752 }
2753 
2754 impl DisplayOp for OpIAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2755     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2756         write!(f, "iadd2 {} {}", self.srcs[0], self.srcs[1])?;
2757         if !self.carry_in.is_zero() {
2758             write!(f, " {}", self.carry_in)?;
2759         }
2760         Ok(())
2761     }
2762 }
2763 
2764 #[repr(C)]
2765 #[derive(SrcsAsSlice, DstsAsSlice)]
2766 pub struct OpIAdd3 {
2767     pub dst: Dst,
2768     pub overflow: [Dst; 2],
2769 
2770     #[src_type(I32)]
2771     pub srcs: [Src; 3],
2772 }
2773 
2774 impl DisplayOp for OpIAdd3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2775     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2776         write!(
2777             f,
2778             "iadd3 {} {} {}",
2779             self.srcs[0], self.srcs[1], self.srcs[2],
2780         )
2781     }
2782 }
2783 impl_display_for_op!(OpIAdd3);
2784 
2785 #[repr(C)]
2786 #[derive(SrcsAsSlice, DstsAsSlice)]
2787 pub struct OpIAdd3X {
2788     pub dst: Dst,
2789     pub overflow: [Dst; 2],
2790 
2791     #[src_type(B32)]
2792     pub srcs: [Src; 3],
2793 
2794     #[src_type(Pred)]
2795     pub carry: [Src; 2],
2796 }
2797 
2798 impl DisplayOp for OpIAdd3X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2799     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2800         write!(
2801             f,
2802             "iadd3.x {} {} {} {} {}",
2803             self.srcs[0],
2804             self.srcs[1],
2805             self.srcs[2],
2806             self.carry[0],
2807             self.carry[1]
2808         )
2809     }
2810 }
2811 impl_display_for_op!(OpIAdd3X);
2812 
2813 #[repr(C)]
2814 #[derive(SrcsAsSlice, DstsAsSlice)]
2815 pub struct OpIDp4 {
2816     pub dst: Dst,
2817 
2818     pub src_types: [IntType; 2],
2819 
2820     #[src_type(I32)]
2821     pub srcs: [Src; 3],
2822 }
2823 
2824 impl DisplayOp for OpIDp4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2825     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2826         write!(
2827             f,
2828             "idp4{}{} {} {} {}",
2829             self.src_types[0],
2830             self.src_types[1],
2831             self.srcs[0],
2832             self.srcs[1],
2833             self.srcs[2],
2834         )
2835     }
2836 }
2837 impl_display_for_op!(OpIDp4);
2838 
2839 #[repr(C)]
2840 #[derive(SrcsAsSlice, DstsAsSlice)]
2841 pub struct OpIMad {
2842     pub dst: Dst,
2843 
2844     #[src_type(ALU)]
2845     pub srcs: [Src; 3],
2846 
2847     pub signed: bool,
2848 }
2849 
2850 impl DisplayOp for OpIMad {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2851     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2852         write!(f, "imad {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2],)
2853     }
2854 }
2855 impl_display_for_op!(OpIMad);
2856 
2857 /// Only used on SM50
2858 #[repr(C)]
2859 #[derive(SrcsAsSlice, DstsAsSlice)]
2860 pub struct OpIMul {
2861     pub dst: Dst,
2862 
2863     #[src_type(ALU)]
2864     pub srcs: [Src; 2],
2865 
2866     pub signed: [bool; 2],
2867     pub high: bool,
2868 }
2869 
2870 impl DisplayOp for OpIMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2871     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2872         write!(f, "imul")?;
2873         if self.high {
2874             write!(f, ".hi")?;
2875         }
2876         let src_type = |signed| if signed { ".s32" } else { ".u32" };
2877         write!(
2878             f,
2879             "{}{}",
2880             src_type(self.signed[0]),
2881             src_type(self.signed[1])
2882         )?;
2883         write!(f, " {} {}", self.srcs[0], self.srcs[1])
2884     }
2885 }
2886 
2887 #[repr(C)]
2888 #[derive(SrcsAsSlice, DstsAsSlice)]
2889 pub struct OpIMad64 {
2890     pub dst: Dst,
2891 
2892     #[src_type(ALU)]
2893     pub srcs: [Src; 3],
2894 
2895     pub signed: bool,
2896 }
2897 
2898 impl DisplayOp for OpIMad64 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2899     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2900         write!(
2901             f,
2902             "imad64 {} {} {}",
2903             self.srcs[0], self.srcs[1], self.srcs[2],
2904         )
2905     }
2906 }
2907 impl_display_for_op!(OpIMad64);
2908 
2909 #[repr(C)]
2910 #[derive(SrcsAsSlice, DstsAsSlice)]
2911 pub struct OpIMnMx {
2912     pub dst: Dst,
2913     pub cmp_type: IntCmpType,
2914 
2915     #[src_type(ALU)]
2916     pub srcs: [Src; 2],
2917 
2918     #[src_type(Pred)]
2919     pub min: Src,
2920 }
2921 
2922 impl DisplayOp for OpIMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2923     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2924         write!(
2925             f,
2926             "imnmx{} {} {} {}",
2927             self.cmp_type, self.srcs[0], self.srcs[1], self.min
2928         )
2929     }
2930 }
2931 impl_display_for_op!(OpIMnMx);
2932 
2933 #[repr(C)]
2934 #[derive(SrcsAsSlice, DstsAsSlice)]
2935 pub struct OpISetP {
2936     pub dst: Dst,
2937 
2938     pub set_op: PredSetOp,
2939     pub cmp_op: IntCmpOp,
2940     pub cmp_type: IntCmpType,
2941     pub ex: bool,
2942 
2943     #[src_type(ALU)]
2944     pub srcs: [Src; 2],
2945 
2946     #[src_type(Pred)]
2947     pub accum: Src,
2948 
2949     #[src_type(Pred)]
2950     pub low_cmp: Src,
2951 }
2952 
2953 impl DisplayOp for OpISetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2954     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2955         write!(f, "isetp{}{}", self.cmp_op, self.cmp_type)?;
2956         if !self.set_op.is_trivial(&self.accum) {
2957             write!(f, "{}", self.set_op)?;
2958         }
2959         if self.ex {
2960             write!(f, ".ex")?;
2961         }
2962         write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2963         if !self.set_op.is_trivial(&self.accum) {
2964             write!(f, " {}", self.accum)?;
2965         }
2966         if self.ex {
2967             write!(f, " {}", self.low_cmp)?;
2968         }
2969         Ok(())
2970     }
2971 }
2972 impl_display_for_op!(OpISetP);
2973 
2974 #[repr(C)]
2975 #[derive(SrcsAsSlice, DstsAsSlice)]
2976 pub struct OpLop2 {
2977     pub dst: Dst,
2978 
2979     #[src_type(ALU)]
2980     pub srcs: [Src; 2],
2981 
2982     pub op: LogicOp2,
2983 }
2984 
2985 impl DisplayOp for OpLop2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2986     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2987         write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
2988     }
2989 }
2990 
2991 #[repr(C)]
2992 #[derive(SrcsAsSlice, DstsAsSlice)]
2993 pub struct OpLop3 {
2994     pub dst: Dst,
2995 
2996     #[src_type(ALU)]
2997     pub srcs: [Src; 3],
2998 
2999     pub op: LogicOp3,
3000 }
3001 
3002 impl DisplayOp for OpLop3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3003     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3004         write!(
3005             f,
3006             "lop3.{} {} {} {}",
3007             self.op, self.srcs[0], self.srcs[1], self.srcs[2],
3008         )
3009     }
3010 }
3011 impl_display_for_op!(OpLop3);
3012 
/// Shuffle mode selector.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ShflOp {
    /// Printed as `idx`.
    Idx,
    /// Printed as `up`.
    Up,
    /// Printed as `down`.
    Down,
    /// Printed as `bfly`.
    Bfly,
}

impl fmt::Display for ShflOp {
    /// Writes the lowercase mode name (no leading dot).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            ShflOp::Idx => "idx",
            ShflOp::Up => "up",
            ShflOp::Down => "down",
            ShflOp::Bfly => "bfly",
        };
        f.write_str(name)
    }
}
3032 
3033 #[repr(C)]
3034 #[derive(SrcsAsSlice, DstsAsSlice)]
3035 pub struct OpShf {
3036     pub dst: Dst,
3037 
3038     #[src_type(GPR)]
3039     pub low: Src,
3040 
3041     #[src_type(ALU)]
3042     pub high: Src,
3043 
3044     #[src_type(GPR)]
3045     pub shift: Src,
3046 
3047     pub right: bool,
3048     pub wrap: bool,
3049     pub data_type: IntType,
3050     pub dst_high: bool,
3051 }
3052 
3053 impl DisplayOp for OpShf {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3054     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3055         write!(f, "shf")?;
3056         if self.right {
3057             write!(f, ".r")?;
3058         } else {
3059             write!(f, ".l")?;
3060         }
3061         if self.wrap {
3062             write!(f, ".w")?;
3063         }
3064         write!(f, "{}", self.data_type)?;
3065         if self.dst_high {
3066             write!(f, ".hi")?;
3067         }
3068         write!(f, " {} {} {}", self.low, self.high, self.shift)
3069     }
3070 }
3071 impl_display_for_op!(OpShf);
3072 
3073 /// Only used on SM50
3074 #[repr(C)]
3075 #[derive(SrcsAsSlice, DstsAsSlice)]
3076 pub struct OpShl {
3077     pub dst: Dst,
3078 
3079     #[src_type(GPR)]
3080     pub src: Src,
3081 
3082     #[src_type(ALU)]
3083     pub shift: Src,
3084 
3085     pub wrap: bool,
3086 }
3087 
3088 impl DisplayOp for OpShl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3089     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3090         write!(f, "shl")?;
3091         if self.wrap {
3092             write!(f, ".w")?;
3093         }
3094         write!(f, " {} {}", self.src, self.shift)
3095     }
3096 }
3097 
3098 /// Only used on SM50
3099 #[repr(C)]
3100 #[derive(SrcsAsSlice, DstsAsSlice)]
3101 pub struct OpShr {
3102     pub dst: Dst,
3103 
3104     #[src_type(GPR)]
3105     pub src: Src,
3106 
3107     #[src_type(ALU)]
3108     pub shift: Src,
3109 
3110     pub wrap: bool,
3111     pub signed: bool,
3112 }
3113 
3114 impl DisplayOp for OpShr {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3115     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3116         write!(f, "shr")?;
3117         if self.wrap {
3118             write!(f, ".w")?;
3119         }
3120         if !self.signed {
3121             write!(f, ".u32")?;
3122         }
3123         write!(f, " {} {}", self.src, self.shift)
3124     }
3125 }
3126 
3127 #[repr(C)]
3128 #[derive(DstsAsSlice)]
3129 pub struct OpF2F {
3130     pub dst: Dst,
3131 
3132     pub src: Src,
3133 
3134     pub src_type: FloatType,
3135     pub dst_type: FloatType,
3136     pub rnd_mode: FRndMode,
3137     pub ftz: bool,
3138     /// Place the result into the upper 16 bits of the destination register
3139     pub high: bool,
3140     /// Round to the nearest integer rather than nearest float
3141     ///
3142     /// Not available on SM70+
3143     pub integer_rnd: bool,
3144 }
3145 
3146 impl SrcsAsSlice for OpF2F {
srcs_as_slice(&self) -> &[Src]3147     fn srcs_as_slice(&self) -> &[Src] {
3148         std::slice::from_ref(&self.src)
3149     }
3150 
srcs_as_mut_slice(&mut self) -> &mut [Src]3151     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3152         std::slice::from_mut(&mut self.src)
3153     }
3154 
src_types(&self) -> SrcTypeList3155     fn src_types(&self) -> SrcTypeList {
3156         let src_type = match self.src_type {
3157             FloatType::F16 => SrcType::ALU,
3158             FloatType::F32 => SrcType::F32,
3159             FloatType::F64 => SrcType::F64,
3160         };
3161         SrcTypeList::Uniform(src_type)
3162     }
3163 }
3164 
3165 impl DisplayOp for OpF2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3166     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3167         write!(f, "f2f")?;
3168         if self.ftz {
3169             write!(f, ".ftz")?;
3170         }
3171         if self.integer_rnd {
3172             write!(f, ".int")?;
3173         }
3174         write!(
3175             f,
3176             "{}{}{} {}",
3177             self.dst_type, self.src_type, self.rnd_mode, self.src,
3178         )
3179     }
3180 }
3181 impl_display_for_op!(OpF2F);
3182 
3183 #[repr(C)]
3184 #[derive(DstsAsSlice)]
3185 pub struct OpF2I {
3186     pub dst: Dst,
3187 
3188     pub src: Src,
3189 
3190     pub src_type: FloatType,
3191     pub dst_type: IntType,
3192     pub rnd_mode: FRndMode,
3193     pub ftz: bool,
3194 }
3195 
3196 impl SrcsAsSlice for OpF2I {
srcs_as_slice(&self) -> &[Src]3197     fn srcs_as_slice(&self) -> &[Src] {
3198         std::slice::from_ref(&self.src)
3199     }
3200 
srcs_as_mut_slice(&mut self) -> &mut [Src]3201     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3202         std::slice::from_mut(&mut self.src)
3203     }
3204 
src_types(&self) -> SrcTypeList3205     fn src_types(&self) -> SrcTypeList {
3206         let src_type = match self.src_type {
3207             FloatType::F16 => SrcType::ALU,
3208             FloatType::F32 => SrcType::F32,
3209             FloatType::F64 => SrcType::F64,
3210         };
3211         SrcTypeList::Uniform(src_type)
3212     }
3213 }
3214 
3215 impl DisplayOp for OpF2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3216     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3217         let ftz = if self.ftz { ".ftz" } else { "" };
3218         write!(
3219             f,
3220             "f2i{}{}{}{ftz} {}",
3221             self.dst_type, self.src_type, self.rnd_mode, self.src,
3222         )
3223     }
3224 }
3225 impl_display_for_op!(OpF2I);
3226 
3227 #[repr(C)]
3228 #[derive(DstsAsSlice)]
3229 pub struct OpI2F {
3230     pub dst: Dst,
3231 
3232     pub src: Src,
3233 
3234     pub dst_type: FloatType,
3235     pub src_type: IntType,
3236     pub rnd_mode: FRndMode,
3237 }
3238 
3239 impl SrcsAsSlice for OpI2F {
srcs_as_slice(&self) -> &[Src]3240     fn srcs_as_slice(&self) -> &[Src] {
3241         std::slice::from_ref(&self.src)
3242     }
3243 
srcs_as_mut_slice(&mut self) -> &mut [Src]3244     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3245         std::slice::from_mut(&mut self.src)
3246     }
3247 
src_types(&self) -> SrcTypeList3248     fn src_types(&self) -> SrcTypeList {
3249         if self.src_type.bits() <= 32 {
3250             SrcTypeList::Uniform(SrcType::ALU)
3251         } else {
3252             SrcTypeList::Uniform(SrcType::GPR)
3253         }
3254     }
3255 }
3256 
3257 impl DisplayOp for OpI2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3258     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3259         write!(
3260             f,
3261             "i2f{}{}{} {}",
3262             self.dst_type, self.src_type, self.rnd_mode, self.src,
3263         )
3264     }
3265 }
3266 impl_display_for_op!(OpI2F);
3267 
3268 /// Not used on SM70+
3269 #[repr(C)]
3270 #[derive(SrcsAsSlice, DstsAsSlice)]
3271 pub struct OpI2I {
3272     pub dst: Dst,
3273 
3274     #[src_type(ALU)]
3275     pub src: Src,
3276 
3277     pub src_type: IntType,
3278     pub dst_type: IntType,
3279 
3280     pub saturate: bool,
3281     pub abs: bool,
3282     pub neg: bool,
3283 }
3284 
3285 impl DisplayOp for OpI2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3286     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3287         write!(f, "i2i")?;
3288         if self.saturate {
3289             write!(f, ".sat ")?;
3290         }
3291         write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
3292         if self.abs {
3293             write!(f, ".abs")?;
3294         }
3295         if self.neg {
3296             write!(f, ".neg")?;
3297         }
3298         Ok(())
3299     }
3300 }
3301 impl_display_for_op!(OpI2I);
3302 
3303 #[repr(C)]
3304 #[derive(DstsAsSlice)]
3305 pub struct OpFRnd {
3306     pub dst: Dst,
3307 
3308     pub src: Src,
3309 
3310     pub dst_type: FloatType,
3311     pub src_type: FloatType,
3312     pub rnd_mode: FRndMode,
3313     pub ftz: bool,
3314 }
3315 
3316 impl SrcsAsSlice for OpFRnd {
srcs_as_slice(&self) -> &[Src]3317     fn srcs_as_slice(&self) -> &[Src] {
3318         std::slice::from_ref(&self.src)
3319     }
3320 
srcs_as_mut_slice(&mut self) -> &mut [Src]3321     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3322         std::slice::from_mut(&mut self.src)
3323     }
3324 
src_types(&self) -> SrcTypeList3325     fn src_types(&self) -> SrcTypeList {
3326         let src_type = match self.src_type {
3327             FloatType::F16 => SrcType::ALU,
3328             FloatType::F32 => SrcType::F32,
3329             FloatType::F64 => SrcType::F64,
3330         };
3331         SrcTypeList::Uniform(src_type)
3332     }
3333 }
3334 
3335 impl DisplayOp for OpFRnd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3336     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3337         let ftz = if self.ftz { ".ftz" } else { "" };
3338         write!(
3339             f,
3340             "frnd{}{}{}{ftz} {}",
3341             self.dst_type, self.src_type, self.rnd_mode, self.src,
3342         )
3343     }
3344 }
3345 impl_display_for_op!(OpFRnd);
3346 
3347 #[repr(C)]
3348 #[derive(SrcsAsSlice, DstsAsSlice)]
3349 pub struct OpMov {
3350     pub dst: Dst,
3351 
3352     #[src_type(ALU)]
3353     pub src: Src,
3354 
3355     pub quad_lanes: u8,
3356 }
3357 
3358 impl DisplayOp for OpMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3359     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3360         if self.quad_lanes == 0xf {
3361             write!(f, "mov {}", self.src)
3362         } else {
3363             write!(f, "mov[{:#x}] {}", self.quad_lanes, self.src)
3364         }
3365     }
3366 }
3367 impl_display_for_op!(OpMov);
3368 
/// Mode selector carried by [`OpPrmt`] in its `mode` field.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
    Index,
    Forward4Extract,
    Backward4Extract,
    Replicate8,
    EdgeClampLeft,
    EdgeClampRight,
    Replicate16,
}

impl fmt::Display for PrmtMode {
    /// Writes the mode as a dotted mnemonic suffix.
    ///
    /// `Index` writes nothing; every other mode writes a distinct suffix
    /// (`EdgeClampLeft` is `.ecl`, `EdgeClampRight` is `.ecr`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            PrmtMode::Index => "",
            PrmtMode::Forward4Extract => ".f4e",
            PrmtMode::Backward4Extract => ".b4e",
            PrmtMode::Replicate8 => ".rc8",
            PrmtMode::EdgeClampLeft => ".ecl",
            PrmtMode::EdgeClampRight => ".ecr",
            PrmtMode::Replicate16 => ".rc16",
        };
        f.write_str(suffix)
    }
}
3394 
3395 #[repr(C)]
3396 #[derive(SrcsAsSlice, DstsAsSlice)]
3397 /// Permutes `srcs` into `dst` using `selection`.
3398 pub struct OpPrmt {
3399     pub dst: Dst,
3400 
3401     #[src_type(ALU)]
3402     pub srcs: [Src; 2],
3403 
3404     #[src_type(ALU)]
3405     pub sel: Src,
3406 
3407     pub mode: PrmtMode,
3408 }
3409 
3410 impl DisplayOp for OpPrmt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3411     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3412         write!(
3413             f,
3414             "prmt{} {} [{}] {}",
3415             self.mode, self.srcs[0], self.sel, self.srcs[1],
3416         )
3417     }
3418 }
3419 impl_display_for_op!(OpPrmt);
3420 
3421 #[repr(C)]
3422 #[derive(SrcsAsSlice, DstsAsSlice)]
3423 pub struct OpSel {
3424     pub dst: Dst,
3425 
3426     #[src_type(Pred)]
3427     pub cond: Src,
3428 
3429     #[src_type(ALU)]
3430     pub srcs: [Src; 2],
3431 }
3432 
3433 impl DisplayOp for OpSel {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3434     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3435         write!(f, "sel {} {} {}", self.cond, self.srcs[0], self.srcs[1],)
3436     }
3437 }
3438 impl_display_for_op!(OpSel);
3439 
3440 #[repr(C)]
3441 #[derive(SrcsAsSlice, DstsAsSlice)]
3442 pub struct OpShfl {
3443     pub dst: Dst,
3444     pub in_bounds: Dst,
3445 
3446     #[src_type(SSA)]
3447     pub src: Src,
3448 
3449     #[src_type(ALU)]
3450     pub lane: Src,
3451 
3452     #[src_type(ALU)]
3453     pub c: Src,
3454 
3455     pub op: ShflOp,
3456 }
3457 
3458 impl DisplayOp for OpShfl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3459     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3460         write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)
3461     }
3462 }
3463 impl_display_for_op!(OpShfl);
3464 
3465 #[repr(C)]
3466 #[derive(SrcsAsSlice, DstsAsSlice)]
3467 pub struct OpPLop3 {
3468     pub dsts: [Dst; 2],
3469 
3470     #[src_type(Pred)]
3471     pub srcs: [Src; 3],
3472 
3473     pub ops: [LogicOp3; 2],
3474 }
3475 
3476 impl DisplayOp for OpPLop3 {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3477     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3478         write!(f, "{} {}", self.dsts[0], self.dsts[1])
3479     }
3480 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3481     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3482         write!(
3483             f,
3484             "plop3 {} {} {} {} {}",
3485             self.srcs[0], self.srcs[1], self.srcs[2], self.ops[0], self.ops[1],
3486         )
3487     }
3488 }
3489 impl_display_for_op!(OpPLop3);
3490 
3491 #[repr(C)]
3492 #[derive(SrcsAsSlice, DstsAsSlice)]
3493 pub struct OpPSetP {
3494     pub dsts: [Dst; 2],
3495 
3496     pub ops: [PredSetOp; 2],
3497 
3498     #[src_type(Pred)]
3499     pub srcs: [Src; 3],
3500 }
3501 
3502 impl DisplayOp for OpPSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3503     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3504         write!(
3505             f,
3506             "psetp{}{} {} {} {}",
3507             self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
3508         )
3509     }
3510 }
3511 
3512 #[repr(C)]
3513 #[derive(SrcsAsSlice, DstsAsSlice)]
3514 pub struct OpPopC {
3515     pub dst: Dst,
3516 
3517     #[src_type(ALU)]
3518     pub src: Src,
3519 }
3520 
3521 impl DisplayOp for OpPopC {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3522     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3523         write!(f, "popc {}", self.src,)
3524     }
3525 }
3526 impl_display_for_op!(OpPopC);
3527 
3528 #[repr(C)]
3529 #[derive(SrcsAsSlice, DstsAsSlice)]
3530 pub struct OpTex {
3531     pub dsts: [Dst; 2],
3532     pub resident: Dst,
3533 
3534     #[src_type(SSA)]
3535     pub srcs: [Src; 2],
3536 
3537     pub dim: TexDim,
3538     pub lod_mode: TexLodMode,
3539     pub z_cmpr: bool,
3540     pub offset: bool,
3541     pub mask: u8,
3542 }
3543 
3544 impl DisplayOp for OpTex {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3545     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3546         write!(f, "tex.b{}", self.dim)?;
3547         if self.lod_mode != TexLodMode::Auto {
3548             write!(f, ".{}", self.lod_mode)?;
3549         }
3550         if self.offset {
3551             write!(f, ".aoffi")?;
3552         }
3553         if self.z_cmpr {
3554             write!(f, ".dc")?;
3555         }
3556         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3557     }
3558 }
3559 impl_display_for_op!(OpTex);
3560 
3561 #[repr(C)]
3562 #[derive(SrcsAsSlice, DstsAsSlice)]
3563 pub struct OpTld {
3564     pub dsts: [Dst; 2],
3565     pub resident: Dst,
3566 
3567     #[src_type(SSA)]
3568     pub srcs: [Src; 2],
3569 
3570     pub dim: TexDim,
3571     pub is_ms: bool,
3572     pub lod_mode: TexLodMode,
3573     pub offset: bool,
3574     pub mask: u8,
3575 }
3576 
3577 impl DisplayOp for OpTld {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3578     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3579         write!(f, "tld.b{}", self.dim)?;
3580         if self.lod_mode != TexLodMode::Auto {
3581             write!(f, ".{}", self.lod_mode)?;
3582         }
3583         if self.offset {
3584             write!(f, ".aoffi")?;
3585         }
3586         if self.is_ms {
3587             write!(f, ".ms")?;
3588         }
3589         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3590     }
3591 }
3592 impl_display_for_op!(OpTld);
3593 
3594 #[repr(C)]
3595 #[derive(SrcsAsSlice, DstsAsSlice)]
3596 pub struct OpTld4 {
3597     pub dsts: [Dst; 2],
3598     pub resident: Dst,
3599 
3600     #[src_type(SSA)]
3601     pub srcs: [Src; 2],
3602 
3603     pub dim: TexDim,
3604     pub comp: u8,
3605     pub offset_mode: Tld4OffsetMode,
3606     pub z_cmpr: bool,
3607     pub mask: u8,
3608 }
3609 
3610 impl DisplayOp for OpTld4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3611     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3612         write!(f, "tld4.g.b{}", self.dim)?;
3613         if self.offset_mode != Tld4OffsetMode::None {
3614             write!(f, ".{}", self.offset_mode)?;
3615         }
3616         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3617     }
3618 }
3619 impl_display_for_op!(OpTld4);
3620 
3621 #[repr(C)]
3622 #[derive(SrcsAsSlice, DstsAsSlice)]
3623 pub struct OpTmml {
3624     pub dsts: [Dst; 2],
3625 
3626     #[src_type(SSA)]
3627     pub srcs: [Src; 2],
3628 
3629     pub dim: TexDim,
3630     pub mask: u8,
3631 }
3632 
3633 impl DisplayOp for OpTmml {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3634     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3635         write!(
3636             f,
3637             "tmml.b.lod{} {} {}",
3638             self.dim, self.srcs[0], self.srcs[1]
3639         )
3640     }
3641 }
3642 impl_display_for_op!(OpTmml);
3643 
3644 #[repr(C)]
3645 #[derive(SrcsAsSlice, DstsAsSlice)]
3646 pub struct OpTxd {
3647     pub dsts: [Dst; 2],
3648     pub resident: Dst,
3649 
3650     #[src_type(SSA)]
3651     pub srcs: [Src; 2],
3652 
3653     pub dim: TexDim,
3654     pub offset: bool,
3655     pub mask: u8,
3656 }
3657 
3658 impl DisplayOp for OpTxd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3659     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3660         write!(f, "txd.b{}", self.dim)?;
3661         if self.offset {
3662             write!(f, ".aoffi")?;
3663         }
3664         write!(f, " {} {}", self.srcs[0], self.srcs[1])
3665     }
3666 }
3667 impl_display_for_op!(OpTxd);
3668 
3669 #[repr(C)]
3670 #[derive(SrcsAsSlice, DstsAsSlice)]
3671 pub struct OpTxq {
3672     pub dsts: [Dst; 2],
3673 
3674     #[src_type(SSA)]
3675     pub src: Src,
3676 
3677     pub query: TexQuery,
3678     pub mask: u8,
3679 }
3680 
3681 impl DisplayOp for OpTxq {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3682     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3683         write!(f, "txq.b {} {}", self.src, self.query)
3684     }
3685 }
3686 impl_display_for_op!(OpTxq);
3687 
3688 #[repr(C)]
3689 #[derive(SrcsAsSlice, DstsAsSlice)]
3690 pub struct OpSuLd {
3691     pub dst: Dst,
3692     pub resident: Dst,
3693 
3694     pub image_dim: ImageDim,
3695     pub mem_order: MemOrder,
3696     pub mem_eviction_priority: MemEvictionPriority,
3697     pub mask: u8,
3698 
3699     #[src_type(GPR)]
3700     pub handle: Src,
3701 
3702     #[src_type(SSA)]
3703     pub coord: Src,
3704 }
3705 
3706 impl DisplayOp for OpSuLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3707     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3708         write!(
3709             f,
3710             "suld.p{}{}{} [{}] {}",
3711             self.image_dim,
3712             self.mem_order,
3713             self.mem_eviction_priority,
3714             self.coord,
3715             self.handle,
3716         )
3717     }
3718 }
3719 impl_display_for_op!(OpSuLd);
3720 
3721 #[repr(C)]
3722 #[derive(SrcsAsSlice, DstsAsSlice)]
3723 pub struct OpSuSt {
3724     pub image_dim: ImageDim,
3725     pub mem_order: MemOrder,
3726     pub mem_eviction_priority: MemEvictionPriority,
3727     pub mask: u8,
3728 
3729     #[src_type(GPR)]
3730     pub handle: Src,
3731 
3732     #[src_type(SSA)]
3733     pub coord: Src,
3734 
3735     #[src_type(SSA)]
3736     pub data: Src,
3737 }
3738 
3739 impl DisplayOp for OpSuSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3740     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3741         write!(
3742             f,
3743             "sust.p{}{}{} [{}] {} {}",
3744             self.image_dim,
3745             self.mem_order,
3746             self.mem_eviction_priority,
3747             self.coord,
3748             self.data,
3749             self.handle,
3750         )
3751     }
3752 }
3753 impl_display_for_op!(OpSuSt);
3754 
3755 #[repr(C)]
3756 #[derive(SrcsAsSlice, DstsAsSlice)]
3757 pub struct OpSuAtom {
3758     pub dst: Dst,
3759     pub resident: Dst,
3760 
3761     pub image_dim: ImageDim,
3762 
3763     pub atom_op: AtomOp,
3764     pub atom_type: AtomType,
3765 
3766     pub mem_order: MemOrder,
3767     pub mem_eviction_priority: MemEvictionPriority,
3768 
3769     #[src_type(GPR)]
3770     pub handle: Src,
3771 
3772     #[src_type(SSA)]
3773     pub coord: Src,
3774 
3775     #[src_type(SSA)]
3776     pub data: Src,
3777 }
3778 
3779 impl DisplayOp for OpSuAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3780     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3781         write!(
3782             f,
3783             "suatom.p{}{}{}{}{} [{}] {} {}",
3784             self.image_dim,
3785             self.atom_op,
3786             self.atom_type,
3787             self.mem_order,
3788             self.mem_eviction_priority,
3789             self.coord,
3790             self.data,
3791             self.handle,
3792         )
3793     }
3794 }
3795 impl_display_for_op!(OpSuAtom);
3796 
3797 #[repr(C)]
3798 #[derive(SrcsAsSlice, DstsAsSlice)]
3799 pub struct OpLd {
3800     pub dst: Dst,
3801 
3802     #[src_type(GPR)]
3803     pub addr: Src,
3804 
3805     pub offset: i32,
3806     pub access: MemAccess,
3807 }
3808 
3809 impl DisplayOp for OpLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3810     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3811         write!(f, "ld{} [{}", self.access, self.addr)?;
3812         if self.offset > 0 {
3813             write!(f, "+{:#x}", self.offset)?;
3814         }
3815         write!(f, "]")
3816     }
3817 }
3818 impl_display_for_op!(OpLd);
3819 
3820 #[repr(C)]
3821 #[derive(SrcsAsSlice, DstsAsSlice)]
3822 pub struct OpLdc {
3823     pub dst: Dst,
3824 
3825     #[src_type(ALU)]
3826     pub cb: Src,
3827 
3828     #[src_type(GPR)]
3829     pub offset: Src,
3830 
3831     pub mem_type: MemType,
3832 }
3833 
3834 impl DisplayOp for OpLdc {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3835     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3836         let SrcRef::CBuf(cb) = self.cb.src_ref else {
3837             panic!("Not a cbuf");
3838         };
3839         write!(f, "ldc{} {}[", self.mem_type, cb.buf)?;
3840         if self.offset.is_zero() {
3841             write!(f, "+{:#x}", cb.offset)?;
3842         } else if cb.offset == 0 {
3843             write!(f, "{}", self.offset)?;
3844         } else {
3845             write!(f, "{}+{:#x}", self.offset, cb.offset)?;
3846         }
3847         write!(f, "]")
3848     }
3849 }
3850 impl_display_for_op!(OpLdc);
3851 
3852 #[repr(C)]
3853 #[derive(SrcsAsSlice, DstsAsSlice)]
3854 pub struct OpSt {
3855     #[src_type(GPR)]
3856     pub addr: Src,
3857 
3858     #[src_type(SSA)]
3859     pub data: Src,
3860 
3861     pub offset: i32,
3862     pub access: MemAccess,
3863 }
3864 
3865 impl DisplayOp for OpSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3866     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3867         write!(f, "st{} [{}", self.access, self.addr)?;
3868         if self.offset > 0 {
3869             write!(f, "+{:#x}", self.offset)?;
3870         }
3871         write!(f, "] {}", self.data)
3872     }
3873 }
3874 impl_display_for_op!(OpSt);
3875 
3876 #[repr(C)]
3877 #[derive(SrcsAsSlice, DstsAsSlice)]
3878 pub struct OpAtom {
3879     pub dst: Dst,
3880 
3881     #[src_type(GPR)]
3882     pub addr: Src,
3883 
3884     #[src_type(GPR)]
3885     pub cmpr: Src,
3886 
3887     #[src_type(SSA)]
3888     pub data: Src,
3889 
3890     pub atom_op: AtomOp,
3891     pub atom_type: AtomType,
3892 
3893     pub addr_offset: i32,
3894 
3895     pub mem_space: MemSpace,
3896     pub mem_order: MemOrder,
3897     pub mem_eviction_priority: MemEvictionPriority,
3898 }
3899 
3900 impl DisplayOp for OpAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3901     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3902         write!(
3903             f,
3904             "atom{}{}{}{}{}",
3905             self.atom_op,
3906             self.atom_type,
3907             self.mem_space,
3908             self.mem_order,
3909             self.mem_eviction_priority,
3910         )?;
3911         write!(f, " [")?;
3912         if !self.addr.is_zero() {
3913             write!(f, "{}", self.addr)?;
3914         }
3915         if self.addr_offset > 0 {
3916             if !self.addr.is_zero() {
3917                 write!(f, "+")?;
3918             }
3919             write!(f, "{:#x}", self.addr_offset)?;
3920         }
3921         write!(f, "] {}", self.data)
3922     }
3923 }
3924 impl_display_for_op!(OpAtom);
3925 
3926 #[repr(C)]
3927 #[derive(SrcsAsSlice, DstsAsSlice)]
3928 pub struct OpAL2P {
3929     pub dst: Dst,
3930 
3931     #[src_type(GPR)]
3932     pub offset: Src,
3933 
3934     pub access: AttrAccess,
3935 }
3936 
3937 impl DisplayOp for OpAL2P {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3938     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3939         write!(f, "al2p")?;
3940         if self.access.output {
3941             write!(f, ".o")?;
3942         }
3943         if self.access.patch {
3944             write!(f, ".p")?;
3945         }
3946         write!(f, " a[{:#x}", self.access.addr)?;
3947         if !self.offset.is_zero() {
3948             write!(f, "+{}", self.offset)?;
3949         }
3950         write!(f, "]")
3951     }
3952 }
3953 impl_display_for_op!(OpAL2P);
3954 
3955 #[repr(C)]
3956 #[derive(SrcsAsSlice, DstsAsSlice)]
3957 pub struct OpALd {
3958     pub dst: Dst,
3959 
3960     #[src_type(GPR)]
3961     pub vtx: Src,
3962 
3963     #[src_type(GPR)]
3964     pub offset: Src,
3965 
3966     pub access: AttrAccess,
3967 }
3968 
3969 impl DisplayOp for OpALd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3970     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3971         write!(f, "ald")?;
3972         if self.access.output {
3973             write!(f, ".o")?;
3974         }
3975         if self.access.patch {
3976             write!(f, ".p")?;
3977         }
3978         if self.access.phys {
3979             write!(f, ".phys")?;
3980         }
3981         write!(f, " a")?;
3982         if !self.vtx.is_zero() {
3983             write!(f, "[{}]", self.vtx)?;
3984         }
3985         write!(f, "[{:#x}", self.access.addr)?;
3986         if !self.offset.is_zero() {
3987             write!(f, "+{}", self.offset)?;
3988         }
3989         write!(f, "]")
3990     }
3991 }
3992 impl_display_for_op!(OpALd);
3993 
3994 #[repr(C)]
3995 #[derive(SrcsAsSlice, DstsAsSlice)]
3996 pub struct OpASt {
3997     #[src_type(GPR)]
3998     pub vtx: Src,
3999 
4000     #[src_type(GPR)]
4001     pub offset: Src,
4002 
4003     #[src_type(SSA)]
4004     pub data: Src,
4005 
4006     pub access: AttrAccess,
4007 }
4008 
4009 impl DisplayOp for OpASt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4010     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4011         write!(f, "ast")?;
4012         if self.access.patch {
4013             write!(f, ".p")?;
4014         }
4015         if self.access.phys {
4016             write!(f, ".phys")?;
4017         }
4018         write!(f, " a")?;
4019         if !self.vtx.is_zero() {
4020             write!(f, "[{}]", self.vtx)?;
4021         }
4022         write!(f, "[{:#x}", self.access.addr)?;
4023         if !self.offset.is_zero() {
4024             write!(f, "+{}", self.offset)?;
4025         }
4026         write!(f, "] {}", self.data)
4027     }
4028 }
4029 impl_display_for_op!(OpASt);
4030 
4031 #[repr(C)]
4032 #[derive(SrcsAsSlice, DstsAsSlice)]
4033 pub struct OpIpa {
4034     pub dst: Dst,
4035     pub addr: u16,
4036     pub freq: InterpFreq,
4037     pub loc: InterpLoc,
4038     pub inv_w: Src,
4039     pub offset: Src,
4040 }
4041 
4042 impl DisplayOp for OpIpa {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4043     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4044         write!(f, "ipa")?;
4045         match self.freq {
4046             InterpFreq::Pass => write!(f, ".pass")?,
4047             InterpFreq::PassMulW => write!(f, ".pass_mul_w")?,
4048             InterpFreq::Constant => write!(f, ".constant")?,
4049             InterpFreq::State => write!(f, ".state")?,
4050         }
4051         match self.loc {
4052             InterpLoc::Default => (),
4053             InterpLoc::Centroid => write!(f, ".centroid")?,
4054             InterpLoc::Offset => write!(f, ".offset")?,
4055         }
4056 
4057         write!(f, " {} a[{:#x}] {}", self.dst, self.addr, self.inv_w)?;
4058         if self.loc == InterpLoc::Offset {
4059             write!(f, " {}", self.offset)?;
4060         }
4061         Ok(())
4062     }
4063 }
4064 impl_display_for_op!(OpIpa);
4065 
4066 #[repr(C)]
4067 #[derive(SrcsAsSlice, DstsAsSlice)]
4068 pub struct OpLdTram {
4069     pub dst: Dst,
4070     pub addr: u16,
4071     pub use_c: bool,
4072 }
4073 
4074 impl DisplayOp for OpLdTram {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4075     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4076         write!(f, "ldtram")?;
4077         if self.use_c {
4078             write!(f, ".c")?;
4079         } else {
4080             write!(f, ".ab")?;
4081         }
4082         write!(f, " a[{:#x}]", self.addr)?;
4083         Ok(())
4084     }
4085 }
4086 impl_display_for_op!(OpLdTram);
4087 
/// Operation selector carried by [`OpCCtl`] in its `op` field.
#[allow(dead_code)]
pub enum CCtlOp {
    PF1,
    PF2,
    WB,
    IV,
    IVAll,
    RS,
    IVAllP,
    WBAll,
    WBAllP,
}

impl CCtlOp {
    /// Returns `true` for the `*All` and `*AllP` variants and `false` for
    /// every other variant.
    pub fn is_all(&self) -> bool {
        match self {
            CCtlOp::IVAll | CCtlOp::IVAllP | CCtlOp::WBAll | CCtlOp::WBAllP => {
                true
            }
            CCtlOp::PF1
            | CCtlOp::PF2
            | CCtlOp::WB
            | CCtlOp::IV
            | CCtlOp::RS => false,
        }
    }
}

impl fmt::Display for CCtlOp {
    /// Writes the lowercase variant name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            CCtlOp::PF1 => "pf1",
            CCtlOp::PF2 => "pf2",
            CCtlOp::WB => "wb",
            CCtlOp::IV => "iv",
            CCtlOp::IVAll => "ivall",
            CCtlOp::RS => "rs",
            CCtlOp::IVAllP => "ivallp",
            CCtlOp::WBAll => "wball",
            CCtlOp::WBAllP => "wballp",
        };
        f.write_str(name)
    }
}
4131 
4132 #[repr(C)]
4133 #[derive(SrcsAsSlice, DstsAsSlice)]
4134 pub struct OpCCtl {
4135     pub op: CCtlOp,
4136 
4137     pub mem_space: MemSpace,
4138 
4139     #[src_type(GPR)]
4140     pub addr: Src,
4141 
4142     pub addr_offset: i32,
4143 }
4144 
4145 impl DisplayOp for OpCCtl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4146     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4147         write!(f, "cctl{}", self.mem_space)?;
4148         if !self.op.is_all() {
4149             write!(f, " [{}", self.addr)?;
4150             if self.addr_offset > 0 {
4151                 write!(f, "+{:#x}", self.addr_offset)?;
4152             }
4153             write!(f, "]")?;
4154         }
4155         Ok(())
4156     }
4157 }
4158 impl_display_for_op!(OpCCtl);
4159 
4160 #[repr(C)]
4161 #[derive(SrcsAsSlice, DstsAsSlice)]
4162 pub struct OpMemBar {
4163     pub scope: MemScope,
4164 }
4165 
4166 impl DisplayOp for OpMemBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4167     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4168         write!(f, "membar.sc.{}", self.scope)
4169     }
4170 }
4171 impl_display_for_op!(OpMemBar);
4172 
4173 #[repr(C)]
4174 #[derive(SrcsAsSlice, DstsAsSlice)]
4175 pub struct OpBClear {
4176     pub dst: Dst,
4177 }
4178 
4179 impl DisplayOp for OpBClear {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4180     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4181         write!(f, "bclear")
4182     }
4183 }
4184 impl_display_for_op!(OpBClear);
4185 
4186 #[repr(C)]
4187 #[derive(SrcsAsSlice, DstsAsSlice)]
4188 pub struct OpBMov {
4189     pub dst: Dst,
4190     pub src: Src,
4191     pub clear: bool,
4192 }
4193 
4194 impl DisplayOp for OpBMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4195     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4196         write!(f, "bmov.32")?;
4197         if self.clear {
4198             write!(f, ".clear")?;
4199         }
4200         write!(f, " {}", self.src)
4201     }
4202 }
4203 impl_display_for_op!(OpBMov);
4204 
4205 #[repr(C)]
4206 #[derive(SrcsAsSlice, DstsAsSlice)]
4207 pub struct OpBreak {
4208     pub bar_out: Dst,
4209 
4210     #[src_type(Bar)]
4211     pub bar_in: Src,
4212 
4213     #[src_type(Pred)]
4214     pub cond: Src,
4215 }
4216 
4217 impl DisplayOp for OpBreak {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4218     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4219         write!(f, "break {} {}", self.bar_in, self.cond)
4220     }
4221 }
4222 impl_display_for_op!(OpBreak);
4223 
4224 #[repr(C)]
4225 #[derive(SrcsAsSlice, DstsAsSlice)]
4226 pub struct OpBSSy {
4227     pub bar_out: Dst,
4228 
4229     #[src_type(Pred)]
4230     pub bar_in: Src,
4231 
4232     #[src_type(Pred)]
4233     pub cond: Src,
4234 
4235     pub target: Label,
4236 }
4237 
4238 impl DisplayOp for OpBSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4239     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4240         write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
4241     }
4242 }
4243 impl_display_for_op!(OpBSSy);
4244 
4245 #[repr(C)]
4246 #[derive(SrcsAsSlice, DstsAsSlice)]
4247 pub struct OpBSync {
4248     #[src_type(Bar)]
4249     pub bar: Src,
4250 
4251     #[src_type(Pred)]
4252     pub cond: Src,
4253 }
4254 
4255 impl DisplayOp for OpBSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4256     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4257         write!(f, "bsync {} {}", self.bar, self.cond)
4258     }
4259 }
4260 impl_display_for_op!(OpBSync);
4261 
4262 #[repr(C)]
4263 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4264 pub struct OpBra {
4265     pub target: Label,
4266 }
4267 
4268 impl DisplayOp for OpBra {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4269     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4270         write!(f, "bra {}", self.target)
4271     }
4272 }
4273 impl_display_for_op!(OpBra);
4274 
4275 #[repr(C)]
4276 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4277 pub struct OpExit {}
4278 
4279 impl DisplayOp for OpExit {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4280     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4281         write!(f, "exit")
4282     }
4283 }
4284 impl_display_for_op!(OpExit);
4285 
4286 #[repr(C)]
4287 #[derive(SrcsAsSlice, DstsAsSlice)]
4288 pub struct OpWarpSync {
4289     pub mask: u32,
4290 }
4291 
4292 impl DisplayOp for OpWarpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4293     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4294         write!(f, "warpsync 0x{:x}", self.mask)
4295     }
4296 }
4297 impl_display_for_op!(OpWarpSync);
4298 
4299 #[repr(C)]
4300 #[derive(SrcsAsSlice, DstsAsSlice)]
4301 pub struct OpBar {}
4302 
4303 impl DisplayOp for OpBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4304     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4305         write!(f, "bar.sync")
4306     }
4307 }
4308 impl_display_for_op!(OpBar);
4309 
4310 #[repr(C)]
4311 #[derive(SrcsAsSlice, DstsAsSlice)]
4312 pub struct OpCS2R {
4313     pub dst: Dst,
4314     pub idx: u8,
4315 }
4316 
4317 impl DisplayOp for OpCS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4318     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4319         write!(f, "cs2r sr[{:#x}]", self.idx)
4320     }
4321 }
4322 impl_display_for_op!(OpCS2R);
4323 
4324 #[repr(C)]
4325 #[derive(SrcsAsSlice, DstsAsSlice)]
4326 pub struct OpIsberd {
4327     pub dst: Dst,
4328 
4329     #[src_type(SSA)]
4330     pub idx: Src,
4331 }
4332 
4333 impl DisplayOp for OpIsberd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4334     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4335         write!(f, "isberd {} [{}]", self.dst, self.idx)
4336     }
4337 }
4338 impl_display_for_op!(OpIsberd);
4339 
4340 #[repr(C)]
4341 #[derive(SrcsAsSlice, DstsAsSlice)]
4342 pub struct OpKill {}
4343 
4344 impl DisplayOp for OpKill {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4345     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4346         write!(f, "kill")
4347     }
4348 }
4349 impl_display_for_op!(OpKill);
4350 
4351 #[repr(C)]
4352 #[derive(SrcsAsSlice, DstsAsSlice)]
4353 pub struct OpNop {
4354     pub label: Option<Label>,
4355 }
4356 
4357 impl DisplayOp for OpNop {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4358     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4359         write!(f, "nop")?;
4360         if let Some(label) = &self.label {
4361             write!(f, " {}", label)?;
4362         }
4363         Ok(())
4364     }
4365 }
4366 impl_display_for_op!(OpNop);
4367 
4368 #[allow(dead_code)]
4369 pub enum PixVal {
4370     MsCount,
4371     CovMask,
4372     CentroidOffset,
4373     MyIndex,
4374     InnerCoverage,
4375 }
4376 
4377 #[repr(C)]
4378 #[derive(SrcsAsSlice, DstsAsSlice)]
4379 pub struct OpPixLd {
4380     pub dst: Dst,
4381     pub val: PixVal,
4382 }
4383 
4384 impl DisplayOp for OpPixLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4385     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4386         write!(f, "pixld")?;
4387         match self.val {
4388             PixVal::MsCount => write!(f, ".mscount"),
4389             PixVal::CovMask => write!(f, ".covmask"),
4390             PixVal::CentroidOffset => write!(f, ".centroid_offset"),
4391             PixVal::MyIndex => write!(f, ".my_index"),
4392             PixVal::InnerCoverage => write!(f, ".inner_coverage"),
4393         }
4394     }
4395 }
4396 impl_display_for_op!(OpPixLd);
4397 
4398 #[repr(C)]
4399 #[derive(SrcsAsSlice, DstsAsSlice)]
4400 pub struct OpS2R {
4401     pub dst: Dst,
4402     pub idx: u8,
4403 }
4404 
4405 impl DisplayOp for OpS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4406     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4407         write!(f, "s2r sr[{:#x}]", self.idx)
4408     }
4409 }
4410 impl_display_for_op!(OpS2R);
4411 
/// Selects the mode of an `OpVote`.
pub enum VoteOp {
    Any,
    All,
    Eq,
}

impl fmt::Display for VoteOp {
    /// Writes the lower-case name of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            VoteOp::Any => "any",
            VoteOp::All => "all",
            VoteOp::Eq => "eq",
        };
        f.write_str(name)
    }
}
4427 
4428 #[repr(C)]
4429 #[derive(SrcsAsSlice, DstsAsSlice)]
4430 pub struct OpVote {
4431     pub op: VoteOp,
4432 
4433     pub ballot: Dst,
4434     pub vote: Dst,
4435 
4436     #[src_type(Pred)]
4437     pub pred: Src,
4438 }
4439 
4440 impl DisplayOp for OpVote {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4441     fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4442         if self.ballot.is_none() && self.vote.is_none() {
4443             write!(f, "none")
4444         } else {
4445             if !self.ballot.is_none() {
4446                 write!(f, "{}", self.ballot)?;
4447             }
4448             if !self.vote.is_none() {
4449                 write!(f, "{}", self.vote)?;
4450             }
4451             Ok(())
4452         }
4453     }
4454 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4455     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4456         write!(f, "vote.{} {}", self.op, self.pred)
4457     }
4458 }
4459 impl_display_for_op!(OpVote);
4460 
4461 #[repr(C)]
4462 #[derive(SrcsAsSlice, DstsAsSlice)]
4463 pub struct OpUndef {
4464     pub dst: Dst,
4465 }
4466 
4467 impl DisplayOp for OpUndef {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4468     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4469         write!(f, "undef {}", self.dst)
4470     }
4471 }
4472 impl_display_for_op!(OpUndef);
4473 
4474 #[repr(C)]
4475 #[derive(SrcsAsSlice, DstsAsSlice)]
4476 pub struct OpSrcBar {
4477     pub src: Src,
4478 }
4479 
4480 impl DisplayOp for OpSrcBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4481     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4482         write!(f, "src_bar {}", self.src)
4483     }
4484 }
4485 impl_display_for_op!(OpSrcBar);
4486 
/// Two parallel vectors that are always kept at the same length.
///
/// Element `i` of `a` and element `i` of `b` form one logical pair.  Every
/// method that relies on this checks it with a `debug_assert!`.
pub struct VecPair<A, B> {
    a: Vec<A>,
    b: Vec<B>,
}

impl<A, B> VecPair<A, B> {
    /// Moves every pair out of `other` onto the end of `self`, leaving
    /// `other` empty.
    pub fn append(&mut self, other: &mut VecPair<A, B>) {
        self.a.append(&mut other.a);
        self.b.append(&mut other.b);
    }

    /// Returns `true` if there are no pairs.
    pub fn is_empty(&self) -> bool {
        debug_assert!(self.a.len() == self.b.len());
        self.a.is_empty()
    }

    /// Iterates over the pairs as `(&A, &B)`.
    pub fn iter(&self) -> Zip<slice::Iter<'_, A>, slice::Iter<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        self.a.iter().zip(self.b.iter())
    }

    /// Iterates over the pairs as `(&mut A, &mut B)`.
    pub fn iter_mut(
        &mut self,
    ) -> Zip<slice::IterMut<'_, A>, slice::IterMut<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        self.a.iter_mut().zip(self.b.iter_mut())
    }

    /// Returns the number of pairs.
    pub fn len(&self) -> usize {
        debug_assert!(self.a.len() == self.b.len());
        self.a.len()
    }

    /// Creates an empty `VecPair`.
    pub fn new() -> Self {
        Self {
            a: Vec::new(),
            b: Vec::new(),
        }
    }

    /// Appends the pair `(a, b)`.
    pub fn push(&mut self, a: A, b: B) {
        debug_assert!(self.a.len() == self.b.len());
        self.a.push(a);
        self.b.push(b);
    }

    /// Keeps only the pairs for which `f` returns `true`, preserving the
    /// relative order of the survivors.
    ///
    /// `f` is called exactly once per pair, in order.  Survivors are
    /// compacted towards the front by swapping, so no element is cloned
    /// and neither `A` nor `B` needs to implement `Clone`.
    pub fn retain(&mut self, mut f: impl FnMut(&A, &B) -> bool) {
        debug_assert!(self.a.len() == self.b.len());
        let len = self.a.len();

        // Invariant: indices `0..new_len` hold the pairs kept so far and
        // indices `new_len..i` hold rejected pairs that have already been
        // tested.  Swapping therefore never disturbs an untested pair.
        let mut new_len = 0_usize;
        for i in 0..len {
            if f(&self.a[i], &self.b[i]) {
                if new_len != i {
                    self.a.swap(new_len, i);
                    self.b.swap(new_len, i);
                }
                new_len += 1;
            }
        }

        self.a.truncate(new_len);
        self.b.truncate(new_len);
    }
}
4569 
/// Hands out consecutive `u32` indices, starting at zero.
pub struct PhiAllocator {
    count: u32,
}

impl PhiAllocator {
    /// Creates an allocator whose first index will be `0`.
    pub fn new() -> PhiAllocator {
        Self { count: 0 }
    }

    /// Returns the next unused index and advances the internal counter.
    pub fn alloc(&mut self) -> u32 {
        let next = self.count + 1;
        std::mem::replace(&mut self.count, next)
    }
}
4585 
4586 #[repr(C)]
4587 #[derive(DstsAsSlice)]
4588 pub struct OpPhiSrcs {
4589     pub srcs: VecPair<u32, Src>,
4590 }
4591 
4592 impl OpPhiSrcs {
new() -> OpPhiSrcs4593     pub fn new() -> OpPhiSrcs {
4594         OpPhiSrcs {
4595             srcs: VecPair::new(),
4596         }
4597     }
4598 }
4599 
4600 impl SrcsAsSlice for OpPhiSrcs {
srcs_as_slice(&self) -> &[Src]4601     fn srcs_as_slice(&self) -> &[Src] {
4602         &self.srcs.b
4603     }
4604 
srcs_as_mut_slice(&mut self) -> &mut [Src]4605     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
4606         &mut self.srcs.b
4607     }
4608 
src_types(&self) -> SrcTypeList4609     fn src_types(&self) -> SrcTypeList {
4610         SrcTypeList::Uniform(SrcType::GPR)
4611     }
4612 }
4613 
4614 impl DisplayOp for OpPhiSrcs {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result4615     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
4616         Ok(())
4617     }
4618 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4619     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4620         write!(f, "phi_src ")?;
4621         for (i, (id, src)) in self.srcs.iter().enumerate() {
4622             if i > 0 {
4623                 write!(f, ", ")?;
4624             }
4625             write!(f, "φ{} = {}", id, src)?;
4626         }
4627         Ok(())
4628     }
4629 }
4630 impl_display_for_op!(OpPhiSrcs);
4631 
4632 #[repr(C)]
4633 #[derive(SrcsAsSlice)]
4634 pub struct OpPhiDsts {
4635     pub dsts: VecPair<u32, Dst>,
4636 }
4637 
4638 impl OpPhiDsts {
new() -> OpPhiDsts4639     pub fn new() -> OpPhiDsts {
4640         OpPhiDsts {
4641             dsts: VecPair::new(),
4642         }
4643     }
4644 }
4645 
4646 impl DstsAsSlice for OpPhiDsts {
dsts_as_slice(&self) -> &[Dst]4647     fn dsts_as_slice(&self) -> &[Dst] {
4648         &self.dsts.b
4649     }
4650 
dsts_as_mut_slice(&mut self) -> &mut [Dst]4651     fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
4652         &mut self.dsts.b
4653     }
4654 }
4655 
4656 impl DisplayOp for OpPhiDsts {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result4657     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
4658         Ok(())
4659     }
4660 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4661     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4662         write!(f, "phi_dst ")?;
4663         for (i, (id, dst)) in self.dsts.iter().enumerate() {
4664             if i > 0 {
4665                 write!(f, ", ")?;
4666             }
4667             write!(f, "{} = φ{}", dst, id)?;
4668         }
4669         Ok(())
4670     }
4671 }
4672 impl_display_for_op!(OpPhiDsts);
4673 
4674 #[repr(C)]
4675 #[derive(SrcsAsSlice, DstsAsSlice)]
4676 pub struct OpCopy {
4677     pub dst: Dst,
4678     pub src: Src,
4679 }
4680 
4681 impl DisplayOp for OpCopy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4682     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4683         write!(f, "copy {}", self.src)
4684     }
4685 }
4686 impl_display_for_op!(OpCopy);
4687 
4688 #[repr(C)]
4689 #[derive(SrcsAsSlice, DstsAsSlice)]
4690 pub struct OpSwap {
4691     pub dsts: [Dst; 2],
4692     pub srcs: [Src; 2],
4693 }
4694 
4695 impl DisplayOp for OpSwap {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4696     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4697         write!(f, "swap {} {}", self.srcs[0], self.srcs[1])
4698     }
4699 }
4700 impl_display_for_op!(OpSwap);
4701 
4702 #[repr(C)]
4703 pub struct OpParCopy {
4704     pub dsts_srcs: VecPair<Dst, Src>,
4705     pub tmp: Option<RegRef>,
4706 }
4707 
4708 impl OpParCopy {
new() -> OpParCopy4709     pub fn new() -> OpParCopy {
4710         OpParCopy {
4711             dsts_srcs: VecPair::new(),
4712             tmp: None,
4713         }
4714     }
4715 
is_empty(&self) -> bool4716     pub fn is_empty(&self) -> bool {
4717         self.dsts_srcs.is_empty()
4718     }
4719 
push(&mut self, dst: Dst, src: Src)4720     pub fn push(&mut self, dst: Dst, src: Src) {
4721         self.dsts_srcs.push(dst, src);
4722     }
4723 }
4724 
4725 impl SrcsAsSlice for OpParCopy {
srcs_as_slice(&self) -> &[Src]4726     fn srcs_as_slice(&self) -> &[Src] {
4727         &self.dsts_srcs.b
4728     }
4729 
srcs_as_mut_slice(&mut self) -> &mut [Src]4730     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
4731         &mut self.dsts_srcs.b
4732     }
4733 
src_types(&self) -> SrcTypeList4734     fn src_types(&self) -> SrcTypeList {
4735         SrcTypeList::Uniform(SrcType::GPR)
4736     }
4737 }
4738 
4739 impl DstsAsSlice for OpParCopy {
dsts_as_slice(&self) -> &[Dst]4740     fn dsts_as_slice(&self) -> &[Dst] {
4741         &self.dsts_srcs.a
4742     }
4743 
dsts_as_mut_slice(&mut self) -> &mut [Dst]4744     fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
4745         &mut self.dsts_srcs.a
4746     }
4747 }
4748 
4749 impl DisplayOp for OpParCopy {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result4750     fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
4751         Ok(())
4752     }
4753 
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4754     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4755         write!(f, "par_copy")?;
4756         for (i, (dst, src)) in self.dsts_srcs.iter().enumerate() {
4757             if i > 0 {
4758                 write!(f, ",")?;
4759             }
4760             write!(f, " {} = {}", dst, src)?;
4761         }
4762         Ok(())
4763     }
4764 }
4765 impl_display_for_op!(OpParCopy);
4766 
4767 #[repr(C)]
4768 #[derive(DstsAsSlice)]
4769 pub struct OpFSOut {
4770     pub srcs: Vec<Src>,
4771 }
4772 
4773 impl SrcsAsSlice for OpFSOut {
srcs_as_slice(&self) -> &[Src]4774     fn srcs_as_slice(&self) -> &[Src] {
4775         &self.srcs
4776     }
4777 
srcs_as_mut_slice(&mut self) -> &mut [Src]4778     fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
4779         &mut self.srcs
4780     }
4781 
src_types(&self) -> SrcTypeList4782     fn src_types(&self) -> SrcTypeList {
4783         SrcTypeList::Uniform(SrcType::GPR)
4784     }
4785 }
4786 
4787 impl DisplayOp for OpFSOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4788     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4789         write!(f, "fs_out {{")?;
4790         for (i, src) in self.srcs.iter().enumerate() {
4791             if i > 0 {
4792                 write!(f, ",")?;
4793             }
4794             write!(f, " {}", src)?;
4795         }
4796         write!(f, " }}")
4797     }
4798 }
4799 impl_display_for_op!(OpFSOut);
4800 
/// Selects the variant of an `OpOut`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OutType {
    Emit,
    Cut,
    EmitThenCut,
}

impl fmt::Display for OutType {
    /// Writes the snake-case name of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            OutType::Emit => "emit",
            OutType::Cut => "cut",
            OutType::EmitThenCut => "emit_then_cut",
        };
        f.write_str(name)
    }
}
4817 
4818 #[repr(C)]
4819 #[derive(SrcsAsSlice, DstsAsSlice)]
4820 pub struct OpOut {
4821     pub dst: Dst,
4822 
4823     #[src_type(SSA)]
4824     pub handle: Src,
4825 
4826     #[src_type(ALU)]
4827     pub stream: Src,
4828 
4829     pub out_type: OutType,
4830 }
4831 
4832 impl DisplayOp for OpOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4833     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4834         write!(f, "out.{} {} {}", self.out_type, self.handle, self.stream)
4835     }
4836 }
4837 impl_display_for_op!(OpOut);
4838 
4839 #[repr(C)]
4840 #[derive(SrcsAsSlice, DstsAsSlice)]
4841 pub struct OpOutFinal {
4842     #[src_type(SSA)]
4843     pub handle: Src,
4844 }
4845 
4846 impl DisplayOp for OpOutFinal {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4847     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4848         write!(f, "out.final {{ {} }}", self.handle)
4849     }
4850 }
4851 impl_display_for_op!(OpOutFinal);
4852 
4853 /// Describes an annotation on an instruction.
4854 #[repr(C)]
4855 #[derive(SrcsAsSlice, DstsAsSlice)]
4856 pub struct OpAnnotate {
4857     /// The annotation
4858     pub annotation: String,
4859 }
4860 
4861 impl DisplayOp for OpAnnotate {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4862     fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4863         write!(f, "// {}", self.annotation)
4864     }
4865 }
4866 
4867 impl fmt::Display for OpAnnotate {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4868     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4869         self.fmt_op(f)
4870     }
4871 }
4872 
/// The sum type over every operation struct in this IR.
///
/// Each variant wraps the `Op*` struct of the same name.  The derives
/// forward `DisplayOp`, `DstsAsSlice` and `SrcsAsSlice` to the wrapped
/// struct.  `Instr::new` accepts any `impl Into<Op>`.
/// NOTE(review): the per-variant `From` conversions are presumably what
/// `FromVariants` generates; confirm in `nak_ir_proc`.
#[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
pub enum Op {
    FAdd(OpFAdd),
    FFma(OpFFma),
    FMnMx(OpFMnMx),
    FMul(OpFMul),
    Rro(OpRro),
    MuFu(OpMuFu),
    FSet(OpFSet),
    FSetP(OpFSetP),
    FSwzAdd(OpFSwzAdd),
    DAdd(OpDAdd),
    DFma(OpDFma),
    DMnMx(OpDMnMx),
    DMul(OpDMul),
    DSetP(OpDSetP),
    BMsk(OpBMsk),
    BRev(OpBRev),
    Bfe(OpBfe),
    Flo(OpFlo),
    IAbs(OpIAbs),
    INeg(OpINeg),
    IAdd2(OpIAdd2),
    IAdd3(OpIAdd3),
    IAdd3X(OpIAdd3X),
    IDp4(OpIDp4),
    IMad(OpIMad),
    IMad64(OpIMad64),
    IMul(OpIMul),
    IMnMx(OpIMnMx),
    ISetP(OpISetP),
    Lop2(OpLop2),
    Lop3(OpLop3),
    PopC(OpPopC),
    Shf(OpShf),
    Shl(OpShl),
    Shr(OpShr),
    F2F(OpF2F),
    F2I(OpF2I),
    I2F(OpI2F),
    I2I(OpI2I),
    FRnd(OpFRnd),
    Mov(OpMov),
    Prmt(OpPrmt),
    Sel(OpSel),
    Shfl(OpShfl),
    PLop3(OpPLop3),
    PSetP(OpPSetP),
    Tex(OpTex),
    Tld(OpTld),
    Tld4(OpTld4),
    Tmml(OpTmml),
    Txd(OpTxd),
    Txq(OpTxq),
    SuLd(OpSuLd),
    SuSt(OpSuSt),
    SuAtom(OpSuAtom),
    Ld(OpLd),
    Ldc(OpLdc),
    St(OpSt),
    Atom(OpAtom),
    AL2P(OpAL2P),
    ALd(OpALd),
    ASt(OpASt),
    Ipa(OpIpa),
    LdTram(OpLdTram),
    CCtl(OpCCtl),
    MemBar(OpMemBar),
    BClear(OpBClear),
    BMov(OpBMov),
    Break(OpBreak),
    BSSy(OpBSSy),
    BSync(OpBSync),
    Bra(OpBra),
    Exit(OpExit),
    WarpSync(OpWarpSync),
    Bar(OpBar),
    CS2R(OpCS2R),
    Isberd(OpIsberd),
    Kill(OpKill),
    Nop(OpNop),
    PixLd(OpPixLd),
    S2R(OpS2R),
    Vote(OpVote),
    Undef(OpUndef),
    SrcBar(OpSrcBar),
    PhiSrcs(OpPhiSrcs),
    PhiDsts(OpPhiDsts),
    Copy(OpCopy),
    Swap(OpSwap),
    ParCopy(OpParCopy),
    FSOut(OpFSOut),
    Out(OpOut),
    OutFinal(OpOutFinal),
    Annotate(OpAnnotate),
}
impl_display_for_op!(Op);
4970 
4971 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
4972 pub enum PredRef {
4973     None,
4974     SSA(SSAValue),
4975     Reg(RegRef),
4976 }
4977 
4978 impl PredRef {
4979     #[allow(dead_code)]
as_reg(&self) -> Option<&RegRef>4980     pub fn as_reg(&self) -> Option<&RegRef> {
4981         match self {
4982             PredRef::Reg(r) => Some(r),
4983             _ => None,
4984         }
4985     }
4986 
4987     #[allow(dead_code)]
as_ssa(&self) -> Option<&SSAValue>4988     pub fn as_ssa(&self) -> Option<&SSAValue> {
4989         match self {
4990             PredRef::SSA(r) => Some(r),
4991             _ => None,
4992         }
4993     }
4994 
is_none(&self) -> bool4995     pub fn is_none(&self) -> bool {
4996         matches!(self, PredRef::None)
4997     }
4998 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>4999     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
5000         match self {
5001             PredRef::None | PredRef::Reg(_) => &[],
5002             PredRef::SSA(ssa) => slice::from_ref(ssa),
5003         }
5004         .iter()
5005     }
5006 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>5007     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
5008         match self {
5009             PredRef::None | PredRef::Reg(_) => &mut [],
5010             PredRef::SSA(ssa) => slice::from_mut(ssa),
5011         }
5012         .iter_mut()
5013     }
5014 }
5015 
5016 impl From<RegRef> for PredRef {
from(reg: RegRef) -> PredRef5017     fn from(reg: RegRef) -> PredRef {
5018         PredRef::Reg(reg)
5019     }
5020 }
5021 
5022 impl From<SSAValue> for PredRef {
from(ssa: SSAValue) -> PredRef5023     fn from(ssa: SSAValue) -> PredRef {
5024         PredRef::SSA(ssa)
5025     }
5026 }
5027 
5028 impl fmt::Display for PredRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5029     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5030         match self {
5031             PredRef::None => write!(f, "pT"),
5032             PredRef::SSA(ssa) => ssa.fmt_plain(f),
5033             PredRef::Reg(reg) => reg.fmt(f),
5034         }
5035     }
5036 }
5037 
5038 #[derive(Clone, Copy)]
5039 pub struct Pred {
5040     pub pred_ref: PredRef,
5041     pub pred_inv: bool,
5042 }
5043 
5044 impl Pred {
is_true(&self) -> bool5045     pub fn is_true(&self) -> bool {
5046         self.pred_ref.is_none() && !self.pred_inv
5047     }
5048 
is_false(&self) -> bool5049     pub fn is_false(&self) -> bool {
5050         self.pred_ref.is_none() && self.pred_inv
5051     }
5052 
iter_ssa(&self) -> slice::Iter<'_, SSAValue>5053     pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
5054         self.pred_ref.iter_ssa()
5055     }
5056 
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>5057     pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
5058         self.pred_ref.iter_ssa_mut()
5059     }
5060 }
5061 
5062 impl<T: Into<PredRef>> From<T> for Pred {
from(p: T) -> Self5063     fn from(p: T) -> Self {
5064         Pred {
5065             pred_ref: p.into(),
5066             pred_inv: false,
5067         }
5068     }
5069 }
5070 
5071 impl fmt::Display for Pred {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5072     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5073         if self.pred_inv {
5074             write!(f, "!")?;
5075         }
5076         self.pred_ref.fmt(f)
5077     }
5078 }
5079 
/// Minimum instruction delay.
pub const MIN_INSTR_DELAY: u8 = 1;
/// Largest delay accepted by [`InstrDeps::set_delay`].
pub const MAX_INSTR_DELAY: u8 = 15;

/// Scheduling information attached to an instruction: a delay, a yield
/// flag, optional read/write barrier indices, a wait-barrier mask and a
/// reuse mask.
///
/// The read and write barrier indices are stored as `i8`, with a negative
/// value meaning "not set".  Use [`InstrDeps::rd_bar`] and
/// [`InstrDeps::wr_bar`] to read them as `Option<u8>`.
pub struct InstrDeps {
    pub delay: u8,
    pub yld: bool,
    wr_bar: i8,
    rd_bar: i8,
    pub wt_bar_mask: u8,
    pub reuse_mask: u8,
}

impl InstrDeps {
    /// Creates dependencies with zero delay, no yield, no barriers and
    /// empty masks.
    pub fn new() -> InstrDeps {
        InstrDeps {
            delay: 0,
            yld: false,
            wr_bar: -1,
            rd_bar: -1,
            wt_bar_mask: 0,
            reuse_mask: 0,
        }
    }

    /// Returns the read barrier index, or `None` if it is not set.
    pub fn rd_bar(&self) -> Option<u8> {
        // A negative value fails the conversion, which maps to `None`.
        u8::try_from(self.rd_bar).ok()
    }

    /// Returns the write barrier index, or `None` if it is not set.
    pub fn wr_bar(&self) -> Option<u8> {
        // A negative value fails the conversion, which maps to `None`.
        u8::try_from(self.wr_bar).ok()
    }

    /// Sets the delay.
    ///
    /// # Panics
    ///
    /// Panics if `delay` exceeds [`MAX_INSTR_DELAY`].
    pub fn set_delay(&mut self, delay: u8) {
        assert!(delay <= MAX_INSTR_DELAY);
        self.delay = delay;
    }

    /// Sets the yield flag.
    pub fn set_yield(&mut self, yld: bool) {
        self.yld = yld;
    }

    /// Sets the read barrier index.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn set_rd_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.rd_bar = i8::try_from(idx).expect("idx < 6 fits in i8");
    }

    /// Sets the write barrier index.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn set_wr_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.wr_bar = i8::try_from(idx).expect("idx < 6 fits in i8");
    }

    /// Adds barrier `idx` to the wait mask.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn add_wt_bar(&mut self, idx: u8) {
        // Check the index before shifting.  Without this, `1 << idx` on a
        // `u8` wraps the shift amount in release builds for `idx >= 8`
        // and would silently set the wrong bit.
        assert!(idx < 6);
        self.add_wt_bar_mask(1_u8 << idx);
    }

    /// ORs `bar_mask` into the wait mask.
    ///
    /// # Panics
    ///
    /// Panics if `bar_mask` has any bit set above bit 5.
    pub fn add_wt_bar_mask(&mut self, bar_mask: u8) {
        assert!(bar_mask < 1 << 6);
        self.wt_bar_mask |= bar_mask;
    }

    /// Sets bit `idx` of the reuse mask.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    #[allow(dead_code)]
    pub fn add_reuse(&mut self, idx: u8) {
        assert!(idx < 6);
        self.reuse_mask |= 1_u8 << idx;
    }
}

impl fmt::Display for InstrDeps {
    /// Writes every field that is set, each prefixed with a single space.
    ///
    /// Fields appear in the order delay, wait mask, read barrier, write
    /// barrier, reuse mask, yield.  Nothing is written for default
    /// dependencies.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.delay > 0 {
            write!(f, " delay={}", self.delay)?;
        }
        if self.wt_bar_mask != 0 {
            write!(f, " wt={:06b}", self.wt_bar_mask)?;
        }
        if self.rd_bar >= 0 {
            write!(f, " rd:{}", self.rd_bar)?;
        }
        if self.wr_bar >= 0 {
            write!(f, " wr:{}", self.wr_bar)?;
        }
        if self.reuse_mask != 0 {
            write!(f, " reuse={:06b}", self.reuse_mask)?;
        }
        if self.yld {
            write!(f, " yld")?;
        }
        Ok(())
    }
}
5178 
/// A single instruction: an operation, the predicate attached to it and
/// its scheduling dependencies.
pub struct Instr {
    // Predicate attached to the instruction; `Instr::new` starts with a
    // non-inverted `PredRef::None`.
    pub pred: Pred,
    // The operation itself.
    pub op: Op,
    // Scheduling information; `Instr::new` starts with `InstrDeps::new()`.
    pub deps: InstrDeps,
}
5184 
5185 impl Instr {
new(op: impl Into<Op>) -> Instr5186     pub fn new(op: impl Into<Op>) -> Instr {
5187         Instr {
5188             op: op.into(),
5189             pred: PredRef::None.into(),
5190             deps: InstrDeps::new(),
5191         }
5192     }
5193 
new_boxed(op: impl Into<Op>) -> Box<Self>5194     pub fn new_boxed(op: impl Into<Op>) -> Box<Self> {
5195         Box::new(Instr::new(op))
5196     }
5197 
dsts(&self) -> &[Dst]5198     pub fn dsts(&self) -> &[Dst] {
5199         self.op.dsts_as_slice()
5200     }
5201 
dsts_mut(&mut self) -> &mut [Dst]5202     pub fn dsts_mut(&mut self) -> &mut [Dst] {
5203         self.op.dsts_as_mut_slice()
5204     }
5205 
srcs(&self) -> &[Src]5206     pub fn srcs(&self) -> &[Src] {
5207         self.op.srcs_as_slice()
5208     }
5209 
srcs_mut(&mut self) -> &mut [Src]5210     pub fn srcs_mut(&mut self) -> &mut [Src] {
5211         self.op.srcs_as_mut_slice()
5212     }
5213 
src_types(&self) -> SrcTypeList5214     pub fn src_types(&self) -> SrcTypeList {
5215         self.op.src_types()
5216     }
5217 
for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue))5218     pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
5219         for ssa in self.pred.iter_ssa() {
5220             f(ssa);
5221         }
5222         for src in self.srcs() {
5223             for ssa in src.iter_ssa() {
5224                 f(ssa);
5225             }
5226         }
5227     }
5228 
for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue))5229     pub fn for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
5230         for ssa in self.pred.iter_ssa_mut() {
5231             f(ssa);
5232         }
5233         for src in self.srcs_mut() {
5234             for ssa in src.iter_ssa_mut() {
5235                 f(ssa);
5236             }
5237         }
5238     }
5239 
for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue))5240     pub fn for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue)) {
5241         for dst in self.dsts() {
5242             for ssa in dst.iter_ssa() {
5243                 f(ssa);
5244             }
5245         }
5246     }
5247 
for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue))5248     pub fn for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
5249         for dst in self.dsts_mut() {
5250             for ssa in dst.iter_ssa_mut() {
5251                 f(ssa);
5252             }
5253         }
5254     }
5255 
is_branch(&self) -> bool5256     pub fn is_branch(&self) -> bool {
5257         matches!(self.op, Op::Bra(_) | Op::Exit(_))
5258     }
5259 
is_barrier(&self) -> bool5260     pub fn is_barrier(&self) -> bool {
5261         matches!(self.op, Op::Bar(_))
5262     }
5263 
uses_global_mem(&self) -> bool5264     pub fn uses_global_mem(&self) -> bool {
5265         match &self.op {
5266             Op::Atom(op) => op.mem_space != MemSpace::Local,
5267             Op::Ld(op) => op.access.space != MemSpace::Local,
5268             Op::St(op) => op.access.space != MemSpace::Local,
5269             Op::SuAtom(_) | Op::SuLd(_) | Op::SuSt(_) => true,
5270             _ => false,
5271         }
5272     }
5273 
writes_global_mem(&self) -> bool5274     pub fn writes_global_mem(&self) -> bool {
5275         match &self.op {
5276             Op::Atom(op) => matches!(op.mem_space, MemSpace::Global(_)),
5277             Op::St(op) => matches!(op.access.space, MemSpace::Global(_)),
5278             Op::SuAtom(_) | Op::SuSt(_) => true,
5279             _ => false,
5280         }
5281     }
5282 
can_eliminate(&self) -> bool5283     pub fn can_eliminate(&self) -> bool {
5284         match &self.op {
5285             Op::ASt(_)
5286             | Op::SuSt(_)
5287             | Op::SuAtom(_)
5288             | Op::St(_)
5289             | Op::Atom(_)
5290             | Op::CCtl(_)
5291             | Op::MemBar(_)
5292             | Op::Kill(_)
5293             | Op::Nop(_)
5294             | Op::BSync(_)
5295             | Op::Bra(_)
5296             | Op::Exit(_)
5297             | Op::WarpSync(_)
5298             | Op::Bar(_)
5299             | Op::FSOut(_)
5300             | Op::Out(_)
5301             | Op::OutFinal(_)
5302             | Op::Annotate(_) => false,
5303             Op::BMov(op) => !op.clear,
5304             _ => true,
5305         }
5306     }
5307 
has_fixed_latency(&self, _sm: u8) -> bool5308     pub fn has_fixed_latency(&self, _sm: u8) -> bool {
5309         match &self.op {
5310             // Float ALU
5311             Op::FAdd(_)
5312             | Op::FFma(_)
5313             | Op::FMnMx(_)
5314             | Op::FMul(_)
5315             | Op::FSet(_)
5316             | Op::FSetP(_)
5317             | Op::FSwzAdd(_) => true,
5318 
5319             // Multi-function unit is variable latency
5320             Op::Rro(_) | Op::MuFu(_) => false,
5321 
5322             // Double-precision float ALU
5323             Op::DAdd(_)
5324             | Op::DFma(_)
5325             | Op::DMnMx(_)
5326             | Op::DMul(_)
5327             | Op::DSetP(_) => false,
5328 
5329             // Integer ALU
5330             Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false,
5331             Op::BMsk(_)
5332             | Op::IAbs(_)
5333             | Op::INeg(_)
5334             | Op::IAdd2(_)
5335             | Op::IAdd3(_)
5336             | Op::IAdd3X(_)
5337             | Op::IDp4(_)
5338             | Op::IMad(_)
5339             | Op::IMad64(_)
5340             | Op::IMul(_)
5341             | Op::IMnMx(_)
5342             | Op::ISetP(_)
5343             | Op::Lop2(_)
5344             | Op::Lop3(_)
5345             | Op::Shf(_)
5346             | Op::Shl(_)
5347             | Op::Shr(_)
5348             | Op::Bfe(_) => true,
5349 
5350             // Conversions are variable latency?!?
5351             Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
5352                 false
5353             }
5354 
5355             // Move ops
5356             Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
5357             Op::Shfl(_) => false,
5358 
5359             // Predicate ops
5360             Op::PLop3(_) | Op::PSetP(_) => true,
5361 
5362             // Texture ops
5363             Op::Tex(_)
5364             | Op::Tld(_)
5365             | Op::Tld4(_)
5366             | Op::Tmml(_)
5367             | Op::Txd(_)
5368             | Op::Txq(_) => false,
5369 
5370             // Surface ops
5371             Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
5372 
5373             // Memory ops
5374             Op::Ld(_)
5375             | Op::Ldc(_)
5376             | Op::St(_)
5377             | Op::Atom(_)
5378             | Op::AL2P(_)
5379             | Op::ALd(_)
5380             | Op::ASt(_)
5381             | Op::Ipa(_)
5382             | Op::CCtl(_)
5383             | Op::LdTram(_)
5384             | Op::MemBar(_) => false,
5385 
5386             // Control-flow ops
5387             Op::BClear(_) | Op::Break(_) | Op::BSSy(_) | Op::BSync(_) => true,
5388             Op::Bra(_) | Op::Exit(_) => true,
5389             Op::WarpSync(_) => false,
5390 
5391             // BMOV: barriers only when using gprs (and only valid for the gpr),
5392             // no barriers for the others.
5393             Op::BMov(op) => match &op.dst {
5394                 Dst::None => true,
5395                 Dst::SSA(vec) => vec.file() == RegFile::Bar,
5396                 Dst::Reg(reg) => reg.file() == RegFile::Bar,
5397             },
5398 
5399             // Geometry ops
5400             Op::Out(_) | Op::OutFinal(_) => false,
5401 
5402             // Miscellaneous ops
5403             Op::Bar(_)
5404             | Op::CS2R(_)
5405             | Op::Isberd(_)
5406             | Op::Kill(_)
5407             | Op::PixLd(_)
5408             | Op::S2R(_) => false,
5409             Op::Nop(_) | Op::Vote(_) => true,
5410 
5411             // Virtual ops
5412             Op::Undef(_)
5413             | Op::SrcBar(_)
5414             | Op::PhiSrcs(_)
5415             | Op::PhiDsts(_)
5416             | Op::Copy(_)
5417             | Op::Swap(_)
5418             | Op::ParCopy(_)
5419             | Op::FSOut(_)
5420             | Op::Annotate(_) => {
5421                 panic!("Not a hardware opcode")
5422             }
5423         }
5424     }
5425 
5426     /// Minimum latency before another instruction can execute
get_exec_latency(&self, sm: u8) -> u325427     pub fn get_exec_latency(&self, sm: u8) -> u32 {
5428         match &self.op {
5429             Op::Bar(_) | Op::MemBar(_) => {
5430                 if sm >= 80 {
5431                     6
5432                 } else {
5433                     5
5434                 }
5435             }
5436             Op::CCtl(_op) => {
5437                 // CCTL.C needs 8, CCTL.I needs 11
5438                 11
5439             }
5440             // Op::DepBar(_) => 4,
5441             _ => 1, // TODO: co-issue
5442         }
5443     }
5444 
get_dst_latency(&self, sm: u8, dst_idx: usize) -> u325445     pub fn get_dst_latency(&self, sm: u8, dst_idx: usize) -> u32 {
5446         debug_assert!(self.has_fixed_latency(sm));
5447         let file = match self.dsts()[dst_idx] {
5448             Dst::None => return 0,
5449             Dst::SSA(vec) => vec.file(),
5450             Dst::Reg(reg) => reg.file(),
5451         };
5452         if file.is_predicate() {
5453             13
5454         } else {
5455             6
5456         }
5457     }
5458 
needs_yield(&self) -> bool5459     pub fn needs_yield(&self) -> bool {
5460         matches!(&self.op, Op::Bar(_) | Op::BSync(_))
5461     }
5462 
fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5463     fn fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5464         if !self.pred.is_true() {
5465             write!(f, "@{} ", self.pred)?;
5466         }
5467         Ok(())
5468     }
5469 }
5470 
5471 impl fmt::Display for Instr {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5472     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5473         write!(f, "{} {}{}", Fmt(|f| self.fmt_pred(f)), self.op, self.deps)
5474     }
5475 }
5476 
5477 impl<T: Into<Op>> From<T> for Instr {
from(value: T) -> Self5478     fn from(value: T) -> Self {
5479         Self::new(value)
5480     }
5481 }
5482 
5483 /// The result of map() done on a Box<Instr>. A Vec is only allocated if the
5484 /// mapping results in multiple instructions. This helps to reduce the amount of
5485 /// Vec's allocated in the optimization passes.
5486 pub enum MappedInstrs {
5487     None,
5488     One(Box<Instr>),
5489     Many(Vec<Box<Instr>>),
5490 }
5491 
5492 impl MappedInstrs {
push(&mut self, i: Box<Instr>)5493     pub fn push(&mut self, i: Box<Instr>) {
5494         match self {
5495             MappedInstrs::None => {
5496                 *self = MappedInstrs::One(i);
5497             }
5498             MappedInstrs::One(_) => {
5499                 *self = match std::mem::replace(self, MappedInstrs::None) {
5500                     MappedInstrs::One(o) => MappedInstrs::Many(vec![o, i]),
5501                     _ => panic!("Not a One"),
5502                 };
5503             }
5504             MappedInstrs::Many(v) => {
5505                 v.push(i);
5506             }
5507         }
5508     }
5509 
last_mut(&mut self) -> Option<&mut Box<Instr>>5510     pub fn last_mut(&mut self) -> Option<&mut Box<Instr>> {
5511         match self {
5512             MappedInstrs::None => None,
5513             MappedInstrs::One(instr) => Some(instr),
5514             MappedInstrs::Many(v) => v.last_mut(),
5515         }
5516     }
5517 }
5518 
5519 pub struct BasicBlock {
5520     pub label: Label,
5521     pub instrs: Vec<Box<Instr>>,
5522 }
5523 
5524 impl BasicBlock {
new(label: Label) -> BasicBlock5525     pub fn new(label: Label) -> BasicBlock {
5526         BasicBlock {
5527             label: label,
5528             instrs: Vec::new(),
5529         }
5530     }
5531 
map_instrs_priv( &mut self, map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, ssa_alloc: &mut SSAValueAllocator, )5532     fn map_instrs_priv(
5533         &mut self,
5534         map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5535         ssa_alloc: &mut SSAValueAllocator,
5536     ) {
5537         let mut instrs = Vec::new();
5538         for i in self.instrs.drain(..) {
5539             match map(i, ssa_alloc) {
5540                 MappedInstrs::None => (),
5541                 MappedInstrs::One(i) => {
5542                     instrs.push(i);
5543                 }
5544                 MappedInstrs::Many(mut v) => {
5545                     instrs.append(&mut v);
5546                 }
5547             }
5548         }
5549         self.instrs = instrs;
5550     }
5551 
phi_dsts(&self) -> Option<&OpPhiDsts>5552     pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
5553         if let Op::PhiDsts(phi) = &self.instrs.first()?.op {
5554             return Some(phi);
5555         }
5556         None
5557     }
5558 
5559     #[allow(dead_code)]
phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts>5560     pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
5561         if let Op::PhiDsts(phi) = &mut self.instrs.first_mut()?.op {
5562             return Some(phi);
5563         }
5564         None
5565     }
5566 
phi_srcs(&self) -> Option<&OpPhiSrcs>5567     pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
5568         for instr in self.instrs.iter().rev() {
5569             if instr.is_branch() {
5570                 continue;
5571             }
5572 
5573             match &instr.op {
5574                 Op::PhiSrcs(phi) => return Some(phi),
5575                 _ => break,
5576             }
5577         }
5578         None
5579     }
5580 
phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs>5581     pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
5582         for instr in self.instrs.iter_mut().rev() {
5583             if instr.is_branch() {
5584                 continue;
5585             }
5586 
5587             match &mut instr.op {
5588                 Op::PhiSrcs(phi) => return Some(phi),
5589                 _ => break,
5590             }
5591         }
5592         None
5593     }
5594 
branch(&self) -> Option<&Instr>5595     pub fn branch(&self) -> Option<&Instr> {
5596         if let Some(i) = self.instrs.last() {
5597             if i.is_branch() {
5598                 Some(i)
5599             } else {
5600                 None
5601             }
5602         } else {
5603             None
5604         }
5605     }
5606 
5607     #[allow(dead_code)]
branch_mut(&mut self) -> Option<&mut Instr>5608     pub fn branch_mut(&mut self) -> Option<&mut Instr> {
5609         if let Some(i) = self.instrs.last_mut() {
5610             if i.is_branch() {
5611                 Some(i)
5612             } else {
5613                 None
5614             }
5615         } else {
5616             None
5617         }
5618     }
5619 
falls_through(&self) -> bool5620     pub fn falls_through(&self) -> bool {
5621         if let Some(i) = self.branch() {
5622             !i.pred.is_true()
5623         } else {
5624             true
5625         }
5626     }
5627 }
5628 
5629 pub struct Function {
5630     pub ssa_alloc: SSAValueAllocator,
5631     pub phi_alloc: PhiAllocator,
5632     pub blocks: CFG<BasicBlock>,
5633 }
5634 
5635 impl Function {
map_instrs_priv( &mut self, map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )5636     fn map_instrs_priv(
5637         &mut self,
5638         map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5639     ) {
5640         for b in &mut self.blocks {
5641             b.map_instrs_priv(map, &mut self.ssa_alloc);
5642         }
5643     }
5644 
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )5645     pub fn map_instrs(
5646         &mut self,
5647         mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5648     ) {
5649         self.map_instrs_priv(&mut map);
5650     }
5651 }
5652 
5653 impl fmt::Display for Function {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5654     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5655         let mut pred_width = 0;
5656         let mut dsts_width = 0;
5657         let mut op_width = 0;
5658 
5659         let mut blocks = Vec::new();
5660         for b in &self.blocks {
5661             let mut instrs = Vec::new();
5662             for i in &b.instrs {
5663                 let mut pred = String::new();
5664                 write!(pred, "{}", Fmt(|f| i.fmt_pred(f)))?;
5665                 let mut dsts = String::new();
5666                 write!(dsts, "{}", Fmt(|f| i.op.fmt_dsts(f)))?;
5667                 let mut op = String::new();
5668                 write!(op, "{}", Fmt(|f| i.op.fmt_op(f)))?;
5669                 let mut deps = String::new();
5670                 write!(deps, "{}", i.deps)?;
5671 
5672                 pred_width = max(pred_width, pred.len());
5673                 dsts_width = max(dsts_width, dsts.len());
5674                 op_width = max(op_width, op.len());
5675                 let is_annotation = matches!(i.op, Op::Annotate(_));
5676 
5677                 instrs.push((pred, dsts, op, deps, is_annotation));
5678             }
5679             blocks.push(instrs);
5680         }
5681 
5682         for (i, mut b) in blocks.drain(..).enumerate() {
5683             write!(f, "block {} {} [", i, self.blocks[i].label)?;
5684             for (pi, p) in self.blocks.pred_indices(i).iter().enumerate() {
5685                 if pi > 0 {
5686                     write!(f, ", ")?;
5687                 }
5688                 write!(f, "{}", p)?;
5689             }
5690             write!(f, "] -> {{\n")?;
5691 
5692             for (pred, dsts, op, deps, is_annotation) in b.drain(..) {
5693                 let eq_sym = if dsts.is_empty() { " " } else { "=" };
5694                 if is_annotation {
5695                     write!(f, "\n{}\n", op)?;
5696                 } else if deps.is_empty() {
5697                     write!(
5698                         f,
5699                         "{:<pred_width$} {:<dsts_width$} {} {}\n",
5700                         pred, dsts, eq_sym, op,
5701                     )?;
5702                 } else {
5703                     write!(
5704                         f,
5705                         "{:<pred_width$} {:<dsts_width$} {} \
5706                          {:<op_width$} //{}\n",
5707                         pred, dsts, eq_sym, op, deps,
5708                     )?;
5709                 }
5710             }
5711 
5712             write!(f, "}} -> [")?;
5713             for (si, s) in self.blocks.succ_indices(i).iter().enumerate() {
5714                 if si > 0 {
5715                     write!(f, ", ")?;
5716                 }
5717                 write!(f, "{}", s)?;
5718             }
5719             write!(f, "]\n")?;
5720         }
5721         Ok(())
5722     }
5723 }
5724 
/// Stage information carried by `ShaderStageInfo::Compute`.
#[derive(Debug)]
pub struct ComputeShaderInfo {
    /// Local size, three components.
    // NOTE(review): presumably the workgroup size in x, y, z -- confirm.
    pub local_size: [u16; 3],
    /// `smem` size.
    // NOTE(review): presumably shared-memory size in bytes -- confirm.
    pub smem_size: u16,
}
5730 
5731 #[derive(Debug)]
5732 pub struct GeometryShaderInfo {
5733     pub passthrough_enable: bool,
5734     pub stream_out_mask: u8,
5735     pub threads_per_input_primitive: u8,
5736     pub output_topology: OutputTopology,
5737     pub max_output_vertex_count: u16,
5738 }
5739 
5740 impl Default for GeometryShaderInfo {
default() -> Self5741     fn default() -> Self {
5742         Self {
5743             passthrough_enable: false,
5744             stream_out_mask: 0,
5745             threads_per_input_primitive: 0,
5746             output_topology: OutputTopology::LineStrip,
5747             max_output_vertex_count: 0,
5748         }
5749     }
5750 }
5751 
/// Stage information carried by `ShaderStageInfo::TessellationInit`.
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
    /// Per-patch attribute count.
    pub per_patch_attribute_count: u8,
    /// Threads per patch.
    pub threads_per_patch: u8,
}
5757 
5758 #[derive(Debug)]
5759 pub enum ShaderStageInfo {
5760     Compute(ComputeShaderInfo),
5761     Vertex,
5762     Fragment,
5763     Geometry(GeometryShaderInfo),
5764     TessellationInit(TessellationInitShaderInfo),
5765     Tessellation,
5766 }
5767 
/// Bitmasks of system-value slots; one bit per 4-byte attribute address.
#[derive(Debug, Default)]
pub struct SysValInfo {
    /// One bit per slot for addresses below `0x080`.
    pub ab: u32,
    /// One bit per slot for addresses in `0x2c0..0x300`.
    pub c: u16,
}
5773 
5774 #[derive(Debug)]
5775 pub struct VtgIoInfo {
5776     pub sysvals_in: SysValInfo,
5777     pub sysvals_in_d: u8,
5778     pub sysvals_out: SysValInfo,
5779     pub sysvals_out_d: u8,
5780     pub attr_in: [u32; 4],
5781     pub attr_out: [u32; 4],
5782     pub store_req_start: u8,
5783     pub store_req_end: u8,
5784 }
5785 
5786 impl VtgIoInfo {
mark_attrs(&mut self, addrs: Range<u16>, written: bool)5787     fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
5788         let sysvals = if written {
5789             &mut self.sysvals_out
5790         } else {
5791             &mut self.sysvals_in
5792         };
5793 
5794         let sysvals_d = if written {
5795             &mut self.sysvals_out_d
5796         } else {
5797             &mut self.sysvals_in_d
5798         };
5799 
5800         let mut attr = BitMutView::new(if written {
5801             &mut self.attr_out
5802         } else {
5803             &mut self.attr_in
5804         });
5805 
5806         let mut addrs = addrs;
5807         addrs.start &= !3;
5808         for addr in addrs.step_by(4) {
5809             if addr < 0x080 {
5810                 sysvals.ab |= 1 << (addr / 4);
5811             } else if addr < 0x280 {
5812                 let attr_idx = (addr - 0x080) as usize / 4;
5813                 attr.set_bit(attr_idx, true);
5814             } else if addr < 0x2c0 {
5815                 panic!("FF color I/O not supported");
5816             } else if addr < 0x300 {
5817                 sysvals.c |= 1 << ((addr - 0x2c0) / 4);
5818             } else if addr >= 0x3a0 && addr < 0x3c0 {
5819                 *sysvals_d |= 1 << ((addr - 0x3a0) / 4);
5820             }
5821         }
5822     }
5823 
mark_attrs_read(&mut self, addrs: Range<u16>)5824     pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
5825         self.mark_attrs(addrs, false);
5826     }
5827 
mark_attrs_written(&mut self, addrs: Range<u16>)5828     pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
5829         self.mark_attrs(addrs, true);
5830     }
5831 
mark_store_req(&mut self, addrs: Range<u16>)5832     pub fn mark_store_req(&mut self, addrs: Range<u16>) {
5833         let start = (addrs.start / 4).try_into().unwrap();
5834         let end = ((addrs.end - 1) / 4).try_into().unwrap();
5835         self.store_req_start = min(self.store_req_start, start);
5836         self.store_req_end = max(self.store_req_end, end);
5837     }
5838 }
5839 
5840 #[derive(Debug)]
5841 pub struct FragmentIoInfo {
5842     pub sysvals_in: SysValInfo,
5843     pub sysvals_in_d: [PixelImap; 8],
5844     pub attr_in: [PixelImap; 128],
5845     pub barycentric_attr_in: [u32; 4],
5846 
5847     pub reads_sample_mask: bool,
5848     pub uses_kill: bool,
5849     pub writes_color: u32,
5850     pub writes_sample_mask: bool,
5851     pub writes_depth: bool,
5852     pub does_interlock: bool,
5853 }
5854 
5855 impl FragmentIoInfo {
mark_attr_read(&mut self, addr: u16, interp: PixelImap)5856     pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
5857         if addr < 0x080 {
5858             self.sysvals_in.ab |= 1 << (addr / 4);
5859         } else if addr < 0x280 {
5860             let attr_idx = (addr - 0x080) as usize / 4;
5861             self.attr_in[attr_idx] = interp;
5862         } else if addr < 0x2c0 {
5863             panic!("FF color I/O not supported");
5864         } else if addr < 0x300 {
5865             self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4);
5866         } else if addr >= 0x3a0 && addr < 0x3c0 {
5867             let attr_idx = (addr - 0x3a0) as usize / 4;
5868             self.sysvals_in_d[attr_idx] = interp;
5869         }
5870     }
5871 
mark_barycentric_attr_in(&mut self, addr: u16)5872     pub fn mark_barycentric_attr_in(&mut self, addr: u16) {
5873         assert!(addr >= 0x80 && addr < 0x280);
5874 
5875         let mut attr = BitMutView::new(&mut self.barycentric_attr_in);
5876 
5877         let attr_idx = (addr - 0x080) as usize / 4;
5878         attr.set_bit(attr_idx, true);
5879     }
5880 }
5881 
5882 #[derive(Debug)]
5883 pub enum ShaderIoInfo {
5884     None,
5885     Vtg(VtgIoInfo),
5886     Fragment(FragmentIoInfo),
5887 }
5888 
5889 #[derive(Debug)]
5890 pub struct ShaderInfo {
5891     pub sm: u8,
5892     pub num_gprs: u8,
5893     pub num_barriers: u8,
5894     pub slm_size: u32,
5895     pub uses_global_mem: bool,
5896     pub writes_global_mem: bool,
5897     pub uses_fp64: bool,
5898     pub stage: ShaderStageInfo,
5899     pub io: ShaderIoInfo,
5900 }
5901 
5902 pub struct Shader {
5903     pub info: ShaderInfo,
5904     pub functions: Vec<Function>,
5905 }
5906 
5907 impl Shader {
for_each_instr(&self, f: &mut impl FnMut(&Instr))5908     pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
5909         for func in &self.functions {
5910             for b in &func.blocks {
5911                 for i in &b.instrs {
5912                     f(i);
5913                 }
5914             }
5915         }
5916     }
5917 
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )5918     pub fn map_instrs(
5919         &mut self,
5920         mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5921     ) {
5922         for f in &mut self.functions {
5923             f.map_instrs_priv(&mut map);
5924         }
5925     }
5926 
5927     /// Remove all annotations, presumably before encoding the shader.
remove_annotations(&mut self)5928     pub fn remove_annotations(&mut self) {
5929         self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
5930             if matches!(instr.op, Op::Annotate(_)) {
5931                 MappedInstrs::None
5932             } else {
5933                 MappedInstrs::One(instr)
5934             }
5935         })
5936     }
5937 
lower_ineg(&mut self)5938     pub fn lower_ineg(&mut self) {
5939         let sm = self.info.sm;
5940         self.map_instrs(|mut instr: Box<Instr>, _| -> MappedInstrs {
5941             match instr.op {
5942                 Op::INeg(neg) => {
5943                     if sm >= 70 {
5944                         instr.op = Op::IAdd3(OpIAdd3 {
5945                             dst: neg.dst,
5946                             overflow: [Dst::None; 2],
5947                             srcs: [0.into(), neg.src.ineg(), 0.into()],
5948                         });
5949                     } else {
5950                         instr.op = Op::IAdd2(OpIAdd2 {
5951                             dst: neg.dst,
5952                             srcs: [0.into(), neg.src.ineg()],
5953                             carry_in: 0.into(),
5954                             carry_out: Dst::None,
5955                         });
5956                     }
5957                     MappedInstrs::One(instr)
5958                 }
5959                 _ => MappedInstrs::One(instr),
5960             }
5961         })
5962     }
5963 
gather_global_mem_usage(&mut self)5964     pub fn gather_global_mem_usage(&mut self) {
5965         if let ShaderStageInfo::Compute(_) = self.info.stage {
5966             return;
5967         }
5968 
5969         let mut uses_global_mem = false;
5970         let mut writes_global_mem = false;
5971 
5972         self.for_each_instr(&mut |instr| {
5973             if !uses_global_mem {
5974                 uses_global_mem = instr.uses_global_mem();
5975             }
5976 
5977             if !writes_global_mem {
5978                 writes_global_mem = instr.writes_global_mem();
5979             }
5980         });
5981 
5982         self.info.uses_global_mem = uses_global_mem;
5983         self.info.writes_global_mem = writes_global_mem;
5984     }
5985 }
5986 
5987 impl fmt::Display for Shader {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5988     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5989         for func in &self.functions {
5990             write!(f, "{}", func)?;
5991         }
5992         Ok(())
5993     }
5994 }
5995