1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 extern crate bitview;
5 extern crate nak_ir_proc;
6
7 use bitview::BitMutView;
8
9 use crate::api::{GetDebugFlags, DEBUG};
10 pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
11 use crate::cfg::CFG;
12 use crate::sph::{OutputTopology, PixelImap};
13 use nak_ir_proc::*;
14 use std::cmp::{max, min};
15 use std::fmt;
16 use std::fmt::Write;
17 use std::iter::Zip;
18 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
19 use std::slice;
20
/// An opaque label identified by a numeric index
///
/// Labels print as `L` followed by their index.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct Label {
    idx: u32,
}

impl fmt::Display for Label {
    /// Writes the label as `L<idx>`
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Label { idx } = self;
        write!(f, "L{}", idx)
    }
}
31
32 pub struct LabelAllocator {
33 count: u32,
34 }
35
36 impl LabelAllocator {
new() -> LabelAllocator37 pub fn new() -> LabelAllocator {
38 LabelAllocator { count: 0 }
39 }
40
alloc(&mut self) -> Label41 pub fn alloc(&mut self) -> Label {
42 let idx = self.count;
43 self.count += 1;
44 Label { idx: idx }
45 }
46 }
47
/// Represents a register file
#[repr(u8)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum RegFile {
    /// The general-purpose register file
    ///
    /// Each general-purpose register holds 32 bits per SIMT channel.
    GPR = 0,

    /// The general-purpose uniform register file
    ///
    /// Each general-purpose uniform register holds 32 bits which are shared
    /// by the whole wave.
    UGPR = 1,

    /// The predicate register file
    ///
    /// Each predicate register holds 1 bit per SIMT channel.
    Pred = 2,

    /// The uniform predicate register file
    ///
    /// Each uniform predicate register holds 1 bit which is shared by the
    /// whole wave.
    UPred = 3,

    /// The carry flag register file
    ///
    /// Hardware only has a single carry flag register, but modelling it as a
    /// register file makes dependency tracking simpler.
    ///
    /// This is used only on SM50.
    Carry = 4,

    /// The barrier register file
    ///
    /// A barrier register is a lane mask used by wave re-convergence
    /// instructions.
    Bar = 5,

    /// The memory register file
    ///
    /// This is a virtual register file for values which will be spilled to
    /// local memory. Each memory location holds 32 bits per SIMT channel.
    Mem = 6,
}

/// The number of register files
///
/// `RegFile::Mem` has the highest discriminant, so the count is one more
/// than that discriminant.
const NUM_REG_FILES: usize = RegFile::Mem as usize + 1;
94
95 impl RegFile {
96 /// Returns true if the register file is uniform across a wave
is_uniform(&self) -> bool97 pub fn is_uniform(&self) -> bool {
98 match self {
99 RegFile::GPR
100 | RegFile::Pred
101 | RegFile::Carry
102 | RegFile::Bar
103 | RegFile::Mem => false,
104 RegFile::UGPR | RegFile::UPred => true,
105 }
106 }
107
108 /// Returns true if the register file is general-purpose
is_gpr(&self) -> bool109 pub fn is_gpr(&self) -> bool {
110 match self {
111 RegFile::GPR | RegFile::UGPR => true,
112 RegFile::Pred
113 | RegFile::UPred
114 | RegFile::Carry
115 | RegFile::Bar
116 | RegFile::Mem => false,
117 }
118 }
119
120 /// Returns true if the register file is a predicate register file
is_predicate(&self) -> bool121 pub fn is_predicate(&self) -> bool {
122 match self {
123 RegFile::GPR
124 | RegFile::UGPR
125 | RegFile::Carry
126 | RegFile::Bar
127 | RegFile::Mem => false,
128 RegFile::Pred | RegFile::UPred => true,
129 }
130 }
131
num_regs(&self, sm: u8) -> u32132 pub fn num_regs(&self, sm: u8) -> u32 {
133 match self {
134 RegFile::GPR => {
135 if DEBUG.spill() {
136 // We need at least 16 registers to satisfy RA constraints
137 // for texture ops and another 2 for parallel copy lowering
138 18
139 } else if sm >= 70 {
140 // Volta+ has a maximum of 253 registers. Presumably
141 // because two registers get burned for UGPRs? Unclear
142 // on why we need it on Volta though.
143 253
144 } else {
145 255
146 }
147 }
148 RegFile::UGPR => {
149 if sm >= 75 {
150 63
151 } else {
152 0
153 }
154 }
155 RegFile::Pred => 7,
156 RegFile::UPred => {
157 if sm >= 75 {
158 7
159 } else {
160 0
161 }
162 }
163 RegFile::Carry => {
164 if sm >= 70 {
165 0
166 } else {
167 1
168 }
169 }
170 RegFile::Bar => {
171 if sm >= 70 {
172 16
173 } else {
174 0
175 }
176 }
177 RegFile::Mem => 1 << 24,
178 }
179 }
180
fmt_prefix(&self) -> &'static str181 fn fmt_prefix(&self) -> &'static str {
182 match self {
183 RegFile::GPR => "r",
184 RegFile::UGPR => "ur",
185 RegFile::Pred => "p",
186 RegFile::UPred => "up",
187 RegFile::Carry => "c",
188 RegFile::Bar => "b",
189 RegFile::Mem => "m",
190 }
191 }
192 }
193
194 impl fmt::Display for RegFile {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result195 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
196 match self {
197 RegFile::GPR => write!(f, "GPR"),
198 RegFile::UGPR => write!(f, "UGPR"),
199 RegFile::Pred => write!(f, "Pred"),
200 RegFile::UPred => write!(f, "UPred"),
201 RegFile::Carry => write!(f, "Carry"),
202 RegFile::Bar => write!(f, "Bar"),
203 RegFile::Mem => write!(f, "Mem"),
204 }
205 }
206 }
207
208 impl From<RegFile> for u8 {
from(value: RegFile) -> u8209 fn from(value: RegFile) -> u8 {
210 value as u8
211 }
212 }
213
214 impl TryFrom<u32> for RegFile {
215 type Error = &'static str;
216
try_from(value: u32) -> Result<Self, Self::Error>217 fn try_from(value: u32) -> Result<Self, Self::Error> {
218 match value {
219 0 => Ok(RegFile::GPR),
220 1 => Ok(RegFile::UGPR),
221 2 => Ok(RegFile::Pred),
222 3 => Ok(RegFile::UPred),
223 4 => Ok(RegFile::Carry),
224 5 => Ok(RegFile::Bar),
225 6 => Ok(RegFile::Mem),
226 _ => Err("Invalid register file number"),
227 }
228 }
229 }
230
231 impl TryFrom<u16> for RegFile {
232 type Error = &'static str;
233
try_from(value: u16) -> Result<Self, Self::Error>234 fn try_from(value: u16) -> Result<Self, Self::Error> {
235 RegFile::try_from(u32::from(value))
236 }
237 }
238
239 impl TryFrom<u8> for RegFile {
240 type Error = &'static str;
241
try_from(value: u8) -> Result<Self, Self::Error>242 fn try_from(value: u8) -> Result<Self, Self::Error> {
243 RegFile::try_from(u32::from(value))
244 }
245 }
246
247 /// A trait for things which have an associated register file
248 pub trait HasRegFile {
file(&self) -> RegFile249 fn file(&self) -> RegFile;
250
is_uniform(&self) -> bool251 fn is_uniform(&self) -> bool {
252 self.file().is_uniform()
253 }
254
is_gpr(&self) -> bool255 fn is_gpr(&self) -> bool {
256 self.file().is_gpr()
257 }
258
is_predicate(&self) -> bool259 fn is_predicate(&self) -> bool {
260 self.file().is_predicate()
261 }
262 }
263
264 #[derive(Clone)]
265 pub struct RegFileSet {
266 bits: u8,
267 }
268
269 impl RegFileSet {
new() -> RegFileSet270 pub fn new() -> RegFileSet {
271 RegFileSet { bits: 0 }
272 }
273
len(&self) -> usize274 pub fn len(&self) -> usize {
275 self.bits.count_ones() as usize
276 }
277
contains(&self, file: RegFile) -> bool278 pub fn contains(&self, file: RegFile) -> bool {
279 self.bits & (1 << (file as u8)) != 0
280 }
281
insert(&mut self, file: RegFile) -> bool282 pub fn insert(&mut self, file: RegFile) -> bool {
283 let has_file = self.contains(file);
284 self.bits |= 1 << (file as u8);
285 !has_file
286 }
287
is_empty(&self) -> bool288 pub fn is_empty(&self) -> bool {
289 self.bits == 0
290 }
291
292 #[allow(dead_code)]
iter(&self) -> RegFileSet293 pub fn iter(&self) -> RegFileSet {
294 self.clone()
295 }
296
remove(&mut self, file: RegFile) -> bool297 pub fn remove(&mut self, file: RegFile) -> bool {
298 let has_file = self.contains(file);
299 self.bits &= !(1 << (file as u8));
300 has_file
301 }
302 }
303
304 impl FromIterator<RegFile> for RegFileSet {
from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self305 fn from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self {
306 let mut set = RegFileSet::new();
307 for file in iter {
308 set.insert(file);
309 }
310 set
311 }
312 }
313
314 impl Iterator for RegFileSet {
315 type Item = RegFile;
316
next(&mut self) -> Option<RegFile>317 fn next(&mut self) -> Option<RegFile> {
318 if self.is_empty() {
319 None
320 } else {
321 let file = self.bits.trailing_zeros().try_into().unwrap();
322 self.remove(file);
323 Some(file)
324 }
325 }
326
size_hint(&self) -> (usize, Option<usize>)327 fn size_hint(&self) -> (usize, Option<usize>) {
328 let len = self.len();
329 (len, Some(len))
330 }
331 }
332
333 #[derive(Clone, Copy)]
334 pub struct PerRegFile<T> {
335 per_file: [T; NUM_REG_FILES],
336 }
337
338 impl<T> PerRegFile<T> {
new_with<F: Fn(RegFile) -> T>(f: F) -> Self339 pub fn new_with<F: Fn(RegFile) -> T>(f: F) -> Self {
340 PerRegFile {
341 per_file: [
342 f(RegFile::GPR),
343 f(RegFile::UGPR),
344 f(RegFile::Pred),
345 f(RegFile::UPred),
346 f(RegFile::Carry),
347 f(RegFile::Bar),
348 f(RegFile::Mem),
349 ],
350 }
351 }
352
353 #[allow(dead_code)]
values(&self) -> slice::Iter<T>354 pub fn values(&self) -> slice::Iter<T> {
355 self.per_file.iter()
356 }
357
358 #[allow(dead_code)]
values_mut(&mut self) -> slice::IterMut<T>359 pub fn values_mut(&mut self) -> slice::IterMut<T> {
360 self.per_file.iter_mut()
361 }
362 }
363
364 impl<T: Default> Default for PerRegFile<T> {
default() -> Self365 fn default() -> Self {
366 PerRegFile {
367 per_file: Default::default(),
368 }
369 }
370 }
371
372 impl<T> Index<RegFile> for PerRegFile<T> {
373 type Output = T;
374
index(&self, idx: RegFile) -> &T375 fn index(&self, idx: RegFile) -> &T {
376 &self.per_file[idx as u8 as usize]
377 }
378 }
379
380 impl<T> IndexMut<RegFile> for PerRegFile<T> {
index_mut(&mut self, idx: RegFile) -> &mut T381 fn index_mut(&mut self, idx: RegFile) -> &mut T {
382 &mut self.per_file[idx as u8 as usize]
383 }
384 }
385
386 /// An SSA value
387 ///
388 /// Each SSA in NAK represents a single 32-bit or 1-bit (if a predicate) value
389 /// which must either be spilled to memory or allocated space in the specified
390 /// register file. Whenever more data is required such as a 64-bit memory
391 /// address, double-precision float, or a vec4 texture result, multiple SSA
392 /// values are used.
393 ///
394 /// Each SSA value logically contains two things: an index and a register file.
395 /// It is required that each index refers to a unique SSA value, regardless of
396 /// register file. This way the index can be used to index tightly-packed data
397 /// structures such as bitsets without having to determine separate ranges for
398 /// each register file.
399 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
400 pub struct SSAValue {
401 packed: u32,
402 }
403
404 impl SSAValue {
405 /// A special SSA value which is always invalid
406 pub const NONE: Self = SSAValue { packed: 0 };
407
408 /// Returns an SSA value with the given register file and index
new(file: RegFile, idx: u32) -> SSAValue409 pub fn new(file: RegFile, idx: u32) -> SSAValue {
410 assert!(idx > 0 && idx < (1 << 29) - 2);
411 let mut packed = idx;
412 assert!(u8::from(file) < 8);
413 packed |= u32::from(u8::from(file)) << 29;
414 SSAValue { packed: packed }
415 }
416
417 /// Returns the index of this SSA value
idx(&self) -> u32418 pub fn idx(&self) -> u32 {
419 self.packed & 0x1fffffff
420 }
421
422 /// Returns true if this SSA value is equal to SSAValue::NONE
423 #[allow(dead_code)]
is_none(&self) -> bool424 pub fn is_none(&self) -> bool {
425 self.packed == 0
426 }
427
fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result428 fn fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
429 write!(f, "{}{}", self.file().fmt_prefix(), self.idx())
430 }
431 }
432
433 impl HasRegFile for SSAValue {
434 /// Returns the register file of this SSA value
file(&self) -> RegFile435 fn file(&self) -> RegFile {
436 RegFile::try_from(self.packed >> 29).unwrap()
437 }
438 }
439
440 impl fmt::Display for SSAValue {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result441 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
442 write!(f, "%")?;
443 self.fmt_plain(f)
444 }
445 }
446
447 /// A reference to one or more SSA values
448 ///
449 /// Because each SSA value represents a single 1 or 32-bit scalar, we need a way
450 /// to reference multiple SSA values for instructions which read or write
451 /// multiple registers in the same source. When the register allocator runs,
452 /// all the SSA values in a given SSA ref will be placed in consecutive
453 /// registers, with the base register aligned to the number of values, aligned
454 /// to the next power of two.
455 ///
456 /// An SSA reference can reference between 1 and 4 SSA values. It dereferences
457 /// to a slice for easy access to individual SSA values. The structure is
458 /// designed so that is always 16B, regardless of how many SSA values are
459 /// referenced so it's easy and fairly cheap to copy around and embed in other
460 /// structures.
461 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
462 pub struct SSARef {
463 v: [SSAValue; 4],
464 }
465
466 impl SSARef {
467 /// Returns a new SSA reference
468 #[inline]
new(comps: &[SSAValue]) -> SSARef469 fn new(comps: &[SSAValue]) -> SSARef {
470 assert!(comps.len() > 0 && comps.len() <= 4);
471 let mut r = SSARef {
472 v: [SSAValue::NONE; 4],
473 };
474 for i in 0..comps.len() {
475 r.v[i] = comps[i];
476 }
477 if comps.len() < 4 {
478 r.v[3].packed = (comps.len() as u32).wrapping_neg();
479 }
480 r
481 }
482
483 /// Returns the number of components in this SSA reference
comps(&self) -> u8484 pub fn comps(&self) -> u8 {
485 if self.v[3].packed >= u32::MAX - 2 {
486 self.v[3].packed.wrapping_neg() as u8
487 } else {
488 4
489 }
490 }
491 }
492
493 impl HasRegFile for SSARef {
file(&self) -> RegFile494 fn file(&self) -> RegFile {
495 let comps = usize::from(self.comps());
496 for i in 1..comps {
497 assert!(self.v[i].file() == self.v[0].file());
498 }
499 self.v[0].file()
500 }
501 }
502
503 impl Deref for SSARef {
504 type Target = [SSAValue];
505
deref(&self) -> &[SSAValue]506 fn deref(&self) -> &[SSAValue] {
507 let comps = usize::from(self.comps());
508 &self.v[..comps]
509 }
510 }
511
512 impl DerefMut for SSARef {
deref_mut(&mut self) -> &mut [SSAValue]513 fn deref_mut(&mut self) -> &mut [SSAValue] {
514 let comps = usize::from(self.comps());
515 &mut self.v[..comps]
516 }
517 }
518
519 impl TryFrom<&[SSAValue]> for SSARef {
520 type Error = &'static str;
521
try_from(comps: &[SSAValue]) -> Result<Self, Self::Error>522 fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
523 if comps.len() == 0 {
524 Err("Empty vector")
525 } else if comps.len() > 4 {
526 Err("Too many vector components")
527 } else {
528 Ok(SSARef::new(comps))
529 }
530 }
531 }
532
533 impl TryFrom<Vec<SSAValue>> for SSARef {
534 type Error = &'static str;
535
try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error>536 fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
537 SSARef::try_from(&comps[..])
538 }
539 }
540
541 macro_rules! impl_ssa_ref_from_arr {
542 ($n: expr) => {
543 impl From<[SSAValue; $n]> for SSARef {
544 fn from(comps: [SSAValue; $n]) -> Self {
545 SSARef::new(&comps[..])
546 }
547 }
548 };
549 }
550 impl_ssa_ref_from_arr!(1);
551 impl_ssa_ref_from_arr!(2);
552 impl_ssa_ref_from_arr!(3);
553 impl_ssa_ref_from_arr!(4);
554
555 impl From<SSAValue> for SSARef {
from(val: SSAValue) -> Self556 fn from(val: SSAValue) -> Self {
557 [val].into()
558 }
559 }
560
561 impl fmt::Display for SSARef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result562 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
563 if self.comps() == 1 {
564 write!(f, "{}", self[0])
565 } else {
566 write!(f, "{{")?;
567 for (i, v) in self.iter().enumerate() {
568 if i != 0 {
569 write!(f, " ")?;
570 }
571 write!(f, "{}", v)?;
572 }
573 write!(f, "}}")
574 }
575 }
576 }
577
578 pub struct SSAValueAllocator {
579 count: u32,
580 }
581
582 impl SSAValueAllocator {
new() -> SSAValueAllocator583 pub fn new() -> SSAValueAllocator {
584 SSAValueAllocator { count: 0 }
585 }
586
max_idx(&self) -> u32587 pub fn max_idx(&self) -> u32 {
588 self.count
589 }
590
alloc(&mut self, file: RegFile) -> SSAValue591 pub fn alloc(&mut self, file: RegFile) -> SSAValue {
592 self.count += 1;
593 SSAValue::new(file, self.count)
594 }
595
alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef596 pub fn alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef {
597 assert!(comps >= 1 && comps <= 4);
598 let mut vec = [SSAValue::NONE; 4];
599 for c in 0..comps {
600 vec[usize::from(c)] = self.alloc(file);
601 }
602 vec[0..usize::from(comps)].try_into().unwrap()
603 }
604 }
605
606 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
607 pub struct RegRef {
608 packed: u32,
609 }
610
611 impl RegRef {
zero_idx(file: RegFile) -> u32612 fn zero_idx(file: RegFile) -> u32 {
613 match file {
614 RegFile::GPR => 255,
615 RegFile::UGPR => 63,
616 RegFile::Pred => 7,
617 RegFile::UPred => 7,
618 RegFile::Carry => panic!("Carry has no zero index"),
619 RegFile::Bar => panic!("Bar has no zero index"),
620 RegFile::Mem => panic!("Mem has no zero index"),
621 }
622 }
623
new(file: RegFile, base_idx: u32, comps: u8) -> RegRef624 pub fn new(file: RegFile, base_idx: u32, comps: u8) -> RegRef {
625 assert!(base_idx < (1 << 26));
626 let mut packed = base_idx;
627 assert!(comps > 0 && comps <= 8);
628 packed |= u32::from(comps - 1) << 26;
629 assert!(u8::from(file) < 8);
630 packed |= u32::from(u8::from(file)) << 29;
631 RegRef { packed: packed }
632 }
633
zero(file: RegFile, comps: u8) -> RegRef634 pub fn zero(file: RegFile, comps: u8) -> RegRef {
635 RegRef::new(file, RegRef::zero_idx(file), comps)
636 }
637
base_idx(&self) -> u32638 pub fn base_idx(&self) -> u32 {
639 self.packed & 0x03ffffff
640 }
641
idx_range(&self) -> Range<u32>642 pub fn idx_range(&self) -> Range<u32> {
643 let start = self.base_idx();
644 let end = start + u32::from(self.comps());
645 start..end
646 }
647
comps(&self) -> u8648 pub fn comps(&self) -> u8 {
649 (((self.packed >> 26) & 0x7) + 1).try_into().unwrap()
650 }
651
comp(&self, c: u8) -> RegRef652 pub fn comp(&self, c: u8) -> RegRef {
653 assert!(c < self.comps());
654 RegRef::new(self.file(), self.base_idx() + u32::from(c), 1)
655 }
656 }
657
658 impl HasRegFile for RegRef {
file(&self) -> RegFile659 fn file(&self) -> RegFile {
660 ((self.packed >> 29) & 0x7).try_into().unwrap()
661 }
662 }
663
664 impl fmt::Display for RegRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result665 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
666 write!(f, "{}{}", self.file().fmt_prefix(), self.base_idx())?;
667 if self.comps() > 1 {
668 write!(f, "..{}", self.idx_range().end)?;
669 }
670 Ok(())
671 }
672 }
673
674 #[derive(Clone, Copy)]
675 pub enum Dst {
676 None,
677 SSA(SSARef),
678 Reg(RegRef),
679 }
680
681 impl Dst {
is_none(&self) -> bool682 pub fn is_none(&self) -> bool {
683 matches!(self, Dst::None)
684 }
685
as_reg(&self) -> Option<&RegRef>686 pub fn as_reg(&self) -> Option<&RegRef> {
687 match self {
688 Dst::Reg(r) => Some(r),
689 _ => None,
690 }
691 }
692
as_ssa(&self) -> Option<&SSARef>693 pub fn as_ssa(&self) -> Option<&SSARef> {
694 match self {
695 Dst::SSA(r) => Some(r),
696 _ => None,
697 }
698 }
699
iter_ssa(&self) -> slice::Iter<'_, SSAValue>700 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
701 match self {
702 Dst::None | Dst::Reg(_) => &[],
703 Dst::SSA(ssa) => ssa.deref(),
704 }
705 .iter()
706 }
707
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>708 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
709 match self {
710 Dst::None | Dst::Reg(_) => &mut [],
711 Dst::SSA(ssa) => ssa.deref_mut(),
712 }
713 .iter_mut()
714 }
715 }
716
717 impl From<RegRef> for Dst {
from(reg: RegRef) -> Dst718 fn from(reg: RegRef) -> Dst {
719 Dst::Reg(reg)
720 }
721 }
722
723 impl<T: Into<SSARef>> From<T> for Dst {
from(ssa: T) -> Dst724 fn from(ssa: T) -> Dst {
725 Dst::SSA(ssa.into())
726 }
727 }
728
729 impl fmt::Display for Dst {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result730 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
731 match self {
732 Dst::None => write!(f, "null")?,
733 Dst::SSA(v) => v.fmt(f)?,
734 Dst::Reg(r) => r.fmt(f)?,
735 }
736 Ok(())
737 }
738 }
739
740 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
741 pub enum CBuf {
742 Binding(u8),
743
744 #[allow(dead_code)]
745 BindlessSSA(SSAValue),
746
747 #[allow(dead_code)]
748 BindlessGPR(RegRef),
749 }
750
751 impl fmt::Display for CBuf {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result752 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
753 match self {
754 CBuf::Binding(idx) => write!(f, "c[{:#x}]", idx),
755 CBuf::BindlessSSA(v) => write!(f, "cx[{}]", v),
756 CBuf::BindlessGPR(r) => write!(f, "cx[{}]", r),
757 }
758 }
759 }
760
761 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
762 pub struct CBufRef {
763 pub buf: CBuf,
764 pub offset: u16,
765 }
766
767 impl CBufRef {
offset(self, offset: u16) -> CBufRef768 pub fn offset(self, offset: u16) -> CBufRef {
769 CBufRef {
770 buf: self.buf,
771 offset: self.offset + offset,
772 }
773 }
774 }
775
776 impl fmt::Display for CBufRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result777 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
778 write!(f, "{}[{:#x}]", self.buf, self.offset)
779 }
780 }
781
782 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
783 pub enum SrcRef {
784 Zero,
785 True,
786 False,
787 Imm32(u32),
788 CBuf(CBufRef),
789 SSA(SSARef),
790 Reg(RegRef),
791 }
792
793 impl SrcRef {
is_alu(&self) -> bool794 pub fn is_alu(&self) -> bool {
795 match self {
796 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => true,
797 SrcRef::SSA(ssa) => ssa.is_gpr(),
798 SrcRef::Reg(reg) => reg.is_gpr(),
799 SrcRef::True | SrcRef::False => false,
800 }
801 }
802
is_predicate(&self) -> bool803 pub fn is_predicate(&self) -> bool {
804 match self {
805 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
806 SrcRef::True | SrcRef::False => true,
807 SrcRef::SSA(ssa) => ssa.is_predicate(),
808 SrcRef::Reg(reg) => reg.is_predicate(),
809 }
810 }
811
is_barrier(&self) -> bool812 pub fn is_barrier(&self) -> bool {
813 match self {
814 SrcRef::SSA(ssa) => ssa.file() == RegFile::Bar,
815 SrcRef::Reg(reg) => reg.file() == RegFile::Bar,
816 _ => false,
817 }
818 }
819
as_reg(&self) -> Option<&RegRef>820 pub fn as_reg(&self) -> Option<&RegRef> {
821 match self {
822 SrcRef::Reg(r) => Some(r),
823 _ => None,
824 }
825 }
826
as_ssa(&self) -> Option<&SSARef>827 pub fn as_ssa(&self) -> Option<&SSARef> {
828 match self {
829 SrcRef::SSA(r) => Some(r),
830 _ => None,
831 }
832 }
833
get_reg(&self) -> Option<&RegRef>834 pub fn get_reg(&self) -> Option<&RegRef> {
835 match self {
836 SrcRef::Zero
837 | SrcRef::True
838 | SrcRef::False
839 | SrcRef::Imm32(_)
840 | SrcRef::SSA(_) => None,
841 SrcRef::CBuf(cb) => match &cb.buf {
842 CBuf::Binding(_) | CBuf::BindlessSSA(_) => None,
843 CBuf::BindlessGPR(reg) => Some(reg),
844 },
845 SrcRef::Reg(reg) => Some(reg),
846 }
847 }
848
iter_ssa(&self) -> slice::Iter<'_, SSAValue>849 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
850 match self {
851 SrcRef::Zero
852 | SrcRef::True
853 | SrcRef::False
854 | SrcRef::Imm32(_)
855 | SrcRef::Reg(_) => &[],
856 SrcRef::CBuf(cb) => match &cb.buf {
857 CBuf::Binding(_) | CBuf::BindlessGPR(_) => &[],
858 CBuf::BindlessSSA(ssa) => slice::from_ref(ssa),
859 },
860 SrcRef::SSA(ssa) => ssa.deref(),
861 }
862 .iter()
863 }
864
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>865 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
866 match self {
867 SrcRef::Zero
868 | SrcRef::True
869 | SrcRef::False
870 | SrcRef::Imm32(_)
871 | SrcRef::Reg(_) => &mut [],
872 SrcRef::CBuf(cb) => match &mut cb.buf {
873 CBuf::Binding(_) | CBuf::BindlessGPR(_) => &mut [],
874 CBuf::BindlessSSA(ssa) => slice::from_mut(ssa),
875 },
876 SrcRef::SSA(ssa) => ssa.deref_mut(),
877 }
878 .iter_mut()
879 }
880 }
881
882 impl From<bool> for SrcRef {
from(b: bool) -> SrcRef883 fn from(b: bool) -> SrcRef {
884 if b {
885 SrcRef::True
886 } else {
887 SrcRef::False
888 }
889 }
890 }
891
892 impl From<u32> for SrcRef {
from(u: u32) -> SrcRef893 fn from(u: u32) -> SrcRef {
894 if u == 0 {
895 SrcRef::Zero
896 } else {
897 SrcRef::Imm32(u)
898 }
899 }
900 }
901
902 impl From<f32> for SrcRef {
from(f: f32) -> SrcRef903 fn from(f: f32) -> SrcRef {
904 f.to_bits().into()
905 }
906 }
907
908 impl From<CBufRef> for SrcRef {
from(cb: CBufRef) -> SrcRef909 fn from(cb: CBufRef) -> SrcRef {
910 SrcRef::CBuf(cb)
911 }
912 }
913
914 impl From<RegRef> for SrcRef {
from(reg: RegRef) -> SrcRef915 fn from(reg: RegRef) -> SrcRef {
916 SrcRef::Reg(reg)
917 }
918 }
919
920 impl<T: Into<SSARef>> From<T> for SrcRef {
from(ssa: T) -> SrcRef921 fn from(ssa: T) -> SrcRef {
922 SrcRef::SSA(ssa.into())
923 }
924 }
925
926 impl fmt::Display for SrcRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result927 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
928 match self {
929 SrcRef::Zero => write!(f, "rZ"),
930 SrcRef::True => write!(f, "pT"),
931 SrcRef::False => write!(f, "pF"),
932 SrcRef::Imm32(u) => write!(f, "{:#x}", u),
933 SrcRef::CBuf(c) => c.fmt(f),
934 SrcRef::SSA(v) => v.fmt(f),
935 SrcRef::Reg(r) => r.fmt(f),
936 }
937 }
938 }
939
/// A modifier applied to an instruction source
///
/// `FAbs`, `FNeg` and `FNegAbs` are the float modifiers, `INeg` is the
/// integer modifier and `BNot` is the bitwise/boolean modifier.  `None` is
/// accepted by every query and combinator below.
#[derive(Clone, Copy, PartialEq)]
pub enum SrcMod {
    None,
    FAbs,
    FNeg,
    FNegAbs,
    INeg,
    BNot,
}

impl SrcMod {
    /// Returns true if this is `SrcMod::None`
    pub fn is_none(&self) -> bool {
        *self == SrcMod::None
    }

    /// Returns true if the modifier takes a float absolute value
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn has_fabs(&self) -> bool {
        match self {
            SrcMod::FAbs | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FNeg => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if the modifier negates a float
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn has_fneg(&self) -> bool {
        match self {
            SrcMod::FNeg | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FAbs => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if this is `SrcMod::INeg`
    ///
    /// # Panics
    ///
    /// Panics for anything other than `None` and `INeg`.
    pub fn is_ineg(&self) -> bool {
        match self {
            SrcMod::INeg => true,
            SrcMod::None => false,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::BNot => panic!("Not an integer modifier"),
        }
    }

    /// Returns true if this is `SrcMod::BNot`
    ///
    /// # Panics
    ///
    /// Panics for anything other than `None` and `BNot`.
    pub fn is_bnot(&self) -> bool {
        match self {
            SrcMod::BNot => true,
            SrcMod::None => false,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::INeg => panic!("Not a bitwise modifier"),
        }
    }

    /// Returns the modifier obtained by taking the float absolute value on
    /// top of `self`, which is always `FAbs`
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn fabs(self) -> SrcMod {
        match self {
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
            SrcMod::None | SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs => {
                SrcMod::FAbs
            }
        }
    }

    /// Returns the modifier obtained by negating a float on top of `self`
    ///
    /// # Panics
    ///
    /// Panics for `INeg` and `BNot`.
    pub fn fneg(self) -> SrcMod {
        match self {
            SrcMod::FNeg => SrcMod::None,
            SrcMod::FNegAbs => SrcMod::FAbs,
            SrcMod::None => SrcMod::FNeg,
            SrcMod::FAbs => SrcMod::FNegAbs,
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
        }
    }

    /// Toggles between `None` and `INeg`
    ///
    /// # Panics
    ///
    /// Panics for any other modifier.
    pub fn ineg(self) -> SrcMod {
        match self {
            SrcMod::INeg => SrcMod::None,
            SrcMod::None => SrcMod::INeg,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::BNot => panic!("Not an integer source modifier"),
        }
    }

    /// Toggles between `None` and `BNot`
    ///
    /// # Panics
    ///
    /// Panics for any other modifier.
    pub fn bnot(self) -> SrcMod {
        match self {
            SrcMod::BNot => SrcMod::None,
            SrcMod::None => SrcMod::BNot,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::INeg => panic!("Not a boolean source modifier"),
        }
    }

    /// Returns the modifier obtained by applying `other` on top of `self`
    ///
    /// # Panics
    ///
    /// Panics if `other` and `self` belong to different modifier families.
    pub fn modify(self, other: SrcMod) -> SrcMod {
        match other {
            SrcMod::None => self,
            SrcMod::BNot => self.bnot(),
            SrcMod::INeg => self.ineg(),
            SrcMod::FAbs => self.fabs(),
            SrcMod::FNeg => self.fneg(),
            // Absolute value first, then the negation on top of it
            SrcMod::FNegAbs => self.fabs().fneg(),
        }
    }
}
1033
/// The kind of value an instruction source is expected to hold
///
/// `Src::supports_type` checks a source against one of these.  It accepts
/// no source modifier for `SSA`, `GPR`, `ALU` and `Bar`, the float
/// modifiers for `F32` and `F64`, `INeg` for `I32` and `BNot` for `B32`
/// and `Pred`.
#[repr(u8)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum SrcType {
    SSA,
    GPR,
    ALU,
    F32,
    F64,
    I32,
    B32,
    Pred,
    Bar,
}
1047
1048 #[derive(Clone, Copy, PartialEq)]
1049 pub struct Src {
1050 pub src_ref: SrcRef,
1051 pub src_mod: SrcMod,
1052 }
1053
1054 impl Src {
new_zero() -> Src1055 pub fn new_zero() -> Src {
1056 SrcRef::Zero.into()
1057 }
1058
new_imm_u32(u: u32) -> Src1059 pub fn new_imm_u32(u: u32) -> Src {
1060 u.into()
1061 }
1062
new_imm_bool(b: bool) -> Src1063 pub fn new_imm_bool(b: bool) -> Src {
1064 b.into()
1065 }
1066
fabs(&self) -> Src1067 pub fn fabs(&self) -> Src {
1068 Src {
1069 src_ref: self.src_ref,
1070 src_mod: self.src_mod.fabs(),
1071 }
1072 }
1073
fneg(&self) -> Src1074 pub fn fneg(&self) -> Src {
1075 Src {
1076 src_ref: self.src_ref,
1077 src_mod: self.src_mod.fneg(),
1078 }
1079 }
1080
ineg(&self) -> Src1081 pub fn ineg(&self) -> Src {
1082 Src {
1083 src_ref: self.src_ref,
1084 src_mod: self.src_mod.ineg(),
1085 }
1086 }
1087
bnot(&self) -> Src1088 pub fn bnot(&self) -> Src {
1089 Src {
1090 src_ref: self.src_ref,
1091 src_mod: self.src_mod.bnot(),
1092 }
1093 }
1094
as_ssa(&self) -> Option<&SSARef>1095 pub fn as_ssa(&self) -> Option<&SSARef> {
1096 if self.src_mod.is_none() {
1097 self.src_ref.as_ssa()
1098 } else {
1099 None
1100 }
1101 }
1102
as_bool(&self) -> Option<bool>1103 pub fn as_bool(&self) -> Option<bool> {
1104 match self.src_ref {
1105 SrcRef::True => Some(!self.src_mod.is_bnot()),
1106 SrcRef::False => Some(self.src_mod.is_bnot()),
1107 SrcRef::SSA(vec) => {
1108 assert!(vec.is_predicate() && vec.comps() == 1);
1109 None
1110 }
1111 SrcRef::Reg(reg) => {
1112 assert!(reg.is_predicate() && reg.comps() == 1);
1113 None
1114 }
1115 _ => panic!("Not a boolean source"),
1116 }
1117 }
1118
as_u32(&self) -> Option<u32>1119 pub fn as_u32(&self) -> Option<u32> {
1120 if self.src_mod.is_none() {
1121 match self.src_ref {
1122 SrcRef::Zero => Some(0),
1123 SrcRef::Imm32(u) => Some(u),
1124 SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
1125 _ => panic!("Invalid integer source"),
1126 }
1127 } else {
1128 None
1129 }
1130 }
1131
as_imm_not_i20(&self) -> Option<u32>1132 pub fn as_imm_not_i20(&self) -> Option<u32> {
1133 match self.src_ref {
1134 SrcRef::Imm32(i) => {
1135 assert!(self.src_mod.is_none());
1136 let top = i & 0xfff80000;
1137 if top == 0 || top == 0xfff80000 {
1138 None
1139 } else {
1140 Some(i)
1141 }
1142 }
1143 _ => None,
1144 }
1145 }
1146
as_imm_not_f20(&self) -> Option<u32>1147 pub fn as_imm_not_f20(&self) -> Option<u32> {
1148 match self.src_ref {
1149 SrcRef::Imm32(i) => {
1150 assert!(self.src_mod.is_none());
1151 if (i & 0xfff) == 0 {
1152 None
1153 } else {
1154 Some(i)
1155 }
1156 }
1157 _ => None,
1158 }
1159 }
1160
iter_ssa(&self) -> slice::Iter<'_, SSAValue>1161 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
1162 self.src_ref.iter_ssa()
1163 }
1164
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>1165 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
1166 self.src_ref.iter_ssa_mut()
1167 }
1168
1169 #[allow(dead_code)]
is_uniform(&self) -> bool1170 pub fn is_uniform(&self) -> bool {
1171 match self.src_ref {
1172 SrcRef::Zero
1173 | SrcRef::True
1174 | SrcRef::False
1175 | SrcRef::Imm32(_)
1176 | SrcRef::CBuf(_) => true,
1177 SrcRef::SSA(ssa) => ssa.is_uniform(),
1178 SrcRef::Reg(reg) => reg.is_uniform(),
1179 }
1180 }
1181
is_predicate(&self) -> bool1182 pub fn is_predicate(&self) -> bool {
1183 self.src_ref.is_predicate()
1184 }
1185
is_zero(&self) -> bool1186 pub fn is_zero(&self) -> bool {
1187 match self.src_ref {
1188 SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod {
1189 SrcMod::None | SrcMod::FAbs | SrcMod::INeg => true,
1190 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false,
1191 },
1192 _ => false,
1193 }
1194 }
1195
is_fneg_zero(&self, src_type: SrcType) -> bool1196 pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
1197 match self.src_ref {
1198 SrcRef::Zero | SrcRef::Imm32(0) => {
1199 matches!(self.src_mod, SrcMod::FNeg | SrcMod::FNegAbs)
1200 }
1201 SrcRef::Imm32(0x80000000) => {
1202 src_type == SrcType::F32 && self.src_mod.is_none()
1203 }
1204 _ => false,
1205 }
1206 }
1207
1208 #[allow(dead_code)]
supports_type(&self, src_type: &SrcType) -> bool1209 pub fn supports_type(&self, src_type: &SrcType) -> bool {
1210 match src_type {
1211 SrcType::SSA => {
1212 if !self.src_mod.is_none() {
1213 return false;
1214 }
1215
1216 matches!(self.src_ref, SrcRef::SSA(_) | SrcRef::Reg(_))
1217 }
1218 SrcType::GPR => {
1219 if !self.src_mod.is_none() {
1220 return false;
1221 }
1222
1223 matches!(
1224 self.src_ref,
1225 SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_)
1226 )
1227 }
1228 SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
1229 SrcType::F32 | SrcType::F64 => {
1230 match self.src_mod {
1231 SrcMod::None
1232 | SrcMod::FAbs
1233 | SrcMod::FNeg
1234 | SrcMod::FNegAbs => (),
1235 _ => return false,
1236 }
1237
1238 self.src_ref.is_alu()
1239 }
1240 SrcType::I32 => {
1241 match self.src_mod {
1242 SrcMod::None | SrcMod::INeg => (),
1243 _ => return false,
1244 }
1245
1246 self.src_ref.is_alu()
1247 }
1248 SrcType::B32 => {
1249 match self.src_mod {
1250 SrcMod::None | SrcMod::BNot => (),
1251 _ => return false,
1252 }
1253
1254 self.src_ref.is_alu()
1255 }
1256 SrcType::Pred => {
1257 match self.src_mod {
1258 SrcMod::None | SrcMod::BNot => (),
1259 _ => return false,
1260 }
1261
1262 self.src_ref.is_predicate()
1263 }
1264 SrcType::Bar => self.src_mod.is_none() && self.src_ref.is_barrier(),
1265 }
1266 }
1267 }
1268
1269 impl<T: Into<SrcRef>> From<T> for Src {
from(value: T) -> Src1270 fn from(value: T) -> Src {
1271 Src {
1272 src_ref: value.into(),
1273 src_mod: SrcMod::None,
1274 }
1275 }
1276 }
1277
1278 impl fmt::Display for Src {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1279 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1280 match self.src_mod {
1281 SrcMod::None => write!(f, "{}", self.src_ref),
1282 SrcMod::FAbs => write!(f, "|{}|", self.src_ref),
1283 SrcMod::FNeg => write!(f, "-{}", self.src_ref),
1284 SrcMod::FNegAbs => write!(f, "-|{}|", self.src_ref),
1285 SrcMod::INeg => write!(f, "-{}", self.src_ref),
1286 SrcMod::BNot => write!(f, "!{}", self.src_ref),
1287 }
1288 }
1289 }
1290
1291 impl SrcType {
1292 const DEFAULT: SrcType = SrcType::GPR;
1293 }
1294
1295 pub enum SrcTypeList {
1296 Array(&'static [SrcType]),
1297 Uniform(SrcType),
1298 }
1299
1300 impl Index<usize> for SrcTypeList {
1301 type Output = SrcType;
1302
index(&self, idx: usize) -> &SrcType1303 fn index(&self, idx: usize) -> &SrcType {
1304 match self {
1305 SrcTypeList::Array(arr) => &arr[idx],
1306 SrcTypeList::Uniform(typ) => typ,
1307 }
1308 }
1309 }
1310
/// Slice-based access to the sources of an op.
///
/// NOTE(review): implementations appear to come from
/// `#[derive(SrcsAsSlice)]` on the op structs -- confirm in `nak_ir_proc`.
pub trait SrcsAsSlice {
    /// Returns the sources as a shared slice.
    fn srcs_as_slice(&self) -> &[Src];
    /// Returns the sources as a mutable slice.
    fn srcs_as_mut_slice(&mut self) -> &mut [Src];
    /// Returns the source types as a list indexable by source position.
    fn src_types(&self) -> SrcTypeList;
}
1316
/// Slice-based access to the destinations of an op.
///
/// NOTE(review): implementations appear to come from
/// `#[derive(DstsAsSlice)]` on the op structs -- confirm in `nak_ir_proc`.
pub trait DstsAsSlice {
    /// Returns the destinations as a shared slice.
    fn dsts_as_slice(&self) -> &[Dst];
    /// Returns the destinations as a mutable slice.
    fn dsts_as_mut_slice(&mut self) -> &mut [Dst];
}
1321
fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result1322 fn fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result {
1323 if dsts.is_empty() {
1324 return Ok(());
1325 }
1326
1327 // Figure out the last non-null dst
1328 //
1329 // Note: By making the top inclusive and starting at 0, we ensure that
1330 // at least one dst always gets printed.
1331 let mut last_dst = 0;
1332 for (i, dst) in dsts.iter().enumerate() {
1333 if !dst.is_none() {
1334 last_dst = i;
1335 }
1336 }
1337
1338 for i in 0..(last_dst + 1) {
1339 if i != 0 {
1340 write!(f, " ")?;
1341 }
1342 write!(f, "{}", &dsts[i])?;
1343 }
1344 Ok(())
1345 }
1346
1347 pub trait DisplayOp: DstsAsSlice {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1348 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1349 fmt_dst_slice(f, self.dsts_as_slice())
1350 }
1351
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1352 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
1353 }
1354
/// Adapter that turns a formatting closure into something `Display`able.
///
/// Hack struct so we can re-use Formatters.  Shamelessly stolen from
/// https://users.rust-lang.org/t/reusing-an-fmt-formatter/8531/4
pub struct Fmt<F: Fn(&mut fmt::Formatter) -> fmt::Result>(pub F);

impl<F: Fn(&mut fmt::Formatter) -> fmt::Result> fmt::Display for Fmt<F> {
    /// Runs the wrapped closure against `f`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Fmt(func) = self;
        func(f)
    }
}
1369
/// Implements `fmt::Display` for an op type on top of its `DisplayOp`
/// hooks.
///
/// The destinations are rendered into a temporary string first.  If that
/// string is non-empty the output is `<dsts> = <op>`, otherwise only the
/// op is printed.
macro_rules! impl_display_for_op {
    ($op: ident) => {
        impl fmt::Display for $op {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let mut dsts = String::new();
                write!(dsts, "{}", Fmt(|f| self.fmt_dsts(f)))?;
                if dsts.is_empty() {
                    return self.fmt_op(f);
                }
                write!(f, "{} = ", dsts)?;
                self.fmt_op(f)
            }
        }
    };
}
1384
1385 #[allow(dead_code)]
1386 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1387 pub enum PredSetOp {
1388 And,
1389 Or,
1390 Xor,
1391 }
1392
1393 impl PredSetOp {
is_trivial(&self, accum: &Src) -> bool1394 pub fn is_trivial(&self, accum: &Src) -> bool {
1395 if let Some(b) = accum.as_bool() {
1396 match self {
1397 PredSetOp::And => b,
1398 PredSetOp::Or => !b,
1399 PredSetOp::Xor => !b,
1400 }
1401 } else {
1402 false
1403 }
1404 }
1405 }
1406
1407 impl fmt::Display for PredSetOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1408 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1409 match self {
1410 PredSetOp::And => write!(f, ".and"),
1411 PredSetOp::Or => write!(f, ".or"),
1412 PredSetOp::Xor => write!(f, ".xor"),
1413 }
1414 }
1415 }
1416
/// Floating-point comparison operators, in ordered and unordered flavors,
/// plus the unary `IsNum` / `IsNan` tests.
///
/// Ordered operators display as `.eq`, `.ne`, ...; unordered ones carry a
/// trailing `u` (`.equ`, `.neu`, ...).
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatCmpOp {
    OrdEq,
    OrdNe,
    OrdLt,
    OrdLe,
    OrdGt,
    OrdGe,
    UnordEq,
    UnordNe,
    UnordLt,
    UnordLe,
    UnordGt,
    UnordGe,
    IsNum,
    IsNan,
}

impl FloatCmpOp {
    /// Returns the operator to use when the two operands are swapped.
    ///
    /// Equality and inequality are returned unchanged; `Lt`/`Gt` and
    /// `Le`/`Ge` trade places, keeping their ordered/unordered flavor.
    ///
    /// # Panics
    ///
    /// Panics for `IsNum` and `IsNan`.
    pub fn flip(self) -> FloatCmpOp {
        match self {
            Self::OrdEq | Self::OrdNe | Self::UnordEq | Self::UnordNe => self,
            Self::OrdLt => Self::OrdGt,
            Self::OrdGt => Self::OrdLt,
            Self::OrdLe => Self::OrdGe,
            Self::OrdGe => Self::OrdLe,
            Self::UnordLt => Self::UnordGt,
            Self::UnordGt => Self::UnordLt,
            Self::UnordLe => Self::UnordGe,
            Self::UnordGe => Self::UnordLe,
            Self::IsNum | Self::IsNan => panic!("Cannot flip unop"),
        }
    }
}

impl fmt::Display for FloatCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::OrdEq => ".eq",
            Self::OrdNe => ".ne",
            Self::OrdLt => ".lt",
            Self::OrdLe => ".le",
            Self::OrdGt => ".gt",
            Self::OrdGe => ".ge",
            Self::UnordEq => ".equ",
            Self::UnordNe => ".neu",
            Self::UnordLt => ".ltu",
            Self::UnordLe => ".leu",
            Self::UnordGt => ".gtu",
            Self::UnordGe => ".geu",
            Self::IsNum => ".num",
            Self::IsNan => ".nan",
        };
        f.write_str(suffix)
    }
}
1474
/// Integer comparison operators, displayed as `.eq`, `.ne`, `.lt`, `.le`,
/// `.gt` and `.ge`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}

impl IntCmpOp {
    /// Returns the operator to use when the two operands are swapped.
    ///
    /// `Eq` and `Ne` are returned unchanged; `Lt`/`Gt` and `Le`/`Ge` trade
    /// places.
    pub fn flip(self) -> IntCmpOp {
        match self {
            Self::Lt => Self::Gt,
            Self::Gt => Self::Lt,
            Self::Le => Self::Ge,
            Self::Ge => Self::Le,
            Self::Eq | Self::Ne => self,
        }
    }
}

impl fmt::Display for IntCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Eq => ".eq",
            Self::Ne => ".ne",
            Self::Lt => ".lt",
            Self::Le => ".le",
            Self::Gt => ".gt",
            Self::Ge => ".ge",
        };
        f.write_str(suffix)
    }
}
1509
/// Operand type of an integer comparison, displayed as `.u32` or `.i32`.
pub enum IntCmpType {
    U32,
    I32,
}

impl fmt::Display for IntCmpType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::U32 => ".u32",
            Self::I32 => ".i32",
        };
        f.write_str(suffix)
    }
}
1523
1524 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1525 pub enum LogicOp2 {
1526 And,
1527 Or,
1528 Xor,
1529 PassB,
1530 }
1531
1532 impl fmt::Display for LogicOp2 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1533 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1534 match self {
1535 LogicOp2::And => write!(f, "and"),
1536 LogicOp2::Or => write!(f, "or"),
1537 LogicOp2::Xor => write!(f, "xor"),
1538 LogicOp2::PassB => write!(f, "pass_b"),
1539 }
1540 }
1541 }
1542
1543 impl LogicOp2 {
to_lut(self) -> LogicOp31544 pub fn to_lut(self) -> LogicOp3 {
1545 match self {
1546 LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
1547 LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
1548 LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
1549 LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
1550 }
1551 }
1552 }
1553
/// Three-input logic operation stored as an 8-entry truth table.
///
/// Bit `i` of `lut` is the result for the input combination whose bits are
/// `(x, y, z) = (i >> 2 & 1, i >> 1 & 1, i & 1)`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
    pub lut: u8,
}

impl LogicOp3 {
    /// Truth-table columns of the three inputs: `SRC_MASKS[i]` has a bit
    /// set for every table entry in which source `i` is true.
    pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];

    /// Builds a table by evaluating `f` on the three source masks.
    #[inline]
    pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp3 {
        let [x, y, z] = LogicOp3::SRC_MASKS;
        LogicOp3 { lut: f(x, y, z) }
    }

    /// Builds the table that always yields `val`.
    pub fn new_const(val: bool) -> LogicOp3 {
        let lut = if val { u8::MAX } else { u8::MIN };
        LogicOp3 { lut }
    }

    /// Returns whether the result depends on source `src_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx` is not 0, 1 or 2.
    pub fn src_used(&self, src_idx: usize) -> bool {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        // Entries where the source is false vs. the matching entries where
        // it is true, aligned onto the same bit positions.
        let when_false = self.lut & !mask;
        let when_true = (self.lut >> shift) & !mask;
        when_false != when_true
    }

    /// Rewrites the table as if source `src_idx` were always `val`.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx` is not 0, 1 or 2.
    pub fn fix_src(&mut self, src_idx: usize, val: bool) {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        self.lut = if val {
            let kept = self.lut & mask;
            kept | (kept >> shift)
        } else {
            let kept = self.lut & !mask;
            (kept << shift) | kept
        };
    }

    /// Rewrites the table as if source `src_idx` were logically inverted.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx` is not 0, 1 or 2.
    pub fn invert_src(&mut self, src_idx: usize) {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        let when_true = self.lut & mask;
        let when_false = self.lut & !mask;
        self.lut = (when_false << shift) | (when_true >> shift);
    }

    /// Evaluates the table bitwise on `x`, `y` and `z`.
    ///
    /// Works for any type that provides `&`, `|` and `!`.
    pub fn eval<
        T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
    >(
        &self,
        x: T,
        y: T,
        z: T,
    ) -> T {
        let mut res = x & !x; // zero
        for entry in 0..8u8 {
            if (self.lut & (1 << entry)) == 0 {
                continue;
            }
            // Each input appears plain or negated depending on its bit in
            // the table entry's index.
            let tx = if (entry & 4) != 0 { x } else { !x };
            let ty = if (entry & 2) != 0 { y } else { !y };
            let tz = if (entry & 1) != 0 { z } else { !z };
            res = res | (tx & ty & tz);
        }
        res
    }
}

impl fmt::Display for LogicOp3 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let LogicOp3 { lut } = self;
        write!(f, "LUT[{:#x}]", lut)
    }
}
1647
/// Floating-point type selector, displayed as `.f16`, `.f32` or `.f64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatType {
    F16,
    F32,
    F64,
}

impl FloatType {
    /// Returns the float type that is `bits` bits wide.
    ///
    /// This is the inverse of [`FloatType::bits`].
    ///
    /// # Panics
    ///
    /// Panics with "Invalid float type size" unless `bits` is 16, 32 or 64.
    pub fn from_bits(bits: usize) -> FloatType {
        match bits {
            16 => FloatType::F16,
            32 => FloatType::F32,
            64 => FloatType::F64,
            _ => panic!("Invalid float type size"),
        }
    }

    /// Returns the width of this type in bits.
    pub fn bits(&self) -> usize {
        match self {
            FloatType::F16 => 16,
            FloatType::F32 => 32,
            FloatType::F64 => 64,
        }
    }
}

impl fmt::Display for FloatType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            FloatType::F16 => ".f16",
            FloatType::F32 => ".f32",
            FloatType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
1683
/// Floating-point rounding mode, displayed as `.re`, `.rm`, `.rp` or `.rz`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FRndMode {
    NearestEven,
    NegInf,
    PosInf,
    Zero,
}

impl fmt::Display for FRndMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::NearestEven => ".re",
            Self::NegInf => ".rm",
            Self::PosInf => ".rp",
            Self::Zero => ".rz",
        };
        f.write_str(suffix)
    }
}
1703
/// Texture dimensionality, displayed as `.1d`, `.a1d`, `.2d`, `.a2d`,
/// `.3d`, `.cube` or `.acube`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDim {
    _1D,
    Array1D,
    _2D,
    Array2D,
    _3D,
    Cube,
    ArrayCube,
}

impl fmt::Display for TexDim {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::_1D => ".1d",
            Self::Array1D => ".a1d",
            Self::_2D => ".2d",
            Self::Array2D => ".a2d",
            Self::_3D => ".3d",
            Self::Cube => ".cube",
            Self::ArrayCube => ".acube",
        };
        f.write_str(suffix)
    }
}
1728
/// Texture level-of-detail mode, displayed as `la`, `lz`, `lb`, `ll`, `lc`
/// or `lb.lc` (no leading dot).
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexLodMode {
    Auto,
    Zero,
    Bias,
    Lod,
    Clamp,
    BiasClamp,
}

impl fmt::Display for TexLodMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Auto => "la",
            Self::Zero => "lz",
            Self::Bias => "lb",
            Self::Lod => "ll",
            Self::Clamp => "lc",
            Self::BiasClamp => "lb.lc",
        };
        f.write_str(name)
    }
}
1751
/// Offset mode of a `tld4` operation, displayed as `no_off`, `aoffi` or
/// `ptp`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum Tld4OffsetMode {
    None,
    AddOffI,
    PerPx,
}

impl fmt::Display for Tld4OffsetMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::None => "no_off",
            Self::AddOffI => "aoffi",
            Self::PerPx => "ptp",
        };
        f.write_str(name)
    }
}
1768
/// Kind of texture query, displayed as `dimension`, `texture_type` or
/// `sampler_pos`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexQuery {
    Dimension,
    TextureType,
    SamplerPos,
}

impl fmt::Display for TexQuery {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Dimension => "dimension",
            Self::TextureType => "texture_type",
            Self::SamplerPos => "sampler_pos",
        };
        f.write_str(name)
    }
}
1786
/// Image dimensionality, displayed as `.1d`, `.buf`, `.a1d`, `.2d`, `.a2d`
/// or `.3d`.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ImageDim {
    _1D,
    _1DBuffer,
    _1DArray,
    _2D,
    _2DArray,
    _3D,
}

impl ImageDim {
    /// Returns the number of coordinate components for this dimensionality:
    /// 1 for 1D and buffer images, 2 for 1D-array and 2D images, and 3 for
    /// 2D-array and 3D images.
    pub fn coord_comps(&self) -> u8 {
        match self {
            Self::_1D | Self::_1DBuffer => 1,
            Self::_1DArray | Self::_2D => 2,
            Self::_2DArray | Self::_3D => 3,
        }
    }
}

impl fmt::Display for ImageDim {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::_1D => ".1d",
            Self::_1DBuffer => ".buf",
            Self::_1DArray => ".a1d",
            Self::_2D => ".2d",
            Self::_2DArray => ".a2d",
            Self::_3D => ".3d",
        };
        f.write_str(suffix)
    }
}
1822
/// Integer type selector covering 8, 16, 32 and 64 bits in unsigned (`U*`)
/// and signed (`I*`) flavors.  Displayed as `.u8`, `.i8`, ..., `.i64`.
pub enum IntType {
    U8,
    I8,
    U16,
    I16,
    U32,
    I32,
    U64,
    I64,
}

impl IntType {
    /// Returns the integer type with the given width and signedness.
    ///
    /// # Panics
    ///
    /// Panics with "Invalid integer type size" unless `bits` is 8, 16, 32
    /// or 64.
    pub fn from_bits(bits: usize, is_signed: bool) -> IntType {
        match (bits, is_signed) {
            (8, false) => IntType::U8,
            (8, true) => IntType::I8,
            (16, false) => IntType::U16,
            (16, true) => IntType::I16,
            (32, false) => IntType::U32,
            (32, true) => IntType::I32,
            (64, false) => IntType::U64,
            (64, true) => IntType::I64,
            _ => panic!("Invalid integer type size"),
        }
    }

    /// Returns `true` for the `I*` variants and `false` for the `U*` ones.
    pub fn is_signed(&self) -> bool {
        matches!(
            self,
            IntType::I8 | IntType::I16 | IntType::I32 | IntType::I64
        )
    }

    /// Returns the width of this type in bits.
    pub fn bits(&self) -> usize {
        match self {
            IntType::I8 | IntType::U8 => 8,
            IntType::I16 | IntType::U16 => 16,
            IntType::I32 | IntType::U32 => 32,
            IntType::I64 | IntType::U64 => 64,
        }
    }
}

impl fmt::Display for IntType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            IntType::U8 => ".u8",
            IntType::I8 => ".i8",
            IntType::U16 => ".u16",
            IntType::I16 => ".i16",
            IntType::U32 => ".u32",
            IntType::I32 => ".i32",
            IntType::U64 => ".u64",
            IntType::I64 => ".i64",
        };
        f.write_str(suffix)
    }
}
1900
/// Memory address width selector, displayed as `.a32` or `.a64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemAddrType {
    A32,
    A64,
}

impl fmt::Display for MemAddrType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::A32 => ".a32",
            Self::A64 => ".a64",
        };
        f.write_str(suffix)
    }
}
1915
/// Type of a memory access, displayed as `.u8`, `.i8`, `.u16`, `.i16`,
/// `.b32`, `.b64` or `.b128`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemType {
    U8,
    I8,
    U16,
    I16,
    B32,
    B64,
    B128,
}

impl MemType {
    /// Returns the memory type for an access of `size` bytes.
    ///
    /// `is_signed` only matters for 1- and 2-byte accesses; sizes 4, 8 and
    /// 16 map to `B32`, `B64` and `B128` regardless of signedness.
    ///
    /// # Panics
    ///
    /// Panics with "Invalid memory load/store size" for any other size.
    pub fn from_size(size: u8, is_signed: bool) -> MemType {
        match (size, is_signed) {
            (1, true) => MemType::I8,
            (1, false) => MemType::U8,
            (2, true) => MemType::I16,
            (2, false) => MemType::U16,
            (4, _) => MemType::B32,
            (8, _) => MemType::B64,
            (16, _) => MemType::B128,
            _ => panic!("Invalid memory load/store size"),
        }
    }
}

impl fmt::Display for MemType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemType::U8 => ".u8",
            MemType::I8 => ".i8",
            MemType::U16 => ".u16",
            MemType::I16 => ".i16",
            MemType::B32 => ".b32",
            MemType::B64 => ".b64",
            MemType::B128 => ".b128",
        };
        f.write_str(suffix)
    }
}
1965
1966 #[allow(dead_code)]
1967 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1968 pub enum MemOrder {
1969 Constant,
1970 Weak,
1971 Strong(MemScope),
1972 }
1973
1974 impl fmt::Display for MemOrder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1975 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1976 match self {
1977 MemOrder::Constant => write!(f, ".constant"),
1978 MemOrder::Weak => write!(f, ".weak"),
1979 MemOrder::Strong(scope) => write!(f, ".strong{}", scope),
1980 }
1981 }
1982 }
1983
/// Scope of a strongly-ordered memory access, displayed as `.cta`, `.gpu`
/// or `.sys`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemScope {
    CTA,
    GPU,
    System,
}

impl fmt::Display for MemScope {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::CTA => ".cta",
            Self::GPU => ".gpu",
            Self::System => ".sys",
        };
        f.write_str(suffix)
    }
}
2001
2002 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2003 pub enum MemSpace {
2004 Global(MemAddrType),
2005 Local,
2006 Shared,
2007 }
2008
2009 impl MemSpace {
addr_type(&self) -> MemAddrType2010 pub fn addr_type(&self) -> MemAddrType {
2011 match self {
2012 MemSpace::Global(t) => *t,
2013 MemSpace::Local => MemAddrType::A32,
2014 MemSpace::Shared => MemAddrType::A32,
2015 }
2016 }
2017 }
2018
2019 impl fmt::Display for MemSpace {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2020 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2021 match self {
2022 MemSpace::Global(t) => write!(f, ".global{t}"),
2023 MemSpace::Local => write!(f, ".local"),
2024 MemSpace::Shared => write!(f, ".shared"),
2025 }
2026 }
2027 }
2028
/// Eviction priority of a memory access.
///
/// Displayed as `.ef`, `.el` or `.lu`; `Normal` prints nothing.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    First,
    Normal,
    Last,
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // The normal priority has no textual form.
            Self::Normal => return Ok(()),
            Self::First => ".ef",
            Self::Last => ".el",
            Self::Unchanged => ".lu",
        };
        f.write_str(suffix)
    }
}
2048
2049 #[derive(Clone)]
2050 pub struct MemAccess {
2051 pub mem_type: MemType,
2052 pub space: MemSpace,
2053 pub order: MemOrder,
2054 pub eviction_priority: MemEvictionPriority,
2055 }
2056
2057 impl fmt::Display for MemAccess {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2058 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2059 write!(
2060 f,
2061 "{}{}{}{}",
2062 self.space, self.order, self.eviction_priority, self.mem_type,
2063 )
2064 }
2065 }
2066
/// Data type of an atomic operation, displayed as `.f16x2`, `.u32`,
/// `.i32`, `.f32`, `.u64`, `.i64` or `.f64`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomType {
    F16x2,
    U32,
    I32,
    F32,
    U64,
    I64,
    F64,
}

impl AtomType {
    /// Returns the float atomic type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics with "16-bit float atomics not yet supported" for 16 and
    /// with "Invalid float atomic type" for anything other than 32 or 64.
    pub fn F(bits: u8) -> AtomType {
        match bits {
            32 => Self::F32,
            64 => Self::F64,
            16 => panic!("16-bit float atomics not yet supported"),
            _ => panic!("Invalid float atomic type"),
        }
    }

    /// Returns the unsigned atomic type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics with "Invalid uint atomic type" unless `bits` is 32 or 64.
    pub fn U(bits: u8) -> AtomType {
        match bits {
            64 => Self::U64,
            32 => Self::U32,
            _ => panic!("Invalid uint atomic type"),
        }
    }

    /// Returns the signed atomic type that is `bits` bits wide.
    ///
    /// # Panics
    ///
    /// Panics with "Invalid int atomic type" unless `bits` is 32 or 64.
    pub fn I(bits: u8) -> AtomType {
        match bits {
            64 => Self::I64,
            32 => Self::I32,
            _ => panic!("Invalid int atomic type"),
        }
    }
}

impl fmt::Display for AtomType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::F16x2 => ".f16x2",
            Self::U32 => ".u32",
            Self::I32 => ".i32",
            Self::F32 => ".f32",
            Self::U64 => ".u64",
            Self::I64 => ".i64",
            Self::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2119
/// Atomic operation kind, displayed as `.add`, `.min`, `.max`, `.inc`,
/// `.dec`, `.and`, `.or`, `.xor`, `.exch` or `.cmpexch`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp {
    Add,
    Min,
    Max,
    Inc,
    Dec,
    And,
    Or,
    Xor,
    Exch,
    CmpExch,
}

impl fmt::Display for AtomOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Add => ".add",
            Self::Min => ".min",
            Self::Max => ".max",
            Self::Inc => ".inc",
            Self::Dec => ".dec",
            Self::And => ".and",
            Self::Or => ".or",
            Self::Xor => ".xor",
            Self::Exch => ".exch",
            Self::CmpExch => ".cmpexch",
        };
        f.write_str(suffix)
    }
}
2151
/// Interpolation frequency selector.
///
/// NOTE(review): the exact meaning of each variant is defined by the users
/// of this enum, which are outside this part of the file -- confirm there.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpFreq {
    Pass,
    PassMulW,
    Constant,
    State,
}
2160
/// Interpolation location selector.
///
/// NOTE(review): the exact meaning of each variant is defined by the users
/// of this enum, which are outside this part of the file -- confirm there.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpLoc {
    Default,
    Centroid,
    Offset,
}
2168
/// Description of an attribute access: an address, a component count and
/// three boolean flags.
///
/// NOTE(review): units of `addr` and the precise meaning of the `patch`,
/// `output` and `phys` flags are not visible here -- confirm with the
/// users of this struct.
pub struct AttrAccess {
    pub addr: u16,
    pub comps: u8,
    pub patch: bool,
    pub output: bool,
    pub phys: bool,
}
2176
2177 #[repr(C)]
2178 #[derive(SrcsAsSlice, DstsAsSlice)]
2179 pub struct OpFAdd {
2180 pub dst: Dst,
2181
2182 #[src_type(F32)]
2183 pub srcs: [Src; 2],
2184
2185 pub saturate: bool,
2186 pub rnd_mode: FRndMode,
2187 pub ftz: bool,
2188 }
2189
2190 impl DisplayOp for OpFAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2191 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2192 let sat = if self.saturate { ".sat" } else { "" };
2193 write!(f, "fadd{sat}")?;
2194 if self.rnd_mode != FRndMode::NearestEven {
2195 write!(f, "{}", self.rnd_mode)?;
2196 }
2197 if self.ftz {
2198 write!(f, ".ftz")?;
2199 }
2200 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2201 }
2202 }
2203 impl_display_for_op!(OpFAdd);
2204
2205 #[repr(C)]
2206 #[derive(SrcsAsSlice, DstsAsSlice)]
2207 pub struct OpFFma {
2208 pub dst: Dst,
2209
2210 #[src_type(F32)]
2211 pub srcs: [Src; 3],
2212
2213 pub saturate: bool,
2214 pub rnd_mode: FRndMode,
2215 pub ftz: bool,
2216 pub dnz: bool,
2217 }
2218
2219 impl DisplayOp for OpFFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2220 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2221 let sat = if self.saturate { ".sat" } else { "" };
2222 write!(f, "ffma{sat}")?;
2223 if self.rnd_mode != FRndMode::NearestEven {
2224 write!(f, "{}", self.rnd_mode)?;
2225 }
2226 if self.dnz {
2227 write!(f, ".dnz")?;
2228 } else if self.ftz {
2229 write!(f, ".ftz")?;
2230 }
2231 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2232 }
2233 }
2234 impl_display_for_op!(OpFFma);
2235
2236 #[repr(C)]
2237 #[derive(SrcsAsSlice, DstsAsSlice)]
2238 pub struct OpFMnMx {
2239 pub dst: Dst,
2240
2241 #[src_type(F32)]
2242 pub srcs: [Src; 2],
2243
2244 #[src_type(Pred)]
2245 pub min: Src,
2246
2247 pub ftz: bool,
2248 }
2249
2250 impl DisplayOp for OpFMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2251 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2252 let ftz = if self.ftz { ".ftz" } else { "" };
2253 write!(
2254 f,
2255 "fmnmx{ftz} {} {} {}",
2256 self.srcs[0], self.srcs[1], self.min
2257 )
2258 }
2259 }
2260 impl_display_for_op!(OpFMnMx);
2261
2262 #[repr(C)]
2263 #[derive(SrcsAsSlice, DstsAsSlice)]
2264 pub struct OpFMul {
2265 pub dst: Dst,
2266
2267 #[src_type(F32)]
2268 pub srcs: [Src; 2],
2269
2270 pub saturate: bool,
2271 pub rnd_mode: FRndMode,
2272 pub ftz: bool,
2273 pub dnz: bool,
2274 }
2275
2276 impl DisplayOp for OpFMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2277 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2278 let sat = if self.saturate { ".sat" } else { "" };
2279 write!(f, "fmul{sat}")?;
2280 if self.rnd_mode != FRndMode::NearestEven {
2281 write!(f, "{}", self.rnd_mode)?;
2282 }
2283 if self.dnz {
2284 write!(f, ".dnz")?;
2285 } else if self.ftz {
2286 write!(f, ".ftz")?;
2287 }
2288 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2289 }
2290 }
2291 impl_display_for_op!(OpFMul);
2292
2293 #[repr(C)]
2294 #[derive(SrcsAsSlice, DstsAsSlice)]
2295 pub struct OpFSet {
2296 pub dst: Dst,
2297 pub cmp_op: FloatCmpOp,
2298
2299 #[src_type(F32)]
2300 pub srcs: [Src; 2],
2301
2302 pub ftz: bool,
2303 }
2304
2305 impl DisplayOp for OpFSet {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2306 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2307 let ftz = if self.ftz { ".ftz" } else { "" };
2308 write!(
2309 f,
2310 "fset{}{ftz} {} {}",
2311 self.cmp_op, self.srcs[0], self.srcs[1]
2312 )
2313 }
2314 }
2315 impl_display_for_op!(OpFSet);
2316
2317 #[repr(C)]
2318 #[derive(SrcsAsSlice, DstsAsSlice)]
2319 pub struct OpFSetP {
2320 pub dst: Dst,
2321
2322 pub set_op: PredSetOp,
2323 pub cmp_op: FloatCmpOp,
2324
2325 #[src_type(F32)]
2326 pub srcs: [Src; 2],
2327
2328 #[src_type(Pred)]
2329 pub accum: Src,
2330
2331 pub ftz: bool,
2332 }
2333
2334 impl DisplayOp for OpFSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2335 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2336 let ftz = if self.ftz { ".ftz" } else { "" };
2337 write!(f, "fsetp{}{ftz}", self.cmp_op)?;
2338 if !self.set_op.is_trivial(&self.accum) {
2339 write!(f, "{}", self.set_op)?;
2340 }
2341 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2342 if !self.set_op.is_trivial(&self.accum) {
2343 write!(f, " {}", self.accum)?;
2344 }
2345 Ok(())
2346 }
2347 }
2348 impl_display_for_op!(OpFSetP);
2349
/// One of the four per-entry operations of an `fswzadd`, displayed as
/// `add`, `subr`, `sub` or `mov2`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    Add,
    SubRight,
    SubLeft,
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Add => "add",
            Self::SubRight => "subr",
            Self::SubLeft => "sub",
            Self::MoveLeft => "mov2",
        };
        f.write_str(name)
    }
}
2369
2370 #[repr(C)]
2371 #[derive(SrcsAsSlice, DstsAsSlice)]
2372 pub struct OpFSwzAdd {
2373 pub dst: Dst,
2374
2375 #[src_type(GPR)]
2376 pub srcs: [Src; 2],
2377
2378 pub rnd_mode: FRndMode,
2379 pub ftz: bool,
2380
2381 pub ops: [FSwzAddOp; 4],
2382 }
2383
2384 impl DisplayOp for OpFSwzAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2385 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2386 write!(f, "fswzadd",)?;
2387 if self.rnd_mode != FRndMode::NearestEven {
2388 write!(f, "{}", self.rnd_mode)?;
2389 }
2390 if self.ftz {
2391 write!(f, ".ftz")?;
2392 }
2393 write!(
2394 f,
2395 " {} {} [{}, {}, {}, {}]",
2396 self.srcs[0],
2397 self.srcs[1],
2398 self.ops[0],
2399 self.ops[1],
2400 self.ops[2],
2401 self.ops[3],
2402 )
2403 }
2404 }
2405 impl_display_for_op!(OpFSwzAdd);
2406
/// Sub-operation of an `rro` op, displayed as `.sincos` or `.exp2`.
pub enum RroOp {
    SinCos,
    Exp2,
}

impl fmt::Display for RroOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::SinCos => ".sincos",
            Self::Exp2 => ".exp2",
        };
        f.write_str(suffix)
    }
}
2420
2421 /// MuFu range reduction operator
2422 ///
2423 /// Not available on SM70+
2424 #[repr(C)]
2425 #[derive(SrcsAsSlice, DstsAsSlice)]
2426 pub struct OpRro {
2427 pub dst: Dst,
2428 pub op: RroOp,
2429
2430 #[src_type(F32)]
2431 pub src: Src,
2432 }
2433
2434 impl DisplayOp for OpRro {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2435 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2436 write!(f, "rro{} {}", self.op, self.src)
2437 }
2438 }
2439 impl_display_for_op!(OpRro);
2440
/// Function selected by a `mufu` op, displayed as `cos`, `sin`, `exp2`,
/// `log2`, `rcp`, `rsq`, `rcp64h`, `rsq64h`, `sqrt` or `tanh`.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum MuFuOp {
    Cos,
    Sin,
    Exp2,
    Log2,
    Rcp,
    Rsq,
    Rcp64H,
    Rsq64H,
    Sqrt,
    Tanh,
}

impl fmt::Display for MuFuOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Cos => "cos",
            Self::Sin => "sin",
            Self::Exp2 => "exp2",
            Self::Log2 => "log2",
            Self::Rcp => "rcp",
            Self::Rsq => "rsq",
            Self::Rcp64H => "rcp64h",
            Self::Rsq64H => "rsq64h",
            Self::Sqrt => "sqrt",
            Self::Tanh => "tanh",
        };
        f.write_str(name)
    }
}
2472
2473 #[repr(C)]
2474 #[derive(SrcsAsSlice, DstsAsSlice)]
2475 pub struct OpMuFu {
2476 pub dst: Dst,
2477 pub op: MuFuOp,
2478
2479 #[src_type(F32)]
2480 pub src: Src,
2481 }
2482
2483 impl DisplayOp for OpMuFu {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2484 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2485 write!(f, "mufu.{} {}", self.op, self.src)
2486 }
2487 }
2488 impl_display_for_op!(OpMuFu);
2489
2490 #[repr(C)]
2491 #[derive(SrcsAsSlice, DstsAsSlice)]
2492 pub struct OpDAdd {
2493 pub dst: Dst,
2494
2495 #[src_type(F64)]
2496 pub srcs: [Src; 2],
2497
2498 pub rnd_mode: FRndMode,
2499 }
2500
2501 impl DisplayOp for OpDAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2502 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2503 write!(f, "dadd")?;
2504 if self.rnd_mode != FRndMode::NearestEven {
2505 write!(f, "{}", self.rnd_mode)?;
2506 }
2507 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2508 }
2509 }
2510 impl_display_for_op!(OpDAdd);
2511
2512 #[repr(C)]
2513 #[derive(SrcsAsSlice, DstsAsSlice)]
2514 pub struct OpDMul {
2515 pub dst: Dst,
2516
2517 #[src_type(F64)]
2518 pub srcs: [Src; 2],
2519
2520 pub rnd_mode: FRndMode,
2521 }
2522
2523 impl DisplayOp for OpDMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2524 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2525 write!(f, "dmul")?;
2526 if self.rnd_mode != FRndMode::NearestEven {
2527 write!(f, "{}", self.rnd_mode)?;
2528 }
2529 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2530 }
2531 }
2532 impl_display_for_op!(OpDMul);
2533
2534 #[repr(C)]
2535 #[derive(SrcsAsSlice, DstsAsSlice)]
2536 pub struct OpDFma {
2537 pub dst: Dst,
2538
2539 #[src_type(F64)]
2540 pub srcs: [Src; 3],
2541
2542 pub rnd_mode: FRndMode,
2543 }
2544
2545 impl DisplayOp for OpDFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2546 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2547 write!(f, "dfma")?;
2548 if self.rnd_mode != FRndMode::NearestEven {
2549 write!(f, "{}", self.rnd_mode)?;
2550 }
2551 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2552 }
2553 }
2554 impl_display_for_op!(OpDFma);
2555
2556 #[repr(C)]
2557 #[derive(SrcsAsSlice, DstsAsSlice)]
2558 pub struct OpDMnMx {
2559 pub dst: Dst,
2560
2561 #[src_type(F64)]
2562 pub srcs: [Src; 2],
2563
2564 #[src_type(Pred)]
2565 pub min: Src,
2566 }
2567
2568 impl DisplayOp for OpDMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2569 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2570 write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
2571 }
2572 }
2573 impl_display_for_op!(OpDMnMx);
2574
2575 #[repr(C)]
2576 #[derive(SrcsAsSlice, DstsAsSlice)]
2577 pub struct OpDSetP {
2578 pub dst: Dst,
2579
2580 pub set_op: PredSetOp,
2581 pub cmp_op: FloatCmpOp,
2582
2583 #[src_type(F64)]
2584 pub srcs: [Src; 2],
2585
2586 #[src_type(Pred)]
2587 pub accum: Src,
2588 }
2589
2590 impl DisplayOp for OpDSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2591 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2592 write!(f, "dsetp{}", self.cmp_op)?;
2593 if !self.set_op.is_trivial(&self.accum) {
2594 write!(f, "{}", self.set_op)?;
2595 }
2596 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2597 if !self.set_op.is_trivial(&self.accum) {
2598 write!(f, " {}", self.accum)?;
2599 }
2600 Ok(())
2601 }
2602 }
2603 impl_display_for_op!(OpDSetP);
2604
2605 #[repr(C)]
2606 #[derive(SrcsAsSlice, DstsAsSlice)]
2607 pub struct OpBMsk {
2608 pub dst: Dst,
2609
2610 #[src_type(ALU)]
2611 pub pos: Src,
2612
2613 #[src_type(ALU)]
2614 pub width: Src,
2615
2616 pub wrap: bool,
2617 }
2618
2619 impl DisplayOp for OpBMsk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2620 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2621 let wrap = if self.wrap { ".wrap" } else { ".clamp" };
2622 write!(f, "bmsk{} {} {}", wrap, self.pos, self.width)
2623 }
2624 }
2625 impl_display_for_op!(OpBMsk);
2626
2627 #[repr(C)]
2628 #[derive(SrcsAsSlice, DstsAsSlice)]
2629 pub struct OpBRev {
2630 pub dst: Dst,
2631
2632 #[src_type(ALU)]
2633 pub src: Src,
2634 }
2635
2636 impl DisplayOp for OpBRev {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2637 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2638 write!(f, "brev {}", self.src)
2639 }
2640 }
2641 impl_display_for_op!(OpBRev);
2642
2643 /// Bitfield extract. Extracts all bits from `base` starting at `offset` into
2644 /// `dst`.
2645 #[repr(C)]
2646 #[derive(SrcsAsSlice, DstsAsSlice)]
2647 pub struct OpBfe {
2648 /// Where to insert the bits.
2649 pub dst: Dst,
2650
2651 /// The source of bits to extract.
2652 #[src_type(ALU)]
2653 pub base: Src,
2654
2655 /// The range of bits to extract. This source is interpreted as four
2656 /// separate bytes, [b0, b1, b2, b3].
2657 ///
2658 /// b0 and b1: unused
2659 /// b2: the number of bits to extract.
2660 /// b3: the offset of the first bit to extract.
2661 ///
2662 /// This matches the way the hardware works.
2663 #[src_type(ALU)]
2664 pub range: Src,
2665
2666 /// Whether the output is signed
2667 pub signed: bool,
2668
2669 /// Whether to reverse the bits before inserting them into `dst`.
2670 pub reverse: bool,
2671 }
2672
2673 impl DisplayOp for OpBfe {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2674 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2675 write!(f, "bfe")?;
2676 if self.signed {
2677 write!(f, ".s")?;
2678 }
2679 if self.reverse {
2680 write!(f, ".rev")?;
2681 }
2682 write!(f, " {} {}", self.base, self.range,)
2683 }
2684 }
2685 impl_display_for_op!(OpBfe);
2686
2687 #[repr(C)]
2688 #[derive(SrcsAsSlice, DstsAsSlice)]
2689 pub struct OpFlo {
2690 pub dst: Dst,
2691
2692 #[src_type(ALU)]
2693 pub src: Src,
2694
2695 pub signed: bool,
2696 pub return_shift_amount: bool,
2697 }
2698
2699 impl DisplayOp for OpFlo {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2700 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2701 write!(f, "flo")?;
2702 if self.return_shift_amount {
2703 write!(f, ".samt")?;
2704 }
2705 write!(f, " {}", self.src)
2706 }
2707 }
2708 impl_display_for_op!(OpFlo);
2709
2710 #[repr(C)]
2711 #[derive(SrcsAsSlice, DstsAsSlice)]
2712 pub struct OpIAbs {
2713 pub dst: Dst,
2714
2715 #[src_type(ALU)]
2716 pub src: Src,
2717 }
2718
2719 impl DisplayOp for OpIAbs {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2720 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2721 write!(f, "iabs {}", self.src)
2722 }
2723 }
2724 impl_display_for_op!(OpIAbs);
2725
2726 #[repr(C)]
2727 #[derive(SrcsAsSlice, DstsAsSlice)]
2728 pub struct OpINeg {
2729 pub dst: Dst,
2730
2731 #[src_type(ALU)]
2732 pub src: Src,
2733 }
2734
2735 impl DisplayOp for OpINeg {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2736 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2737 write!(f, "ineg {}", self.src)
2738 }
2739 }
2740 impl_display_for_op!(OpINeg);
2741
2742 /// Only used on SM50
2743 #[repr(C)]
2744 #[derive(SrcsAsSlice, DstsAsSlice)]
2745 pub struct OpIAdd2 {
2746 pub dst: Dst,
2747 pub carry_out: Dst,
2748
2749 #[src_type(ALU)]
2750 pub srcs: [Src; 2],
2751 pub carry_in: Src,
2752 }
2753
2754 impl DisplayOp for OpIAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2755 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2756 write!(f, "iadd2 {} {}", self.srcs[0], self.srcs[1])?;
2757 if !self.carry_in.is_zero() {
2758 write!(f, " {}", self.carry_in)?;
2759 }
2760 Ok(())
2761 }
2762 }
2763
2764 #[repr(C)]
2765 #[derive(SrcsAsSlice, DstsAsSlice)]
2766 pub struct OpIAdd3 {
2767 pub dst: Dst,
2768 pub overflow: [Dst; 2],
2769
2770 #[src_type(I32)]
2771 pub srcs: [Src; 3],
2772 }
2773
2774 impl DisplayOp for OpIAdd3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2775 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2776 write!(
2777 f,
2778 "iadd3 {} {} {}",
2779 self.srcs[0], self.srcs[1], self.srcs[2],
2780 )
2781 }
2782 }
2783 impl_display_for_op!(OpIAdd3);
2784
2785 #[repr(C)]
2786 #[derive(SrcsAsSlice, DstsAsSlice)]
2787 pub struct OpIAdd3X {
2788 pub dst: Dst,
2789 pub overflow: [Dst; 2],
2790
2791 #[src_type(B32)]
2792 pub srcs: [Src; 3],
2793
2794 #[src_type(Pred)]
2795 pub carry: [Src; 2],
2796 }
2797
2798 impl DisplayOp for OpIAdd3X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2799 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2800 write!(
2801 f,
2802 "iadd3.x {} {} {} {} {}",
2803 self.srcs[0],
2804 self.srcs[1],
2805 self.srcs[2],
2806 self.carry[0],
2807 self.carry[1]
2808 )
2809 }
2810 }
2811 impl_display_for_op!(OpIAdd3X);
2812
2813 #[repr(C)]
2814 #[derive(SrcsAsSlice, DstsAsSlice)]
2815 pub struct OpIDp4 {
2816 pub dst: Dst,
2817
2818 pub src_types: [IntType; 2],
2819
2820 #[src_type(I32)]
2821 pub srcs: [Src; 3],
2822 }
2823
2824 impl DisplayOp for OpIDp4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2825 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2826 write!(
2827 f,
2828 "idp4{}{} {} {} {}",
2829 self.src_types[0],
2830 self.src_types[1],
2831 self.srcs[0],
2832 self.srcs[1],
2833 self.srcs[2],
2834 )
2835 }
2836 }
2837 impl_display_for_op!(OpIDp4);
2838
2839 #[repr(C)]
2840 #[derive(SrcsAsSlice, DstsAsSlice)]
2841 pub struct OpIMad {
2842 pub dst: Dst,
2843
2844 #[src_type(ALU)]
2845 pub srcs: [Src; 3],
2846
2847 pub signed: bool,
2848 }
2849
2850 impl DisplayOp for OpIMad {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2851 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2852 write!(f, "imad {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2],)
2853 }
2854 }
2855 impl_display_for_op!(OpIMad);
2856
2857 /// Only used on SM50
2858 #[repr(C)]
2859 #[derive(SrcsAsSlice, DstsAsSlice)]
2860 pub struct OpIMul {
2861 pub dst: Dst,
2862
2863 #[src_type(ALU)]
2864 pub srcs: [Src; 2],
2865
2866 pub signed: [bool; 2],
2867 pub high: bool,
2868 }
2869
2870 impl DisplayOp for OpIMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2871 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2872 write!(f, "imul")?;
2873 if self.high {
2874 write!(f, ".hi")?;
2875 }
2876 let src_type = |signed| if signed { ".s32" } else { ".u32" };
2877 write!(
2878 f,
2879 "{}{}",
2880 src_type(self.signed[0]),
2881 src_type(self.signed[1])
2882 )?;
2883 write!(f, " {} {}", self.srcs[0], self.srcs[1])
2884 }
2885 }
2886
2887 #[repr(C)]
2888 #[derive(SrcsAsSlice, DstsAsSlice)]
2889 pub struct OpIMad64 {
2890 pub dst: Dst,
2891
2892 #[src_type(ALU)]
2893 pub srcs: [Src; 3],
2894
2895 pub signed: bool,
2896 }
2897
2898 impl DisplayOp for OpIMad64 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2899 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2900 write!(
2901 f,
2902 "imad64 {} {} {}",
2903 self.srcs[0], self.srcs[1], self.srcs[2],
2904 )
2905 }
2906 }
2907 impl_display_for_op!(OpIMad64);
2908
2909 #[repr(C)]
2910 #[derive(SrcsAsSlice, DstsAsSlice)]
2911 pub struct OpIMnMx {
2912 pub dst: Dst,
2913 pub cmp_type: IntCmpType,
2914
2915 #[src_type(ALU)]
2916 pub srcs: [Src; 2],
2917
2918 #[src_type(Pred)]
2919 pub min: Src,
2920 }
2921
2922 impl DisplayOp for OpIMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2923 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2924 write!(
2925 f,
2926 "imnmx{} {} {} {}",
2927 self.cmp_type, self.srcs[0], self.srcs[1], self.min
2928 )
2929 }
2930 }
2931 impl_display_for_op!(OpIMnMx);
2932
2933 #[repr(C)]
2934 #[derive(SrcsAsSlice, DstsAsSlice)]
2935 pub struct OpISetP {
2936 pub dst: Dst,
2937
2938 pub set_op: PredSetOp,
2939 pub cmp_op: IntCmpOp,
2940 pub cmp_type: IntCmpType,
2941 pub ex: bool,
2942
2943 #[src_type(ALU)]
2944 pub srcs: [Src; 2],
2945
2946 #[src_type(Pred)]
2947 pub accum: Src,
2948
2949 #[src_type(Pred)]
2950 pub low_cmp: Src,
2951 }
2952
2953 impl DisplayOp for OpISetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2954 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2955 write!(f, "isetp{}{}", self.cmp_op, self.cmp_type)?;
2956 if !self.set_op.is_trivial(&self.accum) {
2957 write!(f, "{}", self.set_op)?;
2958 }
2959 if self.ex {
2960 write!(f, ".ex")?;
2961 }
2962 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2963 if !self.set_op.is_trivial(&self.accum) {
2964 write!(f, " {}", self.accum)?;
2965 }
2966 if self.ex {
2967 write!(f, " {}", self.low_cmp)?;
2968 }
2969 Ok(())
2970 }
2971 }
2972 impl_display_for_op!(OpISetP);
2973
2974 #[repr(C)]
2975 #[derive(SrcsAsSlice, DstsAsSlice)]
2976 pub struct OpLop2 {
2977 pub dst: Dst,
2978
2979 #[src_type(ALU)]
2980 pub srcs: [Src; 2],
2981
2982 pub op: LogicOp2,
2983 }
2984
2985 impl DisplayOp for OpLop2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2986 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2987 write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
2988 }
2989 }
2990
2991 #[repr(C)]
2992 #[derive(SrcsAsSlice, DstsAsSlice)]
2993 pub struct OpLop3 {
2994 pub dst: Dst,
2995
2996 #[src_type(ALU)]
2997 pub srcs: [Src; 3],
2998
2999 pub op: LogicOp3,
3000 }
3001
3002 impl DisplayOp for OpLop3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3003 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3004 write!(
3005 f,
3006 "lop3.{} {} {} {}",
3007 self.op, self.srcs[0], self.srcs[1], self.srcs[2],
3008 )
3009 }
3010 }
3011 impl_display_for_op!(OpLop3);
3012
/// Mode selector carried by [`OpShfl`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ShflOp {
    Idx,
    Up,
    Down,
    Bfly,
}

impl fmt::Display for ShflOp {
    /// Writes the lower-case mode name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            ShflOp::Idx => "idx",
            ShflOp::Up => "up",
            ShflOp::Down => "down",
            ShflOp::Bfly => "bfly",
        };
        f.write_str(name)
    }
}
3032
3033 #[repr(C)]
3034 #[derive(SrcsAsSlice, DstsAsSlice)]
3035 pub struct OpShf {
3036 pub dst: Dst,
3037
3038 #[src_type(GPR)]
3039 pub low: Src,
3040
3041 #[src_type(ALU)]
3042 pub high: Src,
3043
3044 #[src_type(GPR)]
3045 pub shift: Src,
3046
3047 pub right: bool,
3048 pub wrap: bool,
3049 pub data_type: IntType,
3050 pub dst_high: bool,
3051 }
3052
3053 impl DisplayOp for OpShf {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3054 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3055 write!(f, "shf")?;
3056 if self.right {
3057 write!(f, ".r")?;
3058 } else {
3059 write!(f, ".l")?;
3060 }
3061 if self.wrap {
3062 write!(f, ".w")?;
3063 }
3064 write!(f, "{}", self.data_type)?;
3065 if self.dst_high {
3066 write!(f, ".hi")?;
3067 }
3068 write!(f, " {} {} {}", self.low, self.high, self.shift)
3069 }
3070 }
3071 impl_display_for_op!(OpShf);
3072
3073 /// Only used on SM50
3074 #[repr(C)]
3075 #[derive(SrcsAsSlice, DstsAsSlice)]
3076 pub struct OpShl {
3077 pub dst: Dst,
3078
3079 #[src_type(GPR)]
3080 pub src: Src,
3081
3082 #[src_type(ALU)]
3083 pub shift: Src,
3084
3085 pub wrap: bool,
3086 }
3087
3088 impl DisplayOp for OpShl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3089 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3090 write!(f, "shl")?;
3091 if self.wrap {
3092 write!(f, ".w")?;
3093 }
3094 write!(f, " {} {}", self.src, self.shift)
3095 }
3096 }
3097
3098 /// Only used on SM50
3099 #[repr(C)]
3100 #[derive(SrcsAsSlice, DstsAsSlice)]
3101 pub struct OpShr {
3102 pub dst: Dst,
3103
3104 #[src_type(GPR)]
3105 pub src: Src,
3106
3107 #[src_type(ALU)]
3108 pub shift: Src,
3109
3110 pub wrap: bool,
3111 pub signed: bool,
3112 }
3113
3114 impl DisplayOp for OpShr {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3115 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3116 write!(f, "shr")?;
3117 if self.wrap {
3118 write!(f, ".w")?;
3119 }
3120 if !self.signed {
3121 write!(f, ".u32")?;
3122 }
3123 write!(f, " {} {}", self.src, self.shift)
3124 }
3125 }
3126
3127 #[repr(C)]
3128 #[derive(DstsAsSlice)]
3129 pub struct OpF2F {
3130 pub dst: Dst,
3131
3132 pub src: Src,
3133
3134 pub src_type: FloatType,
3135 pub dst_type: FloatType,
3136 pub rnd_mode: FRndMode,
3137 pub ftz: bool,
3138 /// Place the result into the upper 16 bits of the destination register
3139 pub high: bool,
3140 /// Round to the nearest integer rather than nearest float
3141 ///
3142 /// Not available on SM70+
3143 pub integer_rnd: bool,
3144 }
3145
3146 impl SrcsAsSlice for OpF2F {
srcs_as_slice(&self) -> &[Src]3147 fn srcs_as_slice(&self) -> &[Src] {
3148 std::slice::from_ref(&self.src)
3149 }
3150
srcs_as_mut_slice(&mut self) -> &mut [Src]3151 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3152 std::slice::from_mut(&mut self.src)
3153 }
3154
src_types(&self) -> SrcTypeList3155 fn src_types(&self) -> SrcTypeList {
3156 let src_type = match self.src_type {
3157 FloatType::F16 => SrcType::ALU,
3158 FloatType::F32 => SrcType::F32,
3159 FloatType::F64 => SrcType::F64,
3160 };
3161 SrcTypeList::Uniform(src_type)
3162 }
3163 }
3164
3165 impl DisplayOp for OpF2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3166 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3167 write!(f, "f2f")?;
3168 if self.ftz {
3169 write!(f, ".ftz")?;
3170 }
3171 if self.integer_rnd {
3172 write!(f, ".int")?;
3173 }
3174 write!(
3175 f,
3176 "{}{}{} {}",
3177 self.dst_type, self.src_type, self.rnd_mode, self.src,
3178 )
3179 }
3180 }
3181 impl_display_for_op!(OpF2F);
3182
3183 #[repr(C)]
3184 #[derive(DstsAsSlice)]
3185 pub struct OpF2I {
3186 pub dst: Dst,
3187
3188 pub src: Src,
3189
3190 pub src_type: FloatType,
3191 pub dst_type: IntType,
3192 pub rnd_mode: FRndMode,
3193 pub ftz: bool,
3194 }
3195
3196 impl SrcsAsSlice for OpF2I {
srcs_as_slice(&self) -> &[Src]3197 fn srcs_as_slice(&self) -> &[Src] {
3198 std::slice::from_ref(&self.src)
3199 }
3200
srcs_as_mut_slice(&mut self) -> &mut [Src]3201 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3202 std::slice::from_mut(&mut self.src)
3203 }
3204
src_types(&self) -> SrcTypeList3205 fn src_types(&self) -> SrcTypeList {
3206 let src_type = match self.src_type {
3207 FloatType::F16 => SrcType::ALU,
3208 FloatType::F32 => SrcType::F32,
3209 FloatType::F64 => SrcType::F64,
3210 };
3211 SrcTypeList::Uniform(src_type)
3212 }
3213 }
3214
3215 impl DisplayOp for OpF2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3216 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3217 let ftz = if self.ftz { ".ftz" } else { "" };
3218 write!(
3219 f,
3220 "f2i{}{}{}{ftz} {}",
3221 self.dst_type, self.src_type, self.rnd_mode, self.src,
3222 )
3223 }
3224 }
3225 impl_display_for_op!(OpF2I);
3226
3227 #[repr(C)]
3228 #[derive(DstsAsSlice)]
3229 pub struct OpI2F {
3230 pub dst: Dst,
3231
3232 pub src: Src,
3233
3234 pub dst_type: FloatType,
3235 pub src_type: IntType,
3236 pub rnd_mode: FRndMode,
3237 }
3238
3239 impl SrcsAsSlice for OpI2F {
srcs_as_slice(&self) -> &[Src]3240 fn srcs_as_slice(&self) -> &[Src] {
3241 std::slice::from_ref(&self.src)
3242 }
3243
srcs_as_mut_slice(&mut self) -> &mut [Src]3244 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3245 std::slice::from_mut(&mut self.src)
3246 }
3247
src_types(&self) -> SrcTypeList3248 fn src_types(&self) -> SrcTypeList {
3249 if self.src_type.bits() <= 32 {
3250 SrcTypeList::Uniform(SrcType::ALU)
3251 } else {
3252 SrcTypeList::Uniform(SrcType::GPR)
3253 }
3254 }
3255 }
3256
3257 impl DisplayOp for OpI2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3258 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3259 write!(
3260 f,
3261 "i2f{}{}{} {}",
3262 self.dst_type, self.src_type, self.rnd_mode, self.src,
3263 )
3264 }
3265 }
3266 impl_display_for_op!(OpI2F);
3267
3268 /// Not used on SM70+
3269 #[repr(C)]
3270 #[derive(SrcsAsSlice, DstsAsSlice)]
3271 pub struct OpI2I {
3272 pub dst: Dst,
3273
3274 #[src_type(ALU)]
3275 pub src: Src,
3276
3277 pub src_type: IntType,
3278 pub dst_type: IntType,
3279
3280 pub saturate: bool,
3281 pub abs: bool,
3282 pub neg: bool,
3283 }
3284
3285 impl DisplayOp for OpI2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3286 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3287 write!(f, "i2i")?;
3288 if self.saturate {
3289 write!(f, ".sat ")?;
3290 }
3291 write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
3292 if self.abs {
3293 write!(f, ".abs")?;
3294 }
3295 if self.neg {
3296 write!(f, ".neg")?;
3297 }
3298 Ok(())
3299 }
3300 }
3301 impl_display_for_op!(OpI2I);
3302
3303 #[repr(C)]
3304 #[derive(DstsAsSlice)]
3305 pub struct OpFRnd {
3306 pub dst: Dst,
3307
3308 pub src: Src,
3309
3310 pub dst_type: FloatType,
3311 pub src_type: FloatType,
3312 pub rnd_mode: FRndMode,
3313 pub ftz: bool,
3314 }
3315
3316 impl SrcsAsSlice for OpFRnd {
srcs_as_slice(&self) -> &[Src]3317 fn srcs_as_slice(&self) -> &[Src] {
3318 std::slice::from_ref(&self.src)
3319 }
3320
srcs_as_mut_slice(&mut self) -> &mut [Src]3321 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
3322 std::slice::from_mut(&mut self.src)
3323 }
3324
src_types(&self) -> SrcTypeList3325 fn src_types(&self) -> SrcTypeList {
3326 let src_type = match self.src_type {
3327 FloatType::F16 => SrcType::ALU,
3328 FloatType::F32 => SrcType::F32,
3329 FloatType::F64 => SrcType::F64,
3330 };
3331 SrcTypeList::Uniform(src_type)
3332 }
3333 }
3334
3335 impl DisplayOp for OpFRnd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3336 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3337 let ftz = if self.ftz { ".ftz" } else { "" };
3338 write!(
3339 f,
3340 "frnd{}{}{}{ftz} {}",
3341 self.dst_type, self.src_type, self.rnd_mode, self.src,
3342 )
3343 }
3344 }
3345 impl_display_for_op!(OpFRnd);
3346
3347 #[repr(C)]
3348 #[derive(SrcsAsSlice, DstsAsSlice)]
3349 pub struct OpMov {
3350 pub dst: Dst,
3351
3352 #[src_type(ALU)]
3353 pub src: Src,
3354
3355 pub quad_lanes: u8,
3356 }
3357
3358 impl DisplayOp for OpMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3359 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3360 if self.quad_lanes == 0xf {
3361 write!(f, "mov {}", self.src)
3362 } else {
3363 write!(f, "mov[{:#x}] {}", self.quad_lanes, self.src)
3364 }
3365 }
3366 }
3367 impl_display_for_op!(OpMov);
3368
/// Mode selector carried by [`OpPrmt`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
    Index,
    Forward4Extract,
    Backward4Extract,
    Replicate8,
    EdgeClampLeft,
    EdgeClampRight,
    Replicate16,
}

impl fmt::Display for PrmtMode {
    /// Writes the mode as a dot-prefixed mnemonic suffix.
    ///
    /// `Index` prints nothing. Every other variant gets its own distinct
    /// suffix, so left and right edge clamp can be told apart in dumps.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PrmtMode::Index => Ok(()),
            PrmtMode::Forward4Extract => write!(f, ".f4e"),
            PrmtMode::Backward4Extract => write!(f, ".b4e"),
            PrmtMode::Replicate8 => write!(f, ".rc8"),
            PrmtMode::EdgeClampLeft => write!(f, ".ecl"),
            // Previously printed ".ecl", identical to EdgeClampLeft.
            PrmtMode::EdgeClampRight => write!(f, ".ecr"),
            PrmtMode::Replicate16 => write!(f, ".rc16"),
        }
    }
}
3394
3395 #[repr(C)]
3396 #[derive(SrcsAsSlice, DstsAsSlice)]
3397 /// Permutes `srcs` into `dst` using `selection`.
3398 pub struct OpPrmt {
3399 pub dst: Dst,
3400
3401 #[src_type(ALU)]
3402 pub srcs: [Src; 2],
3403
3404 #[src_type(ALU)]
3405 pub sel: Src,
3406
3407 pub mode: PrmtMode,
3408 }
3409
3410 impl DisplayOp for OpPrmt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3411 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3412 write!(
3413 f,
3414 "prmt{} {} [{}] {}",
3415 self.mode, self.srcs[0], self.sel, self.srcs[1],
3416 )
3417 }
3418 }
3419 impl_display_for_op!(OpPrmt);
3420
3421 #[repr(C)]
3422 #[derive(SrcsAsSlice, DstsAsSlice)]
3423 pub struct OpSel {
3424 pub dst: Dst,
3425
3426 #[src_type(Pred)]
3427 pub cond: Src,
3428
3429 #[src_type(ALU)]
3430 pub srcs: [Src; 2],
3431 }
3432
3433 impl DisplayOp for OpSel {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3434 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3435 write!(f, "sel {} {} {}", self.cond, self.srcs[0], self.srcs[1],)
3436 }
3437 }
3438 impl_display_for_op!(OpSel);
3439
3440 #[repr(C)]
3441 #[derive(SrcsAsSlice, DstsAsSlice)]
3442 pub struct OpShfl {
3443 pub dst: Dst,
3444 pub in_bounds: Dst,
3445
3446 #[src_type(SSA)]
3447 pub src: Src,
3448
3449 #[src_type(ALU)]
3450 pub lane: Src,
3451
3452 #[src_type(ALU)]
3453 pub c: Src,
3454
3455 pub op: ShflOp,
3456 }
3457
3458 impl DisplayOp for OpShfl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3459 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3460 write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)
3461 }
3462 }
3463 impl_display_for_op!(OpShfl);
3464
3465 #[repr(C)]
3466 #[derive(SrcsAsSlice, DstsAsSlice)]
3467 pub struct OpPLop3 {
3468 pub dsts: [Dst; 2],
3469
3470 #[src_type(Pred)]
3471 pub srcs: [Src; 3],
3472
3473 pub ops: [LogicOp3; 2],
3474 }
3475
3476 impl DisplayOp for OpPLop3 {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3477 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3478 write!(f, "{} {}", self.dsts[0], self.dsts[1])
3479 }
3480
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3481 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3482 write!(
3483 f,
3484 "plop3 {} {} {} {} {}",
3485 self.srcs[0], self.srcs[1], self.srcs[2], self.ops[0], self.ops[1],
3486 )
3487 }
3488 }
3489 impl_display_for_op!(OpPLop3);
3490
3491 #[repr(C)]
3492 #[derive(SrcsAsSlice, DstsAsSlice)]
3493 pub struct OpPSetP {
3494 pub dsts: [Dst; 2],
3495
3496 pub ops: [PredSetOp; 2],
3497
3498 #[src_type(Pred)]
3499 pub srcs: [Src; 3],
3500 }
3501
3502 impl DisplayOp for OpPSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3503 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3504 write!(
3505 f,
3506 "psetp{}{} {} {} {}",
3507 self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
3508 )
3509 }
3510 }
3511
3512 #[repr(C)]
3513 #[derive(SrcsAsSlice, DstsAsSlice)]
3514 pub struct OpPopC {
3515 pub dst: Dst,
3516
3517 #[src_type(ALU)]
3518 pub src: Src,
3519 }
3520
3521 impl DisplayOp for OpPopC {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3522 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3523 write!(f, "popc {}", self.src,)
3524 }
3525 }
3526 impl_display_for_op!(OpPopC);
3527
3528 #[repr(C)]
3529 #[derive(SrcsAsSlice, DstsAsSlice)]
3530 pub struct OpTex {
3531 pub dsts: [Dst; 2],
3532 pub resident: Dst,
3533
3534 #[src_type(SSA)]
3535 pub srcs: [Src; 2],
3536
3537 pub dim: TexDim,
3538 pub lod_mode: TexLodMode,
3539 pub z_cmpr: bool,
3540 pub offset: bool,
3541 pub mask: u8,
3542 }
3543
3544 impl DisplayOp for OpTex {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3545 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3546 write!(f, "tex.b{}", self.dim)?;
3547 if self.lod_mode != TexLodMode::Auto {
3548 write!(f, ".{}", self.lod_mode)?;
3549 }
3550 if self.offset {
3551 write!(f, ".aoffi")?;
3552 }
3553 if self.z_cmpr {
3554 write!(f, ".dc")?;
3555 }
3556 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3557 }
3558 }
3559 impl_display_for_op!(OpTex);
3560
3561 #[repr(C)]
3562 #[derive(SrcsAsSlice, DstsAsSlice)]
3563 pub struct OpTld {
3564 pub dsts: [Dst; 2],
3565 pub resident: Dst,
3566
3567 #[src_type(SSA)]
3568 pub srcs: [Src; 2],
3569
3570 pub dim: TexDim,
3571 pub is_ms: bool,
3572 pub lod_mode: TexLodMode,
3573 pub offset: bool,
3574 pub mask: u8,
3575 }
3576
3577 impl DisplayOp for OpTld {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3578 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3579 write!(f, "tld.b{}", self.dim)?;
3580 if self.lod_mode != TexLodMode::Auto {
3581 write!(f, ".{}", self.lod_mode)?;
3582 }
3583 if self.offset {
3584 write!(f, ".aoffi")?;
3585 }
3586 if self.is_ms {
3587 write!(f, ".ms")?;
3588 }
3589 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3590 }
3591 }
3592 impl_display_for_op!(OpTld);
3593
3594 #[repr(C)]
3595 #[derive(SrcsAsSlice, DstsAsSlice)]
3596 pub struct OpTld4 {
3597 pub dsts: [Dst; 2],
3598 pub resident: Dst,
3599
3600 #[src_type(SSA)]
3601 pub srcs: [Src; 2],
3602
3603 pub dim: TexDim,
3604 pub comp: u8,
3605 pub offset_mode: Tld4OffsetMode,
3606 pub z_cmpr: bool,
3607 pub mask: u8,
3608 }
3609
3610 impl DisplayOp for OpTld4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3611 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3612 write!(f, "tld4.g.b{}", self.dim)?;
3613 if self.offset_mode != Tld4OffsetMode::None {
3614 write!(f, ".{}", self.offset_mode)?;
3615 }
3616 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3617 }
3618 }
3619 impl_display_for_op!(OpTld4);
3620
3621 #[repr(C)]
3622 #[derive(SrcsAsSlice, DstsAsSlice)]
3623 pub struct OpTmml {
3624 pub dsts: [Dst; 2],
3625
3626 #[src_type(SSA)]
3627 pub srcs: [Src; 2],
3628
3629 pub dim: TexDim,
3630 pub mask: u8,
3631 }
3632
3633 impl DisplayOp for OpTmml {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3634 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3635 write!(
3636 f,
3637 "tmml.b.lod{} {} {}",
3638 self.dim, self.srcs[0], self.srcs[1]
3639 )
3640 }
3641 }
3642 impl_display_for_op!(OpTmml);
3643
3644 #[repr(C)]
3645 #[derive(SrcsAsSlice, DstsAsSlice)]
3646 pub struct OpTxd {
3647 pub dsts: [Dst; 2],
3648 pub resident: Dst,
3649
3650 #[src_type(SSA)]
3651 pub srcs: [Src; 2],
3652
3653 pub dim: TexDim,
3654 pub offset: bool,
3655 pub mask: u8,
3656 }
3657
3658 impl DisplayOp for OpTxd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3659 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3660 write!(f, "txd.b{}", self.dim)?;
3661 if self.offset {
3662 write!(f, ".aoffi")?;
3663 }
3664 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3665 }
3666 }
3667 impl_display_for_op!(OpTxd);
3668
3669 #[repr(C)]
3670 #[derive(SrcsAsSlice, DstsAsSlice)]
3671 pub struct OpTxq {
3672 pub dsts: [Dst; 2],
3673
3674 #[src_type(SSA)]
3675 pub src: Src,
3676
3677 pub query: TexQuery,
3678 pub mask: u8,
3679 }
3680
3681 impl DisplayOp for OpTxq {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3682 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3683 write!(f, "txq.b {} {}", self.src, self.query)
3684 }
3685 }
3686 impl_display_for_op!(OpTxq);
3687
3688 #[repr(C)]
3689 #[derive(SrcsAsSlice, DstsAsSlice)]
3690 pub struct OpSuLd {
3691 pub dst: Dst,
3692 pub resident: Dst,
3693
3694 pub image_dim: ImageDim,
3695 pub mem_order: MemOrder,
3696 pub mem_eviction_priority: MemEvictionPriority,
3697 pub mask: u8,
3698
3699 #[src_type(GPR)]
3700 pub handle: Src,
3701
3702 #[src_type(SSA)]
3703 pub coord: Src,
3704 }
3705
3706 impl DisplayOp for OpSuLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3707 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3708 write!(
3709 f,
3710 "suld.p{}{}{} [{}] {}",
3711 self.image_dim,
3712 self.mem_order,
3713 self.mem_eviction_priority,
3714 self.coord,
3715 self.handle,
3716 )
3717 }
3718 }
3719 impl_display_for_op!(OpSuLd);
3720
3721 #[repr(C)]
3722 #[derive(SrcsAsSlice, DstsAsSlice)]
3723 pub struct OpSuSt {
3724 pub image_dim: ImageDim,
3725 pub mem_order: MemOrder,
3726 pub mem_eviction_priority: MemEvictionPriority,
3727 pub mask: u8,
3728
3729 #[src_type(GPR)]
3730 pub handle: Src,
3731
3732 #[src_type(SSA)]
3733 pub coord: Src,
3734
3735 #[src_type(SSA)]
3736 pub data: Src,
3737 }
3738
3739 impl DisplayOp for OpSuSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3740 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3741 write!(
3742 f,
3743 "sust.p{}{}{} [{}] {} {}",
3744 self.image_dim,
3745 self.mem_order,
3746 self.mem_eviction_priority,
3747 self.coord,
3748 self.data,
3749 self.handle,
3750 )
3751 }
3752 }
3753 impl_display_for_op!(OpSuSt);
3754
3755 #[repr(C)]
3756 #[derive(SrcsAsSlice, DstsAsSlice)]
3757 pub struct OpSuAtom {
3758 pub dst: Dst,
3759 pub resident: Dst,
3760
3761 pub image_dim: ImageDim,
3762
3763 pub atom_op: AtomOp,
3764 pub atom_type: AtomType,
3765
3766 pub mem_order: MemOrder,
3767 pub mem_eviction_priority: MemEvictionPriority,
3768
3769 #[src_type(GPR)]
3770 pub handle: Src,
3771
3772 #[src_type(SSA)]
3773 pub coord: Src,
3774
3775 #[src_type(SSA)]
3776 pub data: Src,
3777 }
3778
3779 impl DisplayOp for OpSuAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3780 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3781 write!(
3782 f,
3783 "suatom.p{}{}{}{}{} [{}] {} {}",
3784 self.image_dim,
3785 self.atom_op,
3786 self.atom_type,
3787 self.mem_order,
3788 self.mem_eviction_priority,
3789 self.coord,
3790 self.data,
3791 self.handle,
3792 )
3793 }
3794 }
3795 impl_display_for_op!(OpSuAtom);
3796
3797 #[repr(C)]
3798 #[derive(SrcsAsSlice, DstsAsSlice)]
3799 pub struct OpLd {
3800 pub dst: Dst,
3801
3802 #[src_type(GPR)]
3803 pub addr: Src,
3804
3805 pub offset: i32,
3806 pub access: MemAccess,
3807 }
3808
3809 impl DisplayOp for OpLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3810 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3811 write!(f, "ld{} [{}", self.access, self.addr)?;
3812 if self.offset > 0 {
3813 write!(f, "+{:#x}", self.offset)?;
3814 }
3815 write!(f, "]")
3816 }
3817 }
3818 impl_display_for_op!(OpLd);
3819
3820 #[repr(C)]
3821 #[derive(SrcsAsSlice, DstsAsSlice)]
3822 pub struct OpLdc {
3823 pub dst: Dst,
3824
3825 #[src_type(ALU)]
3826 pub cb: Src,
3827
3828 #[src_type(GPR)]
3829 pub offset: Src,
3830
3831 pub mem_type: MemType,
3832 }
3833
3834 impl DisplayOp for OpLdc {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3835 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3836 let SrcRef::CBuf(cb) = self.cb.src_ref else {
3837 panic!("Not a cbuf");
3838 };
3839 write!(f, "ldc{} {}[", self.mem_type, cb.buf)?;
3840 if self.offset.is_zero() {
3841 write!(f, "+{:#x}", cb.offset)?;
3842 } else if cb.offset == 0 {
3843 write!(f, "{}", self.offset)?;
3844 } else {
3845 write!(f, "{}+{:#x}", self.offset, cb.offset)?;
3846 }
3847 write!(f, "]")
3848 }
3849 }
3850 impl_display_for_op!(OpLdc);
3851
3852 #[repr(C)]
3853 #[derive(SrcsAsSlice, DstsAsSlice)]
3854 pub struct OpSt {
3855 #[src_type(GPR)]
3856 pub addr: Src,
3857
3858 #[src_type(SSA)]
3859 pub data: Src,
3860
3861 pub offset: i32,
3862 pub access: MemAccess,
3863 }
3864
3865 impl DisplayOp for OpSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3866 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3867 write!(f, "st{} [{}", self.access, self.addr)?;
3868 if self.offset > 0 {
3869 write!(f, "+{:#x}", self.offset)?;
3870 }
3871 write!(f, "] {}", self.data)
3872 }
3873 }
3874 impl_display_for_op!(OpSt);
3875
3876 #[repr(C)]
3877 #[derive(SrcsAsSlice, DstsAsSlice)]
3878 pub struct OpAtom {
3879 pub dst: Dst,
3880
3881 #[src_type(GPR)]
3882 pub addr: Src,
3883
3884 #[src_type(GPR)]
3885 pub cmpr: Src,
3886
3887 #[src_type(SSA)]
3888 pub data: Src,
3889
3890 pub atom_op: AtomOp,
3891 pub atom_type: AtomType,
3892
3893 pub addr_offset: i32,
3894
3895 pub mem_space: MemSpace,
3896 pub mem_order: MemOrder,
3897 pub mem_eviction_priority: MemEvictionPriority,
3898 }
3899
3900 impl DisplayOp for OpAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3901 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3902 write!(
3903 f,
3904 "atom{}{}{}{}{}",
3905 self.atom_op,
3906 self.atom_type,
3907 self.mem_space,
3908 self.mem_order,
3909 self.mem_eviction_priority,
3910 )?;
3911 write!(f, " [")?;
3912 if !self.addr.is_zero() {
3913 write!(f, "{}", self.addr)?;
3914 }
3915 if self.addr_offset > 0 {
3916 if !self.addr.is_zero() {
3917 write!(f, "+")?;
3918 }
3919 write!(f, "{:#x}", self.addr_offset)?;
3920 }
3921 write!(f, "] {}", self.data)
3922 }
3923 }
3924 impl_display_for_op!(OpAtom);
3925
3926 #[repr(C)]
3927 #[derive(SrcsAsSlice, DstsAsSlice)]
3928 pub struct OpAL2P {
3929 pub dst: Dst,
3930
3931 #[src_type(GPR)]
3932 pub offset: Src,
3933
3934 pub access: AttrAccess,
3935 }
3936
3937 impl DisplayOp for OpAL2P {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3938 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3939 write!(f, "al2p")?;
3940 if self.access.output {
3941 write!(f, ".o")?;
3942 }
3943 if self.access.patch {
3944 write!(f, ".p")?;
3945 }
3946 write!(f, " a[{:#x}", self.access.addr)?;
3947 if !self.offset.is_zero() {
3948 write!(f, "+{}", self.offset)?;
3949 }
3950 write!(f, "]")
3951 }
3952 }
3953 impl_display_for_op!(OpAL2P);
3954
3955 #[repr(C)]
3956 #[derive(SrcsAsSlice, DstsAsSlice)]
3957 pub struct OpALd {
3958 pub dst: Dst,
3959
3960 #[src_type(GPR)]
3961 pub vtx: Src,
3962
3963 #[src_type(GPR)]
3964 pub offset: Src,
3965
3966 pub access: AttrAccess,
3967 }
3968
3969 impl DisplayOp for OpALd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3970 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3971 write!(f, "ald")?;
3972 if self.access.output {
3973 write!(f, ".o")?;
3974 }
3975 if self.access.patch {
3976 write!(f, ".p")?;
3977 }
3978 if self.access.phys {
3979 write!(f, ".phys")?;
3980 }
3981 write!(f, " a")?;
3982 if !self.vtx.is_zero() {
3983 write!(f, "[{}]", self.vtx)?;
3984 }
3985 write!(f, "[{:#x}", self.access.addr)?;
3986 if !self.offset.is_zero() {
3987 write!(f, "+{}", self.offset)?;
3988 }
3989 write!(f, "]")
3990 }
3991 }
3992 impl_display_for_op!(OpALd);
3993
3994 #[repr(C)]
3995 #[derive(SrcsAsSlice, DstsAsSlice)]
3996 pub struct OpASt {
3997 #[src_type(GPR)]
3998 pub vtx: Src,
3999
4000 #[src_type(GPR)]
4001 pub offset: Src,
4002
4003 #[src_type(SSA)]
4004 pub data: Src,
4005
4006 pub access: AttrAccess,
4007 }
4008
4009 impl DisplayOp for OpASt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4010 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4011 write!(f, "ast")?;
4012 if self.access.patch {
4013 write!(f, ".p")?;
4014 }
4015 if self.access.phys {
4016 write!(f, ".phys")?;
4017 }
4018 write!(f, " a")?;
4019 if !self.vtx.is_zero() {
4020 write!(f, "[{}]", self.vtx)?;
4021 }
4022 write!(f, "[{:#x}", self.access.addr)?;
4023 if !self.offset.is_zero() {
4024 write!(f, "+{}", self.offset)?;
4025 }
4026 write!(f, "] {}", self.data)
4027 }
4028 }
4029 impl_display_for_op!(OpASt);
4030
4031 #[repr(C)]
4032 #[derive(SrcsAsSlice, DstsAsSlice)]
4033 pub struct OpIpa {
4034 pub dst: Dst,
4035 pub addr: u16,
4036 pub freq: InterpFreq,
4037 pub loc: InterpLoc,
4038 pub inv_w: Src,
4039 pub offset: Src,
4040 }
4041
4042 impl DisplayOp for OpIpa {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4043 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4044 write!(f, "ipa")?;
4045 match self.freq {
4046 InterpFreq::Pass => write!(f, ".pass")?,
4047 InterpFreq::PassMulW => write!(f, ".pass_mul_w")?,
4048 InterpFreq::Constant => write!(f, ".constant")?,
4049 InterpFreq::State => write!(f, ".state")?,
4050 }
4051 match self.loc {
4052 InterpLoc::Default => (),
4053 InterpLoc::Centroid => write!(f, ".centroid")?,
4054 InterpLoc::Offset => write!(f, ".offset")?,
4055 }
4056
4057 write!(f, " {} a[{:#x}] {}", self.dst, self.addr, self.inv_w)?;
4058 if self.loc == InterpLoc::Offset {
4059 write!(f, " {}", self.offset)?;
4060 }
4061 Ok(())
4062 }
4063 }
4064 impl_display_for_op!(OpIpa);
4065
4066 #[repr(C)]
4067 #[derive(SrcsAsSlice, DstsAsSlice)]
4068 pub struct OpLdTram {
4069 pub dst: Dst,
4070 pub addr: u16,
4071 pub use_c: bool,
4072 }
4073
4074 impl DisplayOp for OpLdTram {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4075 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4076 write!(f, "ldtram")?;
4077 if self.use_c {
4078 write!(f, ".c")?;
4079 } else {
4080 write!(f, ".ab")?;
4081 }
4082 write!(f, " a[{:#x}]", self.addr)?;
4083 Ok(())
4084 }
4085 }
4086 impl_display_for_op!(OpLdTram);
4087
/// Selects the operation carried by an [`OpCCtl`].
///
/// `Display` yields the lower-case spelling of the variant (`pf1`, `ivall`,
/// `wballp`, ...).
#[allow(dead_code)]
pub enum CCtlOp {
    PF1,
    PF2,
    WB,
    IV,
    IVAll,
    RS,
    IVAllP,
    WBAll,
    WBAllP,
}

impl CCtlOp {
    /// Returns `true` for the `IVAll`, `IVAllP`, `WBAll` and `WBAllP`
    /// variants and `false` for every other one.
    pub fn is_all(&self) -> bool {
        // Kept exhaustive on purpose so that a new variant forces a decision.
        match self {
            CCtlOp::IVAll
            | CCtlOp::IVAllP
            | CCtlOp::WBAll
            | CCtlOp::WBAllP => true,
            CCtlOp::PF1
            | CCtlOp::PF2
            | CCtlOp::WB
            | CCtlOp::IV
            | CCtlOp::RS => false,
        }
    }
}

impl fmt::Display for CCtlOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            CCtlOp::PF1 => "pf1",
            CCtlOp::PF2 => "pf2",
            CCtlOp::WB => "wb",
            CCtlOp::IV => "iv",
            CCtlOp::IVAll => "ivall",
            CCtlOp::RS => "rs",
            CCtlOp::IVAllP => "ivallp",
            CCtlOp::WBAll => "wball",
            CCtlOp::WBAllP => "wballp",
        };
        f.write_str(name)
    }
}
4131
4132 #[repr(C)]
4133 #[derive(SrcsAsSlice, DstsAsSlice)]
4134 pub struct OpCCtl {
4135 pub op: CCtlOp,
4136
4137 pub mem_space: MemSpace,
4138
4139 #[src_type(GPR)]
4140 pub addr: Src,
4141
4142 pub addr_offset: i32,
4143 }
4144
4145 impl DisplayOp for OpCCtl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4146 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4147 write!(f, "cctl{}", self.mem_space)?;
4148 if !self.op.is_all() {
4149 write!(f, " [{}", self.addr)?;
4150 if self.addr_offset > 0 {
4151 write!(f, "+{:#x}", self.addr_offset)?;
4152 }
4153 write!(f, "]")?;
4154 }
4155 Ok(())
4156 }
4157 }
4158 impl_display_for_op!(OpCCtl);
4159
4160 #[repr(C)]
4161 #[derive(SrcsAsSlice, DstsAsSlice)]
4162 pub struct OpMemBar {
4163 pub scope: MemScope,
4164 }
4165
4166 impl DisplayOp for OpMemBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4167 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4168 write!(f, "membar.sc.{}", self.scope)
4169 }
4170 }
4171 impl_display_for_op!(OpMemBar);
4172
4173 #[repr(C)]
4174 #[derive(SrcsAsSlice, DstsAsSlice)]
4175 pub struct OpBClear {
4176 pub dst: Dst,
4177 }
4178
4179 impl DisplayOp for OpBClear {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4180 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4181 write!(f, "bclear")
4182 }
4183 }
4184 impl_display_for_op!(OpBClear);
4185
4186 #[repr(C)]
4187 #[derive(SrcsAsSlice, DstsAsSlice)]
4188 pub struct OpBMov {
4189 pub dst: Dst,
4190 pub src: Src,
4191 pub clear: bool,
4192 }
4193
4194 impl DisplayOp for OpBMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4195 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4196 write!(f, "bmov.32")?;
4197 if self.clear {
4198 write!(f, ".clear")?;
4199 }
4200 write!(f, " {}", self.src)
4201 }
4202 }
4203 impl_display_for_op!(OpBMov);
4204
4205 #[repr(C)]
4206 #[derive(SrcsAsSlice, DstsAsSlice)]
4207 pub struct OpBreak {
4208 pub bar_out: Dst,
4209
4210 #[src_type(Bar)]
4211 pub bar_in: Src,
4212
4213 #[src_type(Pred)]
4214 pub cond: Src,
4215 }
4216
4217 impl DisplayOp for OpBreak {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4218 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4219 write!(f, "break {} {}", self.bar_in, self.cond)
4220 }
4221 }
4222 impl_display_for_op!(OpBreak);
4223
4224 #[repr(C)]
4225 #[derive(SrcsAsSlice, DstsAsSlice)]
4226 pub struct OpBSSy {
4227 pub bar_out: Dst,
4228
4229 #[src_type(Pred)]
4230 pub bar_in: Src,
4231
4232 #[src_type(Pred)]
4233 pub cond: Src,
4234
4235 pub target: Label,
4236 }
4237
4238 impl DisplayOp for OpBSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4239 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4240 write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
4241 }
4242 }
4243 impl_display_for_op!(OpBSSy);
4244
4245 #[repr(C)]
4246 #[derive(SrcsAsSlice, DstsAsSlice)]
4247 pub struct OpBSync {
4248 #[src_type(Bar)]
4249 pub bar: Src,
4250
4251 #[src_type(Pred)]
4252 pub cond: Src,
4253 }
4254
4255 impl DisplayOp for OpBSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4256 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4257 write!(f, "bsync {} {}", self.bar, self.cond)
4258 }
4259 }
4260 impl_display_for_op!(OpBSync);
4261
4262 #[repr(C)]
4263 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4264 pub struct OpBra {
4265 pub target: Label,
4266 }
4267
4268 impl DisplayOp for OpBra {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4269 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4270 write!(f, "bra {}", self.target)
4271 }
4272 }
4273 impl_display_for_op!(OpBra);
4274
4275 #[repr(C)]
4276 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
4277 pub struct OpExit {}
4278
4279 impl DisplayOp for OpExit {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4280 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4281 write!(f, "exit")
4282 }
4283 }
4284 impl_display_for_op!(OpExit);
4285
4286 #[repr(C)]
4287 #[derive(SrcsAsSlice, DstsAsSlice)]
4288 pub struct OpWarpSync {
4289 pub mask: u32,
4290 }
4291
4292 impl DisplayOp for OpWarpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4293 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4294 write!(f, "warpsync 0x{:x}", self.mask)
4295 }
4296 }
4297 impl_display_for_op!(OpWarpSync);
4298
4299 #[repr(C)]
4300 #[derive(SrcsAsSlice, DstsAsSlice)]
4301 pub struct OpBar {}
4302
4303 impl DisplayOp for OpBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4304 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4305 write!(f, "bar.sync")
4306 }
4307 }
4308 impl_display_for_op!(OpBar);
4309
4310 #[repr(C)]
4311 #[derive(SrcsAsSlice, DstsAsSlice)]
4312 pub struct OpCS2R {
4313 pub dst: Dst,
4314 pub idx: u8,
4315 }
4316
4317 impl DisplayOp for OpCS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4318 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4319 write!(f, "cs2r sr[{:#x}]", self.idx)
4320 }
4321 }
4322 impl_display_for_op!(OpCS2R);
4323
4324 #[repr(C)]
4325 #[derive(SrcsAsSlice, DstsAsSlice)]
4326 pub struct OpIsberd {
4327 pub dst: Dst,
4328
4329 #[src_type(SSA)]
4330 pub idx: Src,
4331 }
4332
4333 impl DisplayOp for OpIsberd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4334 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4335 write!(f, "isberd {} [{}]", self.dst, self.idx)
4336 }
4337 }
4338 impl_display_for_op!(OpIsberd);
4339
4340 #[repr(C)]
4341 #[derive(SrcsAsSlice, DstsAsSlice)]
4342 pub struct OpKill {}
4343
4344 impl DisplayOp for OpKill {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4345 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4346 write!(f, "kill")
4347 }
4348 }
4349 impl_display_for_op!(OpKill);
4350
4351 #[repr(C)]
4352 #[derive(SrcsAsSlice, DstsAsSlice)]
4353 pub struct OpNop {
4354 pub label: Option<Label>,
4355 }
4356
4357 impl DisplayOp for OpNop {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4358 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4359 write!(f, "nop")?;
4360 if let Some(label) = &self.label {
4361 write!(f, " {}", label)?;
4362 }
4363 Ok(())
4364 }
4365 }
4366 impl_display_for_op!(OpNop);
4367
4368 #[allow(dead_code)]
4369 pub enum PixVal {
4370 MsCount,
4371 CovMask,
4372 CentroidOffset,
4373 MyIndex,
4374 InnerCoverage,
4375 }
4376
4377 #[repr(C)]
4378 #[derive(SrcsAsSlice, DstsAsSlice)]
4379 pub struct OpPixLd {
4380 pub dst: Dst,
4381 pub val: PixVal,
4382 }
4383
4384 impl DisplayOp for OpPixLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4385 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4386 write!(f, "pixld")?;
4387 match self.val {
4388 PixVal::MsCount => write!(f, ".mscount"),
4389 PixVal::CovMask => write!(f, ".covmask"),
4390 PixVal::CentroidOffset => write!(f, ".centroid_offset"),
4391 PixVal::MyIndex => write!(f, ".my_index"),
4392 PixVal::InnerCoverage => write!(f, ".inner_coverage"),
4393 }
4394 }
4395 }
4396 impl_display_for_op!(OpPixLd);
4397
4398 #[repr(C)]
4399 #[derive(SrcsAsSlice, DstsAsSlice)]
4400 pub struct OpS2R {
4401 pub dst: Dst,
4402 pub idx: u8,
4403 }
4404
4405 impl DisplayOp for OpS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4406 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4407 write!(f, "s2r sr[{:#x}]", self.idx)
4408 }
4409 }
4410 impl_display_for_op!(OpS2R);
4411
/// Selects the kind of vote performed by an [`OpVote`].
///
/// `Display` yields `any`, `all` or `eq`.
pub enum VoteOp {
    Any,
    All,
    Eq,
}

impl fmt::Display for VoteOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            VoteOp::Any => "any",
            VoteOp::All => "all",
            VoteOp::Eq => "eq",
        };
        f.write_str(name)
    }
}
4427
4428 #[repr(C)]
4429 #[derive(SrcsAsSlice, DstsAsSlice)]
4430 pub struct OpVote {
4431 pub op: VoteOp,
4432
4433 pub ballot: Dst,
4434 pub vote: Dst,
4435
4436 #[src_type(Pred)]
4437 pub pred: Src,
4438 }
4439
4440 impl DisplayOp for OpVote {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4441 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4442 if self.ballot.is_none() && self.vote.is_none() {
4443 write!(f, "none")
4444 } else {
4445 if !self.ballot.is_none() {
4446 write!(f, "{}", self.ballot)?;
4447 }
4448 if !self.vote.is_none() {
4449 write!(f, "{}", self.vote)?;
4450 }
4451 Ok(())
4452 }
4453 }
4454
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4455 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4456 write!(f, "vote.{} {}", self.op, self.pred)
4457 }
4458 }
4459 impl_display_for_op!(OpVote);
4460
4461 #[repr(C)]
4462 #[derive(SrcsAsSlice, DstsAsSlice)]
4463 pub struct OpUndef {
4464 pub dst: Dst,
4465 }
4466
4467 impl DisplayOp for OpUndef {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4468 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4469 write!(f, "undef {}", self.dst)
4470 }
4471 }
4472 impl_display_for_op!(OpUndef);
4473
4474 #[repr(C)]
4475 #[derive(SrcsAsSlice, DstsAsSlice)]
4476 pub struct OpSrcBar {
4477 pub src: Src,
4478 }
4479
4480 impl DisplayOp for OpSrcBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4481 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4482 write!(f, "src_bar {}", self.src)
4483 }
4484 }
4485 impl_display_for_op!(OpSrcBar);
4486
/// A pair of parallel vectors.
///
/// Element `i` of the first vector belongs together with element `i` of the
/// second one.  Both vectors are expected to have the same length at all
/// times; the accessors check this in debug builds.
pub struct VecPair<A, B> {
    a: Vec<A>,
    b: Vec<B>,
}

impl<A, B> VecPair<A, B> {
    /// Moves every pair of `other` to the end of `self`, leaving `other`
    /// empty.
    pub fn append(&mut self, other: &mut VecPair<A, B>) {
        self.a.append(&mut other.a);
        self.b.append(&mut other.b);
    }

    /// Returns `true` if no pair is stored.
    pub fn is_empty(&self) -> bool {
        debug_assert!(self.a.len() == self.b.len());
        self.a.is_empty()
    }

    /// Iterates over the pairs by shared reference, in insertion order.
    pub fn iter(&self) -> Zip<slice::Iter<'_, A>, slice::Iter<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        self.a.iter().zip(self.b.iter())
    }

    /// Iterates over the pairs by mutable reference, in insertion order.
    pub fn iter_mut(
        &mut self,
    ) -> Zip<slice::IterMut<'_, A>, slice::IterMut<'_, B>> {
        debug_assert!(self.a.len() == self.b.len());
        self.a.iter_mut().zip(self.b.iter_mut())
    }

    /// Returns the number of stored pairs.
    pub fn len(&self) -> usize {
        debug_assert!(self.a.len() == self.b.len());
        self.a.len()
    }

    /// Creates an empty `VecPair`.
    pub fn new() -> Self {
        Self {
            a: Vec::new(),
            b: Vec::new(),
        }
    }

    /// Appends the pair `(a, b)`.
    pub fn push(&mut self, a: A, b: B) {
        debug_assert!(self.a.len() == self.b.len());
        self.a.push(a);
        self.b.push(b);
    }

    /// Keeps only the pairs for which `f` returns `true`.
    ///
    /// `f` is called exactly once per pair, in order, and the relative order
    /// of the surviving pairs is preserved.  Survivors are moved into place
    /// by swapping, so neither element type needs to be `Clone`.
    pub fn retain(&mut self, mut f: impl FnMut(&A, &B) -> bool) {
        debug_assert!(self.a.len() == self.b.len());
        let len = self.a.len();

        // Leading pairs that are kept stay where they are.
        let mut new_len = 0_usize;
        while new_len < len && f(&self.a[new_len], &self.b[new_len]) {
            new_len += 1;
        }

        // `new_len` now indexes the first rejected pair (if any), which has
        // already been tested.  From here on, everything in `new_len..i` is
        // rejected, so swapping a kept pair down to `new_len` only moves a
        // rejected pair towards the tail that gets truncated below.
        for i in (new_len + 1)..len {
            if f(&self.a[i], &self.b[i]) {
                self.a.swap(new_len, i);
                self.b.swap(new_len, i);
                new_len += 1;
            }
        }

        self.a.truncate(new_len);
        self.b.truncate(new_len);
    }
}
4569
/// Hands out consecutive `u32` phi indices, starting at zero.
pub struct PhiAllocator {
    count: u32,
}

impl PhiAllocator {
    /// Creates an allocator whose first index is `0`.
    pub fn new() -> PhiAllocator {
        Self { count: 0 }
    }

    /// Returns the next unused index and advances the counter by one.
    pub fn alloc(&mut self) -> u32 {
        let next = self.count + 1;
        std::mem::replace(&mut self.count, next)
    }
}
4585
4586 #[repr(C)]
4587 #[derive(DstsAsSlice)]
4588 pub struct OpPhiSrcs {
4589 pub srcs: VecPair<u32, Src>,
4590 }
4591
4592 impl OpPhiSrcs {
new() -> OpPhiSrcs4593 pub fn new() -> OpPhiSrcs {
4594 OpPhiSrcs {
4595 srcs: VecPair::new(),
4596 }
4597 }
4598 }
4599
4600 impl SrcsAsSlice for OpPhiSrcs {
srcs_as_slice(&self) -> &[Src]4601 fn srcs_as_slice(&self) -> &[Src] {
4602 &self.srcs.b
4603 }
4604
srcs_as_mut_slice(&mut self) -> &mut [Src]4605 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
4606 &mut self.srcs.b
4607 }
4608
src_types(&self) -> SrcTypeList4609 fn src_types(&self) -> SrcTypeList {
4610 SrcTypeList::Uniform(SrcType::GPR)
4611 }
4612 }
4613
4614 impl DisplayOp for OpPhiSrcs {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result4615 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
4616 Ok(())
4617 }
4618
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4619 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4620 write!(f, "phi_src ")?;
4621 for (i, (id, src)) in self.srcs.iter().enumerate() {
4622 if i > 0 {
4623 write!(f, ", ")?;
4624 }
4625 write!(f, "φ{} = {}", id, src)?;
4626 }
4627 Ok(())
4628 }
4629 }
4630 impl_display_for_op!(OpPhiSrcs);
4631
4632 #[repr(C)]
4633 #[derive(SrcsAsSlice)]
4634 pub struct OpPhiDsts {
4635 pub dsts: VecPair<u32, Dst>,
4636 }
4637
4638 impl OpPhiDsts {
new() -> OpPhiDsts4639 pub fn new() -> OpPhiDsts {
4640 OpPhiDsts {
4641 dsts: VecPair::new(),
4642 }
4643 }
4644 }
4645
4646 impl DstsAsSlice for OpPhiDsts {
dsts_as_slice(&self) -> &[Dst]4647 fn dsts_as_slice(&self) -> &[Dst] {
4648 &self.dsts.b
4649 }
4650
dsts_as_mut_slice(&mut self) -> &mut [Dst]4651 fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
4652 &mut self.dsts.b
4653 }
4654 }
4655
4656 impl DisplayOp for OpPhiDsts {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result4657 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
4658 Ok(())
4659 }
4660
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4661 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4662 write!(f, "phi_dst ")?;
4663 for (i, (id, dst)) in self.dsts.iter().enumerate() {
4664 if i > 0 {
4665 write!(f, ", ")?;
4666 }
4667 write!(f, "{} = φ{}", dst, id)?;
4668 }
4669 Ok(())
4670 }
4671 }
4672 impl_display_for_op!(OpPhiDsts);
4673
4674 #[repr(C)]
4675 #[derive(SrcsAsSlice, DstsAsSlice)]
4676 pub struct OpCopy {
4677 pub dst: Dst,
4678 pub src: Src,
4679 }
4680
4681 impl DisplayOp for OpCopy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4682 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4683 write!(f, "copy {}", self.src)
4684 }
4685 }
4686 impl_display_for_op!(OpCopy);
4687
4688 #[repr(C)]
4689 #[derive(SrcsAsSlice, DstsAsSlice)]
4690 pub struct OpSwap {
4691 pub dsts: [Dst; 2],
4692 pub srcs: [Src; 2],
4693 }
4694
4695 impl DisplayOp for OpSwap {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4696 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4697 write!(f, "swap {} {}", self.srcs[0], self.srcs[1])
4698 }
4699 }
4700 impl_display_for_op!(OpSwap);
4701
4702 #[repr(C)]
4703 pub struct OpParCopy {
4704 pub dsts_srcs: VecPair<Dst, Src>,
4705 pub tmp: Option<RegRef>,
4706 }
4707
4708 impl OpParCopy {
new() -> OpParCopy4709 pub fn new() -> OpParCopy {
4710 OpParCopy {
4711 dsts_srcs: VecPair::new(),
4712 tmp: None,
4713 }
4714 }
4715
is_empty(&self) -> bool4716 pub fn is_empty(&self) -> bool {
4717 self.dsts_srcs.is_empty()
4718 }
4719
push(&mut self, dst: Dst, src: Src)4720 pub fn push(&mut self, dst: Dst, src: Src) {
4721 self.dsts_srcs.push(dst, src);
4722 }
4723 }
4724
4725 impl SrcsAsSlice for OpParCopy {
srcs_as_slice(&self) -> &[Src]4726 fn srcs_as_slice(&self) -> &[Src] {
4727 &self.dsts_srcs.b
4728 }
4729
srcs_as_mut_slice(&mut self) -> &mut [Src]4730 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
4731 &mut self.dsts_srcs.b
4732 }
4733
src_types(&self) -> SrcTypeList4734 fn src_types(&self) -> SrcTypeList {
4735 SrcTypeList::Uniform(SrcType::GPR)
4736 }
4737 }
4738
4739 impl DstsAsSlice for OpParCopy {
dsts_as_slice(&self) -> &[Dst]4740 fn dsts_as_slice(&self) -> &[Dst] {
4741 &self.dsts_srcs.a
4742 }
4743
dsts_as_mut_slice(&mut self) -> &mut [Dst]4744 fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
4745 &mut self.dsts_srcs.a
4746 }
4747 }
4748
4749 impl DisplayOp for OpParCopy {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result4750 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
4751 Ok(())
4752 }
4753
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4754 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4755 write!(f, "par_copy")?;
4756 for (i, (dst, src)) in self.dsts_srcs.iter().enumerate() {
4757 if i > 0 {
4758 write!(f, ",")?;
4759 }
4760 write!(f, " {} = {}", dst, src)?;
4761 }
4762 Ok(())
4763 }
4764 }
4765 impl_display_for_op!(OpParCopy);
4766
4767 #[repr(C)]
4768 #[derive(DstsAsSlice)]
4769 pub struct OpFSOut {
4770 pub srcs: Vec<Src>,
4771 }
4772
4773 impl SrcsAsSlice for OpFSOut {
srcs_as_slice(&self) -> &[Src]4774 fn srcs_as_slice(&self) -> &[Src] {
4775 &self.srcs
4776 }
4777
srcs_as_mut_slice(&mut self) -> &mut [Src]4778 fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
4779 &mut self.srcs
4780 }
4781
src_types(&self) -> SrcTypeList4782 fn src_types(&self) -> SrcTypeList {
4783 SrcTypeList::Uniform(SrcType::GPR)
4784 }
4785 }
4786
4787 impl DisplayOp for OpFSOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4788 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4789 write!(f, "fs_out {{")?;
4790 for (i, src) in self.srcs.iter().enumerate() {
4791 if i > 0 {
4792 write!(f, ",")?;
4793 }
4794 write!(f, " {}", src)?;
4795 }
4796 write!(f, " }}")
4797 }
4798 }
4799 impl_display_for_op!(OpFSOut);
4800
/// Selects the variant of an [`OpOut`].
///
/// `Display` yields `emit`, `cut` or `emit_then_cut`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OutType {
    Emit,
    Cut,
    EmitThenCut,
}

impl fmt::Display for OutType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            OutType::Emit => "emit",
            OutType::Cut => "cut",
            OutType::EmitThenCut => "emit_then_cut",
        };
        f.write_str(name)
    }
}
4817
4818 #[repr(C)]
4819 #[derive(SrcsAsSlice, DstsAsSlice)]
4820 pub struct OpOut {
4821 pub dst: Dst,
4822
4823 #[src_type(SSA)]
4824 pub handle: Src,
4825
4826 #[src_type(ALU)]
4827 pub stream: Src,
4828
4829 pub out_type: OutType,
4830 }
4831
4832 impl DisplayOp for OpOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4833 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4834 write!(f, "out.{} {} {}", self.out_type, self.handle, self.stream)
4835 }
4836 }
4837 impl_display_for_op!(OpOut);
4838
4839 #[repr(C)]
4840 #[derive(SrcsAsSlice, DstsAsSlice)]
4841 pub struct OpOutFinal {
4842 #[src_type(SSA)]
4843 pub handle: Src,
4844 }
4845
4846 impl DisplayOp for OpOutFinal {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4847 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4848 write!(f, "out.final {{ {} }}", self.handle)
4849 }
4850 }
4851 impl_display_for_op!(OpOutFinal);
4852
4853 /// Describes an annotation on an instruction.
4854 #[repr(C)]
4855 #[derive(SrcsAsSlice, DstsAsSlice)]
4856 pub struct OpAnnotate {
4857 /// The annotation
4858 pub annotation: String,
4859 }
4860
4861 impl DisplayOp for OpAnnotate {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4862 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4863 write!(f, "// {}", self.annotation)
4864 }
4865 }
4866
4867 impl fmt::Display for OpAnnotate {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4868 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4869 self.fmt_op(f)
4870 }
4871 }
4872
4873 #[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
4874 pub enum Op {
4875 FAdd(OpFAdd),
4876 FFma(OpFFma),
4877 FMnMx(OpFMnMx),
4878 FMul(OpFMul),
4879 Rro(OpRro),
4880 MuFu(OpMuFu),
4881 FSet(OpFSet),
4882 FSetP(OpFSetP),
4883 FSwzAdd(OpFSwzAdd),
4884 DAdd(OpDAdd),
4885 DFma(OpDFma),
4886 DMnMx(OpDMnMx),
4887 DMul(OpDMul),
4888 DSetP(OpDSetP),
4889 BMsk(OpBMsk),
4890 BRev(OpBRev),
4891 Bfe(OpBfe),
4892 Flo(OpFlo),
4893 IAbs(OpIAbs),
4894 INeg(OpINeg),
4895 IAdd2(OpIAdd2),
4896 IAdd3(OpIAdd3),
4897 IAdd3X(OpIAdd3X),
4898 IDp4(OpIDp4),
4899 IMad(OpIMad),
4900 IMad64(OpIMad64),
4901 IMul(OpIMul),
4902 IMnMx(OpIMnMx),
4903 ISetP(OpISetP),
4904 Lop2(OpLop2),
4905 Lop3(OpLop3),
4906 PopC(OpPopC),
4907 Shf(OpShf),
4908 Shl(OpShl),
4909 Shr(OpShr),
4910 F2F(OpF2F),
4911 F2I(OpF2I),
4912 I2F(OpI2F),
4913 I2I(OpI2I),
4914 FRnd(OpFRnd),
4915 Mov(OpMov),
4916 Prmt(OpPrmt),
4917 Sel(OpSel),
4918 Shfl(OpShfl),
4919 PLop3(OpPLop3),
4920 PSetP(OpPSetP),
4921 Tex(OpTex),
4922 Tld(OpTld),
4923 Tld4(OpTld4),
4924 Tmml(OpTmml),
4925 Txd(OpTxd),
4926 Txq(OpTxq),
4927 SuLd(OpSuLd),
4928 SuSt(OpSuSt),
4929 SuAtom(OpSuAtom),
4930 Ld(OpLd),
4931 Ldc(OpLdc),
4932 St(OpSt),
4933 Atom(OpAtom),
4934 AL2P(OpAL2P),
4935 ALd(OpALd),
4936 ASt(OpASt),
4937 Ipa(OpIpa),
4938 LdTram(OpLdTram),
4939 CCtl(OpCCtl),
4940 MemBar(OpMemBar),
4941 BClear(OpBClear),
4942 BMov(OpBMov),
4943 Break(OpBreak),
4944 BSSy(OpBSSy),
4945 BSync(OpBSync),
4946 Bra(OpBra),
4947 Exit(OpExit),
4948 WarpSync(OpWarpSync),
4949 Bar(OpBar),
4950 CS2R(OpCS2R),
4951 Isberd(OpIsberd),
4952 Kill(OpKill),
4953 Nop(OpNop),
4954 PixLd(OpPixLd),
4955 S2R(OpS2R),
4956 Vote(OpVote),
4957 Undef(OpUndef),
4958 SrcBar(OpSrcBar),
4959 PhiSrcs(OpPhiSrcs),
4960 PhiDsts(OpPhiDsts),
4961 Copy(OpCopy),
4962 Swap(OpSwap),
4963 ParCopy(OpParCopy),
4964 FSOut(OpFSOut),
4965 Out(OpOut),
4966 OutFinal(OpOutFinal),
4967 Annotate(OpAnnotate),
4968 }
4969 impl_display_for_op!(Op);
4970
4971 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
4972 pub enum PredRef {
4973 None,
4974 SSA(SSAValue),
4975 Reg(RegRef),
4976 }
4977
4978 impl PredRef {
4979 #[allow(dead_code)]
as_reg(&self) -> Option<&RegRef>4980 pub fn as_reg(&self) -> Option<&RegRef> {
4981 match self {
4982 PredRef::Reg(r) => Some(r),
4983 _ => None,
4984 }
4985 }
4986
4987 #[allow(dead_code)]
as_ssa(&self) -> Option<&SSAValue>4988 pub fn as_ssa(&self) -> Option<&SSAValue> {
4989 match self {
4990 PredRef::SSA(r) => Some(r),
4991 _ => None,
4992 }
4993 }
4994
is_none(&self) -> bool4995 pub fn is_none(&self) -> bool {
4996 matches!(self, PredRef::None)
4997 }
4998
iter_ssa(&self) -> slice::Iter<'_, SSAValue>4999 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
5000 match self {
5001 PredRef::None | PredRef::Reg(_) => &[],
5002 PredRef::SSA(ssa) => slice::from_ref(ssa),
5003 }
5004 .iter()
5005 }
5006
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>5007 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
5008 match self {
5009 PredRef::None | PredRef::Reg(_) => &mut [],
5010 PredRef::SSA(ssa) => slice::from_mut(ssa),
5011 }
5012 .iter_mut()
5013 }
5014 }
5015
5016 impl From<RegRef> for PredRef {
from(reg: RegRef) -> PredRef5017 fn from(reg: RegRef) -> PredRef {
5018 PredRef::Reg(reg)
5019 }
5020 }
5021
5022 impl From<SSAValue> for PredRef {
from(ssa: SSAValue) -> PredRef5023 fn from(ssa: SSAValue) -> PredRef {
5024 PredRef::SSA(ssa)
5025 }
5026 }
5027
5028 impl fmt::Display for PredRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5029 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5030 match self {
5031 PredRef::None => write!(f, "pT"),
5032 PredRef::SSA(ssa) => ssa.fmt_plain(f),
5033 PredRef::Reg(reg) => reg.fmt(f),
5034 }
5035 }
5036 }
5037
5038 #[derive(Clone, Copy)]
5039 pub struct Pred {
5040 pub pred_ref: PredRef,
5041 pub pred_inv: bool,
5042 }
5043
5044 impl Pred {
is_true(&self) -> bool5045 pub fn is_true(&self) -> bool {
5046 self.pred_ref.is_none() && !self.pred_inv
5047 }
5048
is_false(&self) -> bool5049 pub fn is_false(&self) -> bool {
5050 self.pred_ref.is_none() && self.pred_inv
5051 }
5052
iter_ssa(&self) -> slice::Iter<'_, SSAValue>5053 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
5054 self.pred_ref.iter_ssa()
5055 }
5056
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>5057 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
5058 self.pred_ref.iter_ssa_mut()
5059 }
5060 }
5061
5062 impl<T: Into<PredRef>> From<T> for Pred {
from(p: T) -> Self5063 fn from(p: T) -> Self {
5064 Pred {
5065 pred_ref: p.into(),
5066 pred_inv: false,
5067 }
5068 }
5069 }
5070
5071 impl fmt::Display for Pred {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5072 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5073 if self.pred_inv {
5074 write!(f, "!")?;
5075 }
5076 self.pred_ref.fmt(f)
5077 }
5078 }
5079
/// Minimum instruction delay value.
pub const MIN_INSTR_DELAY: u8 = 1;
/// Maximum instruction delay value accepted by [`InstrDeps::set_delay`].
pub const MAX_INSTR_DELAY: u8 = 15;

/// Per-instruction dependency annotations: delay, yield flag, barrier
/// assignments and reuse flags.
///
/// All barrier and reuse indices accepted by the setters are in `0..6`.
pub struct InstrDeps {
    /// Delay value; `0` until [`InstrDeps::set_delay`] is called.
    pub delay: u8,
    /// Yield flag.
    pub yld: bool,
    /// Write barrier index, or `-1` when none is assigned.
    wr_bar: i8,
    /// Read barrier index, or `-1` when none is assigned.
    rd_bar: i8,
    /// Bit mask of barrier indices (bit `i` set means barrier `i`);
    /// presumably the barriers this instruction waits on.
    pub wt_bar_mask: u8,
    /// Bit mask of reuse indices (bit `i` set means index `i`).
    pub reuse_mask: u8,
}

impl InstrDeps {
    /// Creates dependency info with no delay, no yield, no barriers and no
    /// reuse flags.
    pub fn new() -> InstrDeps {
        InstrDeps {
            delay: 0,
            yld: false,
            wr_bar: -1,
            rd_bar: -1,
            wt_bar_mask: 0,
            reuse_mask: 0,
        }
    }

    /// Returns the read barrier index, or `None` when unset.
    pub fn rd_bar(&self) -> Option<u8> {
        // A negative value is the "unset" sentinel and fails the conversion.
        u8::try_from(self.rd_bar).ok()
    }

    /// Returns the write barrier index, or `None` when unset.
    pub fn wr_bar(&self) -> Option<u8> {
        // A negative value is the "unset" sentinel and fails the conversion.
        u8::try_from(self.wr_bar).ok()
    }

    /// Sets the delay.
    ///
    /// # Panics
    ///
    /// Panics if `delay` exceeds [`MAX_INSTR_DELAY`].
    pub fn set_delay(&mut self, delay: u8) {
        assert!(delay <= MAX_INSTR_DELAY);
        self.delay = delay;
    }

    /// Sets or clears the yield flag.
    pub fn set_yield(&mut self, yld: bool) {
        self.yld = yld;
    }

    /// Assigns read barrier `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn set_rd_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.rd_bar = i8::try_from(idx).expect("barrier index fits in i8");
    }

    /// Assigns write barrier `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn set_wr_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.wr_bar = i8::try_from(idx).expect("barrier index fits in i8");
    }

    /// Adds barrier `idx` to the wait mask.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    pub fn add_wt_bar(&mut self, idx: u8) {
        // Validate before shifting: with `idx >= 8` the shift overflows, which
        // wraps in release builds and would silently select the wrong barrier
        // instead of tripping the mask check.
        assert!(idx < 6);
        self.add_wt_bar_mask(1 << idx);
    }

    /// ORs `bar_mask` into the wait mask.
    ///
    /// # Panics
    ///
    /// Panics if `bar_mask` has any bit at position 6 or above set.
    pub fn add_wt_bar_mask(&mut self, bar_mask: u8) {
        assert!(bar_mask < 1 << 6);
        self.wt_bar_mask |= bar_mask;
    }

    /// Sets reuse bit `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx >= 6`.
    #[allow(dead_code)]
    pub fn add_reuse(&mut self, idx: u8) {
        assert!(idx < 6);
        self.reuse_mask |= 1_u8 << idx;
    }
}
5154
5155 impl fmt::Display for InstrDeps {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5156 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5157 if self.delay > 0 {
5158 write!(f, " delay={}", self.delay)?;
5159 }
5160 if self.wt_bar_mask != 0 {
5161 write!(f, " wt={:06b}", self.wt_bar_mask)?;
5162 }
5163 if self.rd_bar >= 0 {
5164 write!(f, " rd:{}", self.rd_bar)?;
5165 }
5166 if self.wr_bar >= 0 {
5167 write!(f, " wr:{}", self.wr_bar)?;
5168 }
5169 if self.reuse_mask != 0 {
5170 write!(f, " reuse={:06b}", self.reuse_mask)?;
5171 }
5172 if self.yld {
5173 write!(f, " yld")?;
5174 }
5175 Ok(())
5176 }
5177 }
5178
5179 pub struct Instr {
5180 pub pred: Pred,
5181 pub op: Op,
5182 pub deps: InstrDeps,
5183 }
5184
5185 impl Instr {
new(op: impl Into<Op>) -> Instr5186 pub fn new(op: impl Into<Op>) -> Instr {
5187 Instr {
5188 op: op.into(),
5189 pred: PredRef::None.into(),
5190 deps: InstrDeps::new(),
5191 }
5192 }
5193
new_boxed(op: impl Into<Op>) -> Box<Self>5194 pub fn new_boxed(op: impl Into<Op>) -> Box<Self> {
5195 Box::new(Instr::new(op))
5196 }
5197
dsts(&self) -> &[Dst]5198 pub fn dsts(&self) -> &[Dst] {
5199 self.op.dsts_as_slice()
5200 }
5201
dsts_mut(&mut self) -> &mut [Dst]5202 pub fn dsts_mut(&mut self) -> &mut [Dst] {
5203 self.op.dsts_as_mut_slice()
5204 }
5205
srcs(&self) -> &[Src]5206 pub fn srcs(&self) -> &[Src] {
5207 self.op.srcs_as_slice()
5208 }
5209
srcs_mut(&mut self) -> &mut [Src]5210 pub fn srcs_mut(&mut self) -> &mut [Src] {
5211 self.op.srcs_as_mut_slice()
5212 }
5213
src_types(&self) -> SrcTypeList5214 pub fn src_types(&self) -> SrcTypeList {
5215 self.op.src_types()
5216 }
5217
for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue))5218 pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
5219 for ssa in self.pred.iter_ssa() {
5220 f(ssa);
5221 }
5222 for src in self.srcs() {
5223 for ssa in src.iter_ssa() {
5224 f(ssa);
5225 }
5226 }
5227 }
5228
for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue))5229 pub fn for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
5230 for ssa in self.pred.iter_ssa_mut() {
5231 f(ssa);
5232 }
5233 for src in self.srcs_mut() {
5234 for ssa in src.iter_ssa_mut() {
5235 f(ssa);
5236 }
5237 }
5238 }
5239
for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue))5240 pub fn for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue)) {
5241 for dst in self.dsts() {
5242 for ssa in dst.iter_ssa() {
5243 f(ssa);
5244 }
5245 }
5246 }
5247
for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue))5248 pub fn for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
5249 for dst in self.dsts_mut() {
5250 for ssa in dst.iter_ssa_mut() {
5251 f(ssa);
5252 }
5253 }
5254 }
5255
is_branch(&self) -> bool5256 pub fn is_branch(&self) -> bool {
5257 matches!(self.op, Op::Bra(_) | Op::Exit(_))
5258 }
5259
is_barrier(&self) -> bool5260 pub fn is_barrier(&self) -> bool {
5261 matches!(self.op, Op::Bar(_))
5262 }
5263
uses_global_mem(&self) -> bool5264 pub fn uses_global_mem(&self) -> bool {
5265 match &self.op {
5266 Op::Atom(op) => op.mem_space != MemSpace::Local,
5267 Op::Ld(op) => op.access.space != MemSpace::Local,
5268 Op::St(op) => op.access.space != MemSpace::Local,
5269 Op::SuAtom(_) | Op::SuLd(_) | Op::SuSt(_) => true,
5270 _ => false,
5271 }
5272 }
5273
writes_global_mem(&self) -> bool5274 pub fn writes_global_mem(&self) -> bool {
5275 match &self.op {
5276 Op::Atom(op) => matches!(op.mem_space, MemSpace::Global(_)),
5277 Op::St(op) => matches!(op.access.space, MemSpace::Global(_)),
5278 Op::SuAtom(_) | Op::SuSt(_) => true,
5279 _ => false,
5280 }
5281 }
5282
can_eliminate(&self) -> bool5283 pub fn can_eliminate(&self) -> bool {
5284 match &self.op {
5285 Op::ASt(_)
5286 | Op::SuSt(_)
5287 | Op::SuAtom(_)
5288 | Op::St(_)
5289 | Op::Atom(_)
5290 | Op::CCtl(_)
5291 | Op::MemBar(_)
5292 | Op::Kill(_)
5293 | Op::Nop(_)
5294 | Op::BSync(_)
5295 | Op::Bra(_)
5296 | Op::Exit(_)
5297 | Op::WarpSync(_)
5298 | Op::Bar(_)
5299 | Op::FSOut(_)
5300 | Op::Out(_)
5301 | Op::OutFinal(_)
5302 | Op::Annotate(_) => false,
5303 Op::BMov(op) => !op.clear,
5304 _ => true,
5305 }
5306 }
5307
has_fixed_latency(&self, _sm: u8) -> bool5308 pub fn has_fixed_latency(&self, _sm: u8) -> bool {
5309 match &self.op {
5310 // Float ALU
5311 Op::FAdd(_)
5312 | Op::FFma(_)
5313 | Op::FMnMx(_)
5314 | Op::FMul(_)
5315 | Op::FSet(_)
5316 | Op::FSetP(_)
5317 | Op::FSwzAdd(_) => true,
5318
5319 // Multi-function unit is variable latency
5320 Op::Rro(_) | Op::MuFu(_) => false,
5321
5322 // Double-precision float ALU
5323 Op::DAdd(_)
5324 | Op::DFma(_)
5325 | Op::DMnMx(_)
5326 | Op::DMul(_)
5327 | Op::DSetP(_) => false,
5328
5329 // Integer ALU
5330 Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false,
5331 Op::BMsk(_)
5332 | Op::IAbs(_)
5333 | Op::INeg(_)
5334 | Op::IAdd2(_)
5335 | Op::IAdd3(_)
5336 | Op::IAdd3X(_)
5337 | Op::IDp4(_)
5338 | Op::IMad(_)
5339 | Op::IMad64(_)
5340 | Op::IMul(_)
5341 | Op::IMnMx(_)
5342 | Op::ISetP(_)
5343 | Op::Lop2(_)
5344 | Op::Lop3(_)
5345 | Op::Shf(_)
5346 | Op::Shl(_)
5347 | Op::Shr(_)
5348 | Op::Bfe(_) => true,
5349
5350 // Conversions are variable latency?!?
5351 Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
5352 false
5353 }
5354
5355 // Move ops
5356 Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
5357 Op::Shfl(_) => false,
5358
5359 // Predicate ops
5360 Op::PLop3(_) | Op::PSetP(_) => true,
5361
5362 // Texture ops
5363 Op::Tex(_)
5364 | Op::Tld(_)
5365 | Op::Tld4(_)
5366 | Op::Tmml(_)
5367 | Op::Txd(_)
5368 | Op::Txq(_) => false,
5369
5370 // Surface ops
5371 Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
5372
5373 // Memory ops
5374 Op::Ld(_)
5375 | Op::Ldc(_)
5376 | Op::St(_)
5377 | Op::Atom(_)
5378 | Op::AL2P(_)
5379 | Op::ALd(_)
5380 | Op::ASt(_)
5381 | Op::Ipa(_)
5382 | Op::CCtl(_)
5383 | Op::LdTram(_)
5384 | Op::MemBar(_) => false,
5385
5386 // Control-flow ops
5387 Op::BClear(_) | Op::Break(_) | Op::BSSy(_) | Op::BSync(_) => true,
5388 Op::Bra(_) | Op::Exit(_) => true,
5389 Op::WarpSync(_) => false,
5390
5391 // BMOV: barriers only when using gprs (and only valid for the gpr),
5392 // no barriers for the others.
5393 Op::BMov(op) => match &op.dst {
5394 Dst::None => true,
5395 Dst::SSA(vec) => vec.file() == RegFile::Bar,
5396 Dst::Reg(reg) => reg.file() == RegFile::Bar,
5397 },
5398
5399 // Geometry ops
5400 Op::Out(_) | Op::OutFinal(_) => false,
5401
5402 // Miscellaneous ops
5403 Op::Bar(_)
5404 | Op::CS2R(_)
5405 | Op::Isberd(_)
5406 | Op::Kill(_)
5407 | Op::PixLd(_)
5408 | Op::S2R(_) => false,
5409 Op::Nop(_) | Op::Vote(_) => true,
5410
5411 // Virtual ops
5412 Op::Undef(_)
5413 | Op::SrcBar(_)
5414 | Op::PhiSrcs(_)
5415 | Op::PhiDsts(_)
5416 | Op::Copy(_)
5417 | Op::Swap(_)
5418 | Op::ParCopy(_)
5419 | Op::FSOut(_)
5420 | Op::Annotate(_) => {
5421 panic!("Not a hardware opcode")
5422 }
5423 }
5424 }
5425
5426 /// Minimum latency before another instruction can execute
get_exec_latency(&self, sm: u8) -> u325427 pub fn get_exec_latency(&self, sm: u8) -> u32 {
5428 match &self.op {
5429 Op::Bar(_) | Op::MemBar(_) => {
5430 if sm >= 80 {
5431 6
5432 } else {
5433 5
5434 }
5435 }
5436 Op::CCtl(_op) => {
5437 // CCTL.C needs 8, CCTL.I needs 11
5438 11
5439 }
5440 // Op::DepBar(_) => 4,
5441 _ => 1, // TODO: co-issue
5442 }
5443 }
5444
get_dst_latency(&self, sm: u8, dst_idx: usize) -> u325445 pub fn get_dst_latency(&self, sm: u8, dst_idx: usize) -> u32 {
5446 debug_assert!(self.has_fixed_latency(sm));
5447 let file = match self.dsts()[dst_idx] {
5448 Dst::None => return 0,
5449 Dst::SSA(vec) => vec.file(),
5450 Dst::Reg(reg) => reg.file(),
5451 };
5452 if file.is_predicate() {
5453 13
5454 } else {
5455 6
5456 }
5457 }
5458
needs_yield(&self) -> bool5459 pub fn needs_yield(&self) -> bool {
5460 matches!(&self.op, Op::Bar(_) | Op::BSync(_))
5461 }
5462
fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5463 fn fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5464 if !self.pred.is_true() {
5465 write!(f, "@{} ", self.pred)?;
5466 }
5467 Ok(())
5468 }
5469 }
5470
5471 impl fmt::Display for Instr {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5472 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5473 write!(f, "{} {}{}", Fmt(|f| self.fmt_pred(f)), self.op, self.deps)
5474 }
5475 }
5476
5477 impl<T: Into<Op>> From<T> for Instr {
from(value: T) -> Self5478 fn from(value: T) -> Self {
5479 Self::new(value)
5480 }
5481 }
5482
5483 /// The result of map() done on a Box<Instr>. A Vec is only allocated if the
5484 /// mapping results in multiple instructions. This helps to reduce the amount of
5485 /// Vec's allocated in the optimization passes.
5486 pub enum MappedInstrs {
5487 None,
5488 One(Box<Instr>),
5489 Many(Vec<Box<Instr>>),
5490 }
5491
5492 impl MappedInstrs {
push(&mut self, i: Box<Instr>)5493 pub fn push(&mut self, i: Box<Instr>) {
5494 match self {
5495 MappedInstrs::None => {
5496 *self = MappedInstrs::One(i);
5497 }
5498 MappedInstrs::One(_) => {
5499 *self = match std::mem::replace(self, MappedInstrs::None) {
5500 MappedInstrs::One(o) => MappedInstrs::Many(vec![o, i]),
5501 _ => panic!("Not a One"),
5502 };
5503 }
5504 MappedInstrs::Many(v) => {
5505 v.push(i);
5506 }
5507 }
5508 }
5509
last_mut(&mut self) -> Option<&mut Box<Instr>>5510 pub fn last_mut(&mut self) -> Option<&mut Box<Instr>> {
5511 match self {
5512 MappedInstrs::None => None,
5513 MappedInstrs::One(instr) => Some(instr),
5514 MappedInstrs::Many(v) => v.last_mut(),
5515 }
5516 }
5517 }
5518
5519 pub struct BasicBlock {
5520 pub label: Label,
5521 pub instrs: Vec<Box<Instr>>,
5522 }
5523
5524 impl BasicBlock {
new(label: Label) -> BasicBlock5525 pub fn new(label: Label) -> BasicBlock {
5526 BasicBlock {
5527 label: label,
5528 instrs: Vec::new(),
5529 }
5530 }
5531
map_instrs_priv( &mut self, map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, ssa_alloc: &mut SSAValueAllocator, )5532 fn map_instrs_priv(
5533 &mut self,
5534 map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5535 ssa_alloc: &mut SSAValueAllocator,
5536 ) {
5537 let mut instrs = Vec::new();
5538 for i in self.instrs.drain(..) {
5539 match map(i, ssa_alloc) {
5540 MappedInstrs::None => (),
5541 MappedInstrs::One(i) => {
5542 instrs.push(i);
5543 }
5544 MappedInstrs::Many(mut v) => {
5545 instrs.append(&mut v);
5546 }
5547 }
5548 }
5549 self.instrs = instrs;
5550 }
5551
phi_dsts(&self) -> Option<&OpPhiDsts>5552 pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
5553 if let Op::PhiDsts(phi) = &self.instrs.first()?.op {
5554 return Some(phi);
5555 }
5556 None
5557 }
5558
5559 #[allow(dead_code)]
phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts>5560 pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
5561 if let Op::PhiDsts(phi) = &mut self.instrs.first_mut()?.op {
5562 return Some(phi);
5563 }
5564 None
5565 }
5566
phi_srcs(&self) -> Option<&OpPhiSrcs>5567 pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
5568 for instr in self.instrs.iter().rev() {
5569 if instr.is_branch() {
5570 continue;
5571 }
5572
5573 match &instr.op {
5574 Op::PhiSrcs(phi) => return Some(phi),
5575 _ => break,
5576 }
5577 }
5578 None
5579 }
5580
phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs>5581 pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
5582 for instr in self.instrs.iter_mut().rev() {
5583 if instr.is_branch() {
5584 continue;
5585 }
5586
5587 match &mut instr.op {
5588 Op::PhiSrcs(phi) => return Some(phi),
5589 _ => break,
5590 }
5591 }
5592 None
5593 }
5594
branch(&self) -> Option<&Instr>5595 pub fn branch(&self) -> Option<&Instr> {
5596 if let Some(i) = self.instrs.last() {
5597 if i.is_branch() {
5598 Some(i)
5599 } else {
5600 None
5601 }
5602 } else {
5603 None
5604 }
5605 }
5606
5607 #[allow(dead_code)]
branch_mut(&mut self) -> Option<&mut Instr>5608 pub fn branch_mut(&mut self) -> Option<&mut Instr> {
5609 if let Some(i) = self.instrs.last_mut() {
5610 if i.is_branch() {
5611 Some(i)
5612 } else {
5613 None
5614 }
5615 } else {
5616 None
5617 }
5618 }
5619
falls_through(&self) -> bool5620 pub fn falls_through(&self) -> bool {
5621 if let Some(i) = self.branch() {
5622 !i.pred.is_true()
5623 } else {
5624 true
5625 }
5626 }
5627 }
5628
5629 pub struct Function {
5630 pub ssa_alloc: SSAValueAllocator,
5631 pub phi_alloc: PhiAllocator,
5632 pub blocks: CFG<BasicBlock>,
5633 }
5634
5635 impl Function {
map_instrs_priv( &mut self, map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )5636 fn map_instrs_priv(
5637 &mut self,
5638 map: &mut impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5639 ) {
5640 for b in &mut self.blocks {
5641 b.map_instrs_priv(map, &mut self.ssa_alloc);
5642 }
5643 }
5644
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )5645 pub fn map_instrs(
5646 &mut self,
5647 mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5648 ) {
5649 self.map_instrs_priv(&mut map);
5650 }
5651 }
5652
5653 impl fmt::Display for Function {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5654 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5655 let mut pred_width = 0;
5656 let mut dsts_width = 0;
5657 let mut op_width = 0;
5658
5659 let mut blocks = Vec::new();
5660 for b in &self.blocks {
5661 let mut instrs = Vec::new();
5662 for i in &b.instrs {
5663 let mut pred = String::new();
5664 write!(pred, "{}", Fmt(|f| i.fmt_pred(f)))?;
5665 let mut dsts = String::new();
5666 write!(dsts, "{}", Fmt(|f| i.op.fmt_dsts(f)))?;
5667 let mut op = String::new();
5668 write!(op, "{}", Fmt(|f| i.op.fmt_op(f)))?;
5669 let mut deps = String::new();
5670 write!(deps, "{}", i.deps)?;
5671
5672 pred_width = max(pred_width, pred.len());
5673 dsts_width = max(dsts_width, dsts.len());
5674 op_width = max(op_width, op.len());
5675 let is_annotation = matches!(i.op, Op::Annotate(_));
5676
5677 instrs.push((pred, dsts, op, deps, is_annotation));
5678 }
5679 blocks.push(instrs);
5680 }
5681
5682 for (i, mut b) in blocks.drain(..).enumerate() {
5683 write!(f, "block {} {} [", i, self.blocks[i].label)?;
5684 for (pi, p) in self.blocks.pred_indices(i).iter().enumerate() {
5685 if pi > 0 {
5686 write!(f, ", ")?;
5687 }
5688 write!(f, "{}", p)?;
5689 }
5690 write!(f, "] -> {{\n")?;
5691
5692 for (pred, dsts, op, deps, is_annotation) in b.drain(..) {
5693 let eq_sym = if dsts.is_empty() { " " } else { "=" };
5694 if is_annotation {
5695 write!(f, "\n{}\n", op)?;
5696 } else if deps.is_empty() {
5697 write!(
5698 f,
5699 "{:<pred_width$} {:<dsts_width$} {} {}\n",
5700 pred, dsts, eq_sym, op,
5701 )?;
5702 } else {
5703 write!(
5704 f,
5705 "{:<pred_width$} {:<dsts_width$} {} \
5706 {:<op_width$} //{}\n",
5707 pred, dsts, eq_sym, op, deps,
5708 )?;
5709 }
5710 }
5711
5712 write!(f, "}} -> [")?;
5713 for (si, s) in self.blocks.succ_indices(i).iter().enumerate() {
5714 if si > 0 {
5715 write!(f, ", ")?;
5716 }
5717 write!(f, "{}", s)?;
5718 }
5719 write!(f, "]\n")?;
5720 }
5721 Ok(())
5722 }
5723 }
5724
/// Stage-specific information carried by `ShaderStageInfo::Compute`.
#[derive(Debug)]
pub struct ComputeShaderInfo {
    /// Three-component local size; presumably the workgroup dimensions
    /// (x, y, z) -- TODO confirm against the code that fills it in.
    pub local_size: [u16; 3],
    /// Presumably the shared-memory size; units are not visible here.
    pub smem_size: u16,
}
5730
5731 #[derive(Debug)]
5732 pub struct GeometryShaderInfo {
5733 pub passthrough_enable: bool,
5734 pub stream_out_mask: u8,
5735 pub threads_per_input_primitive: u8,
5736 pub output_topology: OutputTopology,
5737 pub max_output_vertex_count: u16,
5738 }
5739
5740 impl Default for GeometryShaderInfo {
default() -> Self5741 fn default() -> Self {
5742 Self {
5743 passthrough_enable: false,
5744 stream_out_mask: 0,
5745 threads_per_input_primitive: 0,
5746 output_topology: OutputTopology::LineStrip,
5747 max_output_vertex_count: 0,
5748 }
5749 }
5750 }
5751
/// Stage-specific information carried by
/// `ShaderStageInfo::TessellationInit`.
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
    /// Number of per-patch attributes.
    pub per_patch_attribute_count: u8,
    /// Number of threads per patch.
    pub threads_per_patch: u8,
}
5757
/// The shader stage, with stage-specific data for the stages that carry
/// any.
#[derive(Debug)]
pub enum ShaderStageInfo {
    /// Compute stage and its parameters.
    Compute(ComputeShaderInfo),
    /// Vertex stage.
    Vertex,
    /// Fragment stage.
    Fragment,
    /// Geometry stage and its parameters.
    Geometry(GeometryShaderInfo),
    /// Tessellation-init stage and its parameters.
    TessellationInit(TessellationInitShaderInfo),
    /// Tessellation stage.
    Tessellation,
}
5767
/// Bit masks recording which system-value slots are used.
///
/// Bits are set by `VtgIoInfo::mark_attrs` and
/// `FragmentIoInfo::mark_attr_read`, one bit per 4-byte slot.
#[derive(Debug, Default)]
pub struct SysValInfo {
    /// Slots for attribute addresses below `0x080` (bit = `addr / 4`).
    pub ab: u32,
    /// Slots for attribute addresses in `0x2c0..0x300`
    /// (bit = `(addr - 0x2c0) / 4`).
    pub c: u16,
}
5773
5774 #[derive(Debug)]
5775 pub struct VtgIoInfo {
5776 pub sysvals_in: SysValInfo,
5777 pub sysvals_in_d: u8,
5778 pub sysvals_out: SysValInfo,
5779 pub sysvals_out_d: u8,
5780 pub attr_in: [u32; 4],
5781 pub attr_out: [u32; 4],
5782 pub store_req_start: u8,
5783 pub store_req_end: u8,
5784 }
5785
5786 impl VtgIoInfo {
mark_attrs(&mut self, addrs: Range<u16>, written: bool)5787 fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
5788 let sysvals = if written {
5789 &mut self.sysvals_out
5790 } else {
5791 &mut self.sysvals_in
5792 };
5793
5794 let sysvals_d = if written {
5795 &mut self.sysvals_out_d
5796 } else {
5797 &mut self.sysvals_in_d
5798 };
5799
5800 let mut attr = BitMutView::new(if written {
5801 &mut self.attr_out
5802 } else {
5803 &mut self.attr_in
5804 });
5805
5806 let mut addrs = addrs;
5807 addrs.start &= !3;
5808 for addr in addrs.step_by(4) {
5809 if addr < 0x080 {
5810 sysvals.ab |= 1 << (addr / 4);
5811 } else if addr < 0x280 {
5812 let attr_idx = (addr - 0x080) as usize / 4;
5813 attr.set_bit(attr_idx, true);
5814 } else if addr < 0x2c0 {
5815 panic!("FF color I/O not supported");
5816 } else if addr < 0x300 {
5817 sysvals.c |= 1 << ((addr - 0x2c0) / 4);
5818 } else if addr >= 0x3a0 && addr < 0x3c0 {
5819 *sysvals_d |= 1 << ((addr - 0x3a0) / 4);
5820 }
5821 }
5822 }
5823
mark_attrs_read(&mut self, addrs: Range<u16>)5824 pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
5825 self.mark_attrs(addrs, false);
5826 }
5827
mark_attrs_written(&mut self, addrs: Range<u16>)5828 pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
5829 self.mark_attrs(addrs, true);
5830 }
5831
mark_store_req(&mut self, addrs: Range<u16>)5832 pub fn mark_store_req(&mut self, addrs: Range<u16>) {
5833 let start = (addrs.start / 4).try_into().unwrap();
5834 let end = ((addrs.end - 1) / 4).try_into().unwrap();
5835 self.store_req_start = min(self.store_req_start, start);
5836 self.store_req_end = max(self.store_req_end, end);
5837 }
5838 }
5839
5840 #[derive(Debug)]
5841 pub struct FragmentIoInfo {
5842 pub sysvals_in: SysValInfo,
5843 pub sysvals_in_d: [PixelImap; 8],
5844 pub attr_in: [PixelImap; 128],
5845 pub barycentric_attr_in: [u32; 4],
5846
5847 pub reads_sample_mask: bool,
5848 pub uses_kill: bool,
5849 pub writes_color: u32,
5850 pub writes_sample_mask: bool,
5851 pub writes_depth: bool,
5852 pub does_interlock: bool,
5853 }
5854
5855 impl FragmentIoInfo {
mark_attr_read(&mut self, addr: u16, interp: PixelImap)5856 pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
5857 if addr < 0x080 {
5858 self.sysvals_in.ab |= 1 << (addr / 4);
5859 } else if addr < 0x280 {
5860 let attr_idx = (addr - 0x080) as usize / 4;
5861 self.attr_in[attr_idx] = interp;
5862 } else if addr < 0x2c0 {
5863 panic!("FF color I/O not supported");
5864 } else if addr < 0x300 {
5865 self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4);
5866 } else if addr >= 0x3a0 && addr < 0x3c0 {
5867 let attr_idx = (addr - 0x3a0) as usize / 4;
5868 self.sysvals_in_d[attr_idx] = interp;
5869 }
5870 }
5871
mark_barycentric_attr_in(&mut self, addr: u16)5872 pub fn mark_barycentric_attr_in(&mut self, addr: u16) {
5873 assert!(addr >= 0x80 && addr < 0x280);
5874
5875 let mut attr = BitMutView::new(&mut self.barycentric_attr_in);
5876
5877 let attr_idx = (addr - 0x080) as usize / 4;
5878 attr.set_bit(attr_idx, true);
5879 }
5880 }
5881
/// Attribute I/O information, in the flavour matching the shader kind.
#[derive(Debug)]
pub enum ShaderIoInfo {
    /// No attribute I/O information.
    None,
    /// I/O information in `VtgIoInfo` form; presumably used for
    /// vertex/tessellation/geometry stages -- confirm against callers.
    Vtg(VtgIoInfo),
    /// I/O information in `FragmentIoInfo` form.
    Fragment(FragmentIoInfo),
}
5888
/// Shader-wide metadata.
#[derive(Debug)]
pub struct ShaderInfo {
    /// Target SM version; compared against values such as `70` and `80`
    /// elsewhere in this file.
    pub sm: u8,
    /// Number of GPRs; presumably the count used by the shader.
    pub num_gprs: u8,
    /// Number of barriers; presumably the count used by the shader.
    pub num_barriers: u8,
    /// Presumably the shader-local memory size; units not visible here.
    pub slm_size: u32,
    /// Set by `Shader::gather_global_mem_usage` when any instruction uses
    /// global memory.
    pub uses_global_mem: bool,
    /// Set by `Shader::gather_global_mem_usage` when any instruction writes
    /// global memory.
    pub writes_global_mem: bool,
    /// Whether the shader uses 64-bit floats.
    pub uses_fp64: bool,
    /// Stage and stage-specific parameters.
    pub stage: ShaderStageInfo,
    /// Attribute I/O information.
    pub io: ShaderIoInfo,
}
5901
5902 pub struct Shader {
5903 pub info: ShaderInfo,
5904 pub functions: Vec<Function>,
5905 }
5906
5907 impl Shader {
for_each_instr(&self, f: &mut impl FnMut(&Instr))5908 pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
5909 for func in &self.functions {
5910 for b in &func.blocks {
5911 for i in &b.instrs {
5912 f(i);
5913 }
5914 }
5915 }
5916 }
5917
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )5918 pub fn map_instrs(
5919 &mut self,
5920 mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
5921 ) {
5922 for f in &mut self.functions {
5923 f.map_instrs_priv(&mut map);
5924 }
5925 }
5926
5927 /// Remove all annotations, presumably before encoding the shader.
remove_annotations(&mut self)5928 pub fn remove_annotations(&mut self) {
5929 self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
5930 if matches!(instr.op, Op::Annotate(_)) {
5931 MappedInstrs::None
5932 } else {
5933 MappedInstrs::One(instr)
5934 }
5935 })
5936 }
5937
lower_ineg(&mut self)5938 pub fn lower_ineg(&mut self) {
5939 let sm = self.info.sm;
5940 self.map_instrs(|mut instr: Box<Instr>, _| -> MappedInstrs {
5941 match instr.op {
5942 Op::INeg(neg) => {
5943 if sm >= 70 {
5944 instr.op = Op::IAdd3(OpIAdd3 {
5945 dst: neg.dst,
5946 overflow: [Dst::None; 2],
5947 srcs: [0.into(), neg.src.ineg(), 0.into()],
5948 });
5949 } else {
5950 instr.op = Op::IAdd2(OpIAdd2 {
5951 dst: neg.dst,
5952 srcs: [0.into(), neg.src.ineg()],
5953 carry_in: 0.into(),
5954 carry_out: Dst::None,
5955 });
5956 }
5957 MappedInstrs::One(instr)
5958 }
5959 _ => MappedInstrs::One(instr),
5960 }
5961 })
5962 }
5963
gather_global_mem_usage(&mut self)5964 pub fn gather_global_mem_usage(&mut self) {
5965 if let ShaderStageInfo::Compute(_) = self.info.stage {
5966 return;
5967 }
5968
5969 let mut uses_global_mem = false;
5970 let mut writes_global_mem = false;
5971
5972 self.for_each_instr(&mut |instr| {
5973 if !uses_global_mem {
5974 uses_global_mem = instr.uses_global_mem();
5975 }
5976
5977 if !writes_global_mem {
5978 writes_global_mem = instr.writes_global_mem();
5979 }
5980 });
5981
5982 self.info.uses_global_mem = uses_global_mem;
5983 self.info.writes_global_mem = writes_global_mem;
5984 }
5985 }
5986
5987 impl fmt::Display for Shader {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5988 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5989 for func in &self.functions {
5990 write!(f, "{}", func)?;
5991 }
5992 Ok(())
5993 }
5994 }
5995