1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 extern crate bitview;
5 extern crate nak_ir_proc;
6
7 use bitview::{BitMutView, BitView};
8 use nak_bindings::*;
9
10 pub use crate::builder::{Builder, InstrBuilder, SSABuilder, SSAInstrBuilder};
11 use crate::legalize::LegalizeBuilder;
12 use crate::sph::{OutputTopology, PixelImap};
13 use compiler::as_slice::*;
14 use compiler::cfg::CFG;
15 use compiler::smallvec::SmallVec;
16 use nak_ir_proc::*;
17 use std::cmp::{max, min};
18 use std::fmt;
19 use std::fmt::Write;
20 use std::iter::Zip;
21 use std::ops::{BitAnd, BitOr, Deref, DerefMut, Index, IndexMut, Not, Range};
22 use std::slice;
23
/// An identifier distinguished by a 32-bit index
///
/// Labels are handed out by `LabelAllocator` and compare and hash by index.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct Label {
    idx: u32,
}

impl fmt::Display for Label {
    /// Writes the label as `L` followed by its decimal index
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Label { idx } = *self;
        f.write_fmt(format_args!("L{}", idx))
    }
}
34
35 pub struct LabelAllocator {
36 count: u32,
37 }
38
39 impl LabelAllocator {
new() -> LabelAllocator40 pub fn new() -> LabelAllocator {
41 LabelAllocator { count: 0 }
42 }
43
alloc(&mut self) -> Label44 pub fn alloc(&mut self) -> Label {
45 let idx = self.count;
46 self.count += 1;
47 Label { idx: idx }
48 }
49 }
50
/// Identifies the register file a value lives in
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum RegFile {
    /// General-purpose registers
    ///
    /// Each register holds 32 bits per SIMT channel.
    GPR = 0,

    /// General-purpose uniform registers
    ///
    /// Each register holds 32 bits and is uniform across a wave.
    UGPR = 1,

    /// Predicate registers
    ///
    /// Each register holds 1 bit per SIMT channel.
    Pred = 2,

    /// Uniform predicate registers
    ///
    /// Each register holds 1 bit and is uniform across a wave.
    UPred = 3,

    /// The carry flag
    ///
    /// Hardware has only a single carry flag register; it is modeled as a
    /// register file of its own because that simplifies dependency tracking.
    ///
    /// This is used only on SM50.
    Carry = 4,

    /// Barrier registers
    ///
    /// Each one is a lane mask used for wave re-convergence instructions.
    Bar = 5,

    /// Memory locations
    ///
    /// This is a virtual register file for things which will get spilled to
    /// local memory.  Each memory location is 32 bits per SIMT channel.
    Mem = 6,
}

/// Number of register files, derived from the last `RegFile` discriminant
const NUM_REG_FILES: usize = RegFile::Mem as usize + 1;
97
98 impl RegFile {
99 /// Returns true if the register file is uniform across a wave
is_uniform(&self) -> bool100 pub fn is_uniform(&self) -> bool {
101 match self {
102 RegFile::GPR
103 | RegFile::Pred
104 | RegFile::Carry
105 | RegFile::Bar
106 | RegFile::Mem => false,
107 RegFile::UGPR | RegFile::UPred => true,
108 }
109 }
110
to_uniform(&self) -> Option<RegFile>111 pub fn to_uniform(&self) -> Option<RegFile> {
112 match self {
113 RegFile::GPR | RegFile::UGPR => Some(RegFile::UGPR),
114 RegFile::Pred | RegFile::UPred => Some(RegFile::UPred),
115 RegFile::Carry | RegFile::Bar | RegFile::Mem => None,
116 }
117 }
118
to_warp(&self) -> RegFile119 pub fn to_warp(&self) -> RegFile {
120 match self {
121 RegFile::GPR | RegFile::UGPR => RegFile::GPR,
122 RegFile::Pred | RegFile::UPred => RegFile::Pred,
123 RegFile::Carry | RegFile::Bar | RegFile::Mem => *self,
124 }
125 }
126
127 /// Returns true if the register file is general-purpose
is_gpr(&self) -> bool128 pub fn is_gpr(&self) -> bool {
129 match self {
130 RegFile::GPR | RegFile::UGPR => true,
131 RegFile::Pred
132 | RegFile::UPred
133 | RegFile::Carry
134 | RegFile::Bar
135 | RegFile::Mem => false,
136 }
137 }
138
139 /// Returns true if the register file is a predicate register file
is_predicate(&self) -> bool140 pub fn is_predicate(&self) -> bool {
141 match self {
142 RegFile::GPR
143 | RegFile::UGPR
144 | RegFile::Carry
145 | RegFile::Bar
146 | RegFile::Mem => false,
147 RegFile::Pred | RegFile::UPred => true,
148 }
149 }
150
fmt_prefix(&self) -> &'static str151 fn fmt_prefix(&self) -> &'static str {
152 match self {
153 RegFile::GPR => "r",
154 RegFile::UGPR => "ur",
155 RegFile::Pred => "p",
156 RegFile::UPred => "up",
157 RegFile::Carry => "c",
158 RegFile::Bar => "b",
159 RegFile::Mem => "m",
160 }
161 }
162 }
163
164 impl fmt::Display for RegFile {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result165 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166 match self {
167 RegFile::GPR => write!(f, "GPR"),
168 RegFile::UGPR => write!(f, "UGPR"),
169 RegFile::Pred => write!(f, "Pred"),
170 RegFile::UPred => write!(f, "UPred"),
171 RegFile::Carry => write!(f, "Carry"),
172 RegFile::Bar => write!(f, "Bar"),
173 RegFile::Mem => write!(f, "Mem"),
174 }
175 }
176 }
177
178 impl From<RegFile> for u8 {
from(value: RegFile) -> u8179 fn from(value: RegFile) -> u8 {
180 value as u8
181 }
182 }
183
184 impl TryFrom<u32> for RegFile {
185 type Error = &'static str;
186
try_from(value: u32) -> Result<Self, Self::Error>187 fn try_from(value: u32) -> Result<Self, Self::Error> {
188 match value {
189 0 => Ok(RegFile::GPR),
190 1 => Ok(RegFile::UGPR),
191 2 => Ok(RegFile::Pred),
192 3 => Ok(RegFile::UPred),
193 4 => Ok(RegFile::Carry),
194 5 => Ok(RegFile::Bar),
195 6 => Ok(RegFile::Mem),
196 _ => Err("Invalid register file number"),
197 }
198 }
199 }
200
201 impl TryFrom<u16> for RegFile {
202 type Error = &'static str;
203
try_from(value: u16) -> Result<Self, Self::Error>204 fn try_from(value: u16) -> Result<Self, Self::Error> {
205 RegFile::try_from(u32::from(value))
206 }
207 }
208
209 impl TryFrom<u8> for RegFile {
210 type Error = &'static str;
211
try_from(value: u8) -> Result<Self, Self::Error>212 fn try_from(value: u8) -> Result<Self, Self::Error> {
213 RegFile::try_from(u32::from(value))
214 }
215 }
216
217 /// A trait for things which have an associated register file
218 pub trait HasRegFile {
file(&self) -> RegFile219 fn file(&self) -> RegFile;
220
is_uniform(&self) -> bool221 fn is_uniform(&self) -> bool {
222 self.file().is_uniform()
223 }
224
is_gpr(&self) -> bool225 fn is_gpr(&self) -> bool {
226 self.file().is_gpr()
227 }
228
is_predicate(&self) -> bool229 fn is_predicate(&self) -> bool {
230 self.file().is_predicate()
231 }
232 }
233
234 #[derive(Clone)]
235 pub struct RegFileSet {
236 bits: u8,
237 }
238
239 impl RegFileSet {
new() -> RegFileSet240 pub fn new() -> RegFileSet {
241 RegFileSet { bits: 0 }
242 }
243
len(&self) -> usize244 pub fn len(&self) -> usize {
245 self.bits.count_ones() as usize
246 }
247
contains(&self, file: RegFile) -> bool248 pub fn contains(&self, file: RegFile) -> bool {
249 self.bits & (1 << (file as u8)) != 0
250 }
251
insert(&mut self, file: RegFile) -> bool252 pub fn insert(&mut self, file: RegFile) -> bool {
253 let has_file = self.contains(file);
254 self.bits |= 1 << (file as u8);
255 !has_file
256 }
257
is_empty(&self) -> bool258 pub fn is_empty(&self) -> bool {
259 self.bits == 0
260 }
261
262 #[allow(dead_code)]
iter(&self) -> RegFileSet263 pub fn iter(&self) -> RegFileSet {
264 self.clone()
265 }
266
remove(&mut self, file: RegFile) -> bool267 pub fn remove(&mut self, file: RegFile) -> bool {
268 let has_file = self.contains(file);
269 self.bits &= !(1 << (file as u8));
270 has_file
271 }
272 }
273
274 impl FromIterator<RegFile> for RegFileSet {
from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self275 fn from_iter<T: IntoIterator<Item = RegFile>>(iter: T) -> Self {
276 let mut set = RegFileSet::new();
277 for file in iter {
278 set.insert(file);
279 }
280 set
281 }
282 }
283
284 impl Iterator for RegFileSet {
285 type Item = RegFile;
286
next(&mut self) -> Option<RegFile>287 fn next(&mut self) -> Option<RegFile> {
288 if self.is_empty() {
289 None
290 } else {
291 let file = self.bits.trailing_zeros().try_into().unwrap();
292 self.remove(file);
293 Some(file)
294 }
295 }
296
size_hint(&self) -> (usize, Option<usize>)297 fn size_hint(&self) -> (usize, Option<usize>) {
298 let len = self.len();
299 (len, Some(len))
300 }
301 }
302
303 #[derive(Clone, Copy)]
304 pub struct PerRegFile<T> {
305 per_file: [T; NUM_REG_FILES],
306 }
307
308 impl<T> PerRegFile<T> {
new_with<F: Fn(RegFile) -> T>(f: F) -> Self309 pub fn new_with<F: Fn(RegFile) -> T>(f: F) -> Self {
310 PerRegFile {
311 per_file: [
312 f(RegFile::GPR),
313 f(RegFile::UGPR),
314 f(RegFile::Pred),
315 f(RegFile::UPred),
316 f(RegFile::Carry),
317 f(RegFile::Bar),
318 f(RegFile::Mem),
319 ],
320 }
321 }
322
values(&self) -> slice::Iter<T>323 pub fn values(&self) -> slice::Iter<T> {
324 self.per_file.iter()
325 }
326
values_mut(&mut self) -> slice::IterMut<T>327 pub fn values_mut(&mut self) -> slice::IterMut<T> {
328 self.per_file.iter_mut()
329 }
330 }
331
332 impl<T: Default> Default for PerRegFile<T> {
default() -> Self333 fn default() -> Self {
334 PerRegFile {
335 per_file: Default::default(),
336 }
337 }
338 }
339
340 impl<T> Index<RegFile> for PerRegFile<T> {
341 type Output = T;
342
index(&self, idx: RegFile) -> &T343 fn index(&self, idx: RegFile) -> &T {
344 &self.per_file[idx as u8 as usize]
345 }
346 }
347
348 impl<T> IndexMut<RegFile> for PerRegFile<T> {
index_mut(&mut self, idx: RegFile) -> &mut T349 fn index_mut(&mut self, idx: RegFile) -> &mut T {
350 &mut self.per_file[idx as u8 as usize]
351 }
352 }
353
354 /// An SSA value
355 ///
356 /// Each SSA in NAK represents a single 32-bit or 1-bit (if a predicate) value
357 /// which must either be spilled to memory or allocated space in the specified
358 /// register file. Whenever more data is required such as a 64-bit memory
359 /// address, double-precision float, or a vec4 texture result, multiple SSA
360 /// values are used.
361 ///
362 /// Each SSA value logically contains two things: an index and a register file.
363 /// It is required that each index refers to a unique SSA value, regardless of
364 /// register file. This way the index can be used to index tightly-packed data
365 /// structures such as bitsets without having to determine separate ranges for
366 /// each register file.
367 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
368 pub struct SSAValue {
369 packed: u32,
370 }
371
372 impl SSAValue {
373 /// A special SSA value which is always invalid
374 pub const NONE: Self = SSAValue { packed: 0 };
375
376 /// Returns an SSA value with the given register file and index
new(file: RegFile, idx: u32) -> SSAValue377 pub fn new(file: RegFile, idx: u32) -> SSAValue {
378 assert!(idx > 0 && idx < (1 << 29) - 2);
379 let mut packed = idx;
380 assert!(u8::from(file) < 8);
381 packed |= u32::from(u8::from(file)) << 29;
382 SSAValue { packed: packed }
383 }
384
385 /// Returns the index of this SSA value
idx(&self) -> u32386 pub fn idx(&self) -> u32 {
387 self.packed & 0x1fffffff
388 }
389
390 /// Returns true if this SSA value is equal to SSAValue::NONE
is_none(&self) -> bool391 pub fn is_none(&self) -> bool {
392 self.packed == 0
393 }
394
fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result395 fn fmt_plain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
396 write!(f, "{}{}", self.file().fmt_prefix(), self.idx())
397 }
398 }
399
400 impl HasRegFile for SSAValue {
401 /// Returns the register file of this SSA value
file(&self) -> RegFile402 fn file(&self) -> RegFile {
403 RegFile::try_from(self.packed >> 29).unwrap()
404 }
405 }
406
407 impl fmt::Display for SSAValue {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result408 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
409 write!(f, "%")?;
410 self.fmt_plain(f)
411 }
412 }
413
414 /// A reference to one or more SSA values
415 ///
416 /// Because each SSA value represents a single 1 or 32-bit scalar, we need a way
417 /// to reference multiple SSA values for instructions which read or write
418 /// multiple registers in the same source. When the register allocator runs,
419 /// all the SSA values in a given SSA ref will be placed in consecutive
420 /// registers, with the base register aligned to the number of values, aligned
421 /// to the next power of two.
422 ///
423 /// An SSA reference can reference between 1 and 4 SSA values. It dereferences
424 /// to a slice for easy access to individual SSA values. The structure is
425 /// designed so that is always 16B, regardless of how many SSA values are
426 /// referenced so it's easy and fairly cheap to copy around and embed in other
427 /// structures.
428 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
429 pub struct SSARef {
430 v: [SSAValue; 4],
431 }
432
433 impl SSARef {
434 /// Returns a new SSA reference
435 #[inline]
new(comps: &[SSAValue]) -> SSARef436 fn new(comps: &[SSAValue]) -> SSARef {
437 assert!(comps.len() > 0 && comps.len() <= 4);
438 let mut r = SSARef {
439 v: [SSAValue::NONE; 4],
440 };
441 for i in 0..comps.len() {
442 r.v[i] = comps[i];
443 }
444 if comps.len() < 4 {
445 r.v[3].packed = (comps.len() as u32).wrapping_neg();
446 }
447 r
448 }
449
450 /// Returns the number of components in this SSA reference
comps(&self) -> u8451 pub fn comps(&self) -> u8 {
452 if self.v[3].packed >= u32::MAX - 2 {
453 self.v[3].packed.wrapping_neg() as u8
454 } else {
455 4
456 }
457 }
458
file(&self) -> Option<RegFile>459 pub fn file(&self) -> Option<RegFile> {
460 let comps = usize::from(self.comps());
461 let file = self.v[0].file();
462 for i in 1..comps {
463 if self.v[i].file() != file {
464 return None;
465 }
466 }
467 Some(file)
468 }
469
is_uniform(&self) -> bool470 pub fn is_uniform(&self) -> bool {
471 for ssa in &self[..] {
472 if !ssa.is_uniform() {
473 return false;
474 }
475 }
476 true
477 }
478
is_gpr(&self) -> bool479 pub fn is_gpr(&self) -> bool {
480 for ssa in &self[..] {
481 if !ssa.is_gpr() {
482 return false;
483 }
484 }
485 true
486 }
487
is_predicate(&self) -> bool488 pub fn is_predicate(&self) -> bool {
489 if self.v[0].is_predicate() {
490 true
491 } else {
492 for ssa in &self[..] {
493 debug_assert!(!ssa.is_predicate());
494 }
495 false
496 }
497 }
498 }
499
500 impl Deref for SSARef {
501 type Target = [SSAValue];
502
deref(&self) -> &[SSAValue]503 fn deref(&self) -> &[SSAValue] {
504 let comps = usize::from(self.comps());
505 &self.v[..comps]
506 }
507 }
508
509 impl DerefMut for SSARef {
deref_mut(&mut self) -> &mut [SSAValue]510 fn deref_mut(&mut self) -> &mut [SSAValue] {
511 let comps = usize::from(self.comps());
512 &mut self.v[..comps]
513 }
514 }
515
516 impl TryFrom<&[SSAValue]> for SSARef {
517 type Error = &'static str;
518
try_from(comps: &[SSAValue]) -> Result<Self, Self::Error>519 fn try_from(comps: &[SSAValue]) -> Result<Self, Self::Error> {
520 if comps.len() == 0 {
521 Err("Empty vector")
522 } else if comps.len() > 4 {
523 Err("Too many vector components")
524 } else {
525 Ok(SSARef::new(comps))
526 }
527 }
528 }
529
530 impl TryFrom<Vec<SSAValue>> for SSARef {
531 type Error = &'static str;
532
try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error>533 fn try_from(comps: Vec<SSAValue>) -> Result<Self, Self::Error> {
534 SSARef::try_from(&comps[..])
535 }
536 }
537
538 macro_rules! impl_ssa_ref_from_arr {
539 ($n: expr) => {
540 impl From<[SSAValue; $n]> for SSARef {
541 fn from(comps: [SSAValue; $n]) -> Self {
542 SSARef::new(&comps[..])
543 }
544 }
545 };
546 }
547 impl_ssa_ref_from_arr!(1);
548 impl_ssa_ref_from_arr!(2);
549 impl_ssa_ref_from_arr!(3);
550 impl_ssa_ref_from_arr!(4);
551
552 impl From<SSAValue> for SSARef {
from(val: SSAValue) -> Self553 fn from(val: SSAValue) -> Self {
554 [val].into()
555 }
556 }
557
558 impl fmt::Display for SSARef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result559 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
560 if self.comps() == 1 {
561 write!(f, "{}", self[0])
562 } else {
563 write!(f, "{{")?;
564 for (i, v) in self.iter().enumerate() {
565 if i != 0 {
566 write!(f, " ")?;
567 }
568 write!(f, "{}", v)?;
569 }
570 write!(f, "}}")
571 }
572 }
573 }
574
575 pub struct SSAValueAllocator {
576 count: u32,
577 }
578
579 impl SSAValueAllocator {
new() -> SSAValueAllocator580 pub fn new() -> SSAValueAllocator {
581 SSAValueAllocator { count: 0 }
582 }
583
584 #[allow(dead_code)]
max_idx(&self) -> u32585 pub fn max_idx(&self) -> u32 {
586 self.count
587 }
588
alloc(&mut self, file: RegFile) -> SSAValue589 pub fn alloc(&mut self, file: RegFile) -> SSAValue {
590 self.count += 1;
591 SSAValue::new(file, self.count)
592 }
593
alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef594 pub fn alloc_vec(&mut self, file: RegFile, comps: u8) -> SSARef {
595 assert!(comps >= 1 && comps <= 4);
596 let mut vec = [SSAValue::NONE; 4];
597 for c in 0..comps {
598 vec[usize::from(c)] = self.alloc(file);
599 }
600 vec[0..usize::from(comps)].try_into().unwrap()
601 }
602 }
603
604 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
605 pub struct RegRef {
606 packed: u32,
607 }
608
609 impl RegRef {
610 pub const MAX_IDX: u32 = (1 << 26) - 1;
611
zero_idx(file: RegFile) -> u32612 fn zero_idx(file: RegFile) -> u32 {
613 match file {
614 RegFile::GPR => 255,
615 RegFile::UGPR => 63,
616 RegFile::Pred => 7,
617 RegFile::UPred => 7,
618 RegFile::Carry => panic!("Carry has no zero index"),
619 RegFile::Bar => panic!("Bar has no zero index"),
620 RegFile::Mem => panic!("Mem has no zero index"),
621 }
622 }
623
new(file: RegFile, base_idx: u32, comps: u8) -> RegRef624 pub fn new(file: RegFile, base_idx: u32, comps: u8) -> RegRef {
625 assert!(base_idx <= Self::MAX_IDX);
626 let mut packed = base_idx;
627 assert!(comps > 0 && comps <= 8);
628 packed |= u32::from(comps - 1) << 26;
629 assert!(u8::from(file) < 8);
630 packed |= u32::from(u8::from(file)) << 29;
631 RegRef { packed: packed }
632 }
633
zero(file: RegFile, comps: u8) -> RegRef634 pub fn zero(file: RegFile, comps: u8) -> RegRef {
635 RegRef::new(file, RegRef::zero_idx(file), comps)
636 }
637
base_idx(&self) -> u32638 pub fn base_idx(&self) -> u32 {
639 self.packed & 0x03ffffff
640 }
641
idx_range(&self) -> Range<u32>642 pub fn idx_range(&self) -> Range<u32> {
643 let start = self.base_idx();
644 let end = start + u32::from(self.comps());
645 start..end
646 }
647
comps(&self) -> u8648 pub fn comps(&self) -> u8 {
649 (((self.packed >> 26) & 0x7) + 1).try_into().unwrap()
650 }
651
comp(&self, c: u8) -> RegRef652 pub fn comp(&self, c: u8) -> RegRef {
653 assert!(c < self.comps());
654 RegRef::new(self.file(), self.base_idx() + u32::from(c), 1)
655 }
656 }
657
658 impl HasRegFile for RegRef {
file(&self) -> RegFile659 fn file(&self) -> RegFile {
660 ((self.packed >> 29) & 0x7).try_into().unwrap()
661 }
662 }
663
664 impl fmt::Display for RegRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result665 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
666 write!(f, "{}{}", self.file().fmt_prefix(), self.base_idx())?;
667 if self.comps() > 1 {
668 write!(f, "..{}", self.idx_range().end)?;
669 }
670 Ok(())
671 }
672 }
673
674 #[derive(Clone, Copy)]
675 pub enum Dst {
676 None,
677 SSA(SSARef),
678 Reg(RegRef),
679 }
680
681 impl Dst {
is_none(&self) -> bool682 pub fn is_none(&self) -> bool {
683 matches!(self, Dst::None)
684 }
685
as_reg(&self) -> Option<&RegRef>686 pub fn as_reg(&self) -> Option<&RegRef> {
687 match self {
688 Dst::Reg(r) => Some(r),
689 _ => None,
690 }
691 }
692
as_ssa(&self) -> Option<&SSARef>693 pub fn as_ssa(&self) -> Option<&SSARef> {
694 match self {
695 Dst::SSA(r) => Some(r),
696 _ => None,
697 }
698 }
699
iter_ssa(&self) -> slice::Iter<'_, SSAValue>700 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
701 match self {
702 Dst::None | Dst::Reg(_) => &[],
703 Dst::SSA(ssa) => ssa.deref(),
704 }
705 .iter()
706 }
707
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>708 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
709 match self {
710 Dst::None | Dst::Reg(_) => &mut [],
711 Dst::SSA(ssa) => ssa.deref_mut(),
712 }
713 .iter_mut()
714 }
715 }
716
717 impl From<RegRef> for Dst {
from(reg: RegRef) -> Dst718 fn from(reg: RegRef) -> Dst {
719 Dst::Reg(reg)
720 }
721 }
722
723 impl<T: Into<SSARef>> From<T> for Dst {
from(ssa: T) -> Dst724 fn from(ssa: T) -> Dst {
725 Dst::SSA(ssa.into())
726 }
727 }
728
729 impl fmt::Display for Dst {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result730 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
731 match self {
732 Dst::None => write!(f, "null")?,
733 Dst::SSA(v) => v.fmt(f)?,
734 Dst::Reg(r) => r.fmt(f)?,
735 }
736 Ok(())
737 }
738 }
739
740 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
741 pub enum CBuf {
742 Binding(u8),
743
744 #[allow(dead_code)]
745 BindlessSSA(SSARef),
746
747 #[allow(dead_code)]
748 BindlessUGPR(RegRef),
749 }
750
751 impl fmt::Display for CBuf {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result752 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
753 match self {
754 CBuf::Binding(idx) => write!(f, "c[{:#x}]", idx),
755 CBuf::BindlessSSA(v) => write!(f, "cx[{}]", v),
756 CBuf::BindlessUGPR(r) => write!(f, "cx[{}]", r),
757 }
758 }
759 }
760
761 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
762 pub struct CBufRef {
763 pub buf: CBuf,
764 pub offset: u16,
765 }
766
767 impl CBufRef {
offset(self, offset: u16) -> CBufRef768 pub fn offset(self, offset: u16) -> CBufRef {
769 CBufRef {
770 buf: self.buf,
771 offset: self.offset + offset,
772 }
773 }
774 }
775
776 impl fmt::Display for CBufRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result777 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
778 write!(f, "{}[{:#x}]", self.buf, self.offset)
779 }
780 }
781
782 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
783 pub enum SrcRef {
784 Zero,
785 True,
786 False,
787 Imm32(u32),
788 CBuf(CBufRef),
789 SSA(SSARef),
790 Reg(RegRef),
791 }
792
793 impl SrcRef {
794 #[allow(dead_code)]
is_alu(&self) -> bool795 pub fn is_alu(&self) -> bool {
796 match self {
797 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => true,
798 SrcRef::SSA(ssa) => ssa.is_gpr(),
799 SrcRef::Reg(reg) => reg.is_gpr(),
800 SrcRef::True | SrcRef::False => false,
801 }
802 }
803
is_predicate(&self) -> bool804 pub fn is_predicate(&self) -> bool {
805 match self {
806 SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
807 SrcRef::True | SrcRef::False => true,
808 SrcRef::SSA(ssa) => ssa.is_predicate(),
809 SrcRef::Reg(reg) => reg.is_predicate(),
810 }
811 }
812
is_carry(&self) -> bool813 pub fn is_carry(&self) -> bool {
814 match self {
815 SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Carry),
816 SrcRef::Reg(reg) => reg.file() == RegFile::Carry,
817 _ => false,
818 }
819 }
820
821 #[allow(dead_code)]
is_barrier(&self) -> bool822 pub fn is_barrier(&self) -> bool {
823 match self {
824 SrcRef::SSA(ssa) => ssa.file() == Some(RegFile::Bar),
825 SrcRef::Reg(reg) => reg.file() == RegFile::Bar,
826 _ => false,
827 }
828 }
829
as_reg(&self) -> Option<&RegRef>830 pub fn as_reg(&self) -> Option<&RegRef> {
831 match self {
832 SrcRef::Reg(r) => Some(r),
833 _ => None,
834 }
835 }
836
as_ssa(&self) -> Option<&SSARef>837 pub fn as_ssa(&self) -> Option<&SSARef> {
838 match self {
839 SrcRef::SSA(r) => Some(r),
840 _ => None,
841 }
842 }
843
as_u32(&self) -> Option<u32>844 pub fn as_u32(&self) -> Option<u32> {
845 match self {
846 SrcRef::Zero => Some(0),
847 SrcRef::Imm32(u) => Some(*u),
848 SrcRef::CBuf(_) | SrcRef::SSA(_) | SrcRef::Reg(_) => None,
849 _ => panic!("Invalid integer source"),
850 }
851 }
852
get_reg(&self) -> Option<&RegRef>853 pub fn get_reg(&self) -> Option<&RegRef> {
854 match self {
855 SrcRef::Zero
856 | SrcRef::True
857 | SrcRef::False
858 | SrcRef::Imm32(_)
859 | SrcRef::SSA(_) => None,
860 SrcRef::CBuf(cb) => match &cb.buf {
861 CBuf::Binding(_) | CBuf::BindlessSSA(_) => None,
862 CBuf::BindlessUGPR(reg) => Some(reg),
863 },
864 SrcRef::Reg(reg) => Some(reg),
865 }
866 }
867
iter_ssa(&self) -> slice::Iter<'_, SSAValue>868 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
869 match self {
870 SrcRef::Zero
871 | SrcRef::True
872 | SrcRef::False
873 | SrcRef::Imm32(_)
874 | SrcRef::Reg(_) => &[],
875 SrcRef::CBuf(cb) => match &cb.buf {
876 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &[],
877 CBuf::BindlessSSA(ssa) => ssa.deref(),
878 },
879 SrcRef::SSA(ssa) => ssa.deref(),
880 }
881 .iter()
882 }
883
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>884 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
885 match self {
886 SrcRef::Zero
887 | SrcRef::True
888 | SrcRef::False
889 | SrcRef::Imm32(_)
890 | SrcRef::Reg(_) => &mut [],
891 SrcRef::CBuf(cb) => match &mut cb.buf {
892 CBuf::Binding(_) | CBuf::BindlessUGPR(_) => &mut [],
893 CBuf::BindlessSSA(ssa) => ssa.deref_mut(),
894 },
895 SrcRef::SSA(ssa) => ssa.deref_mut(),
896 }
897 .iter_mut()
898 }
899 }
900
901 impl From<bool> for SrcRef {
from(b: bool) -> SrcRef902 fn from(b: bool) -> SrcRef {
903 if b {
904 SrcRef::True
905 } else {
906 SrcRef::False
907 }
908 }
909 }
910
911 impl From<u32> for SrcRef {
from(u: u32) -> SrcRef912 fn from(u: u32) -> SrcRef {
913 if u == 0 {
914 SrcRef::Zero
915 } else {
916 SrcRef::Imm32(u)
917 }
918 }
919 }
920
921 impl From<f32> for SrcRef {
from(f: f32) -> SrcRef922 fn from(f: f32) -> SrcRef {
923 f.to_bits().into()
924 }
925 }
926
927 impl From<PrmtSel> for SrcRef {
from(sel: PrmtSel) -> SrcRef928 fn from(sel: PrmtSel) -> SrcRef {
929 u32::from(sel.0).into()
930 }
931 }
932
933 impl From<CBufRef> for SrcRef {
from(cb: CBufRef) -> SrcRef934 fn from(cb: CBufRef) -> SrcRef {
935 SrcRef::CBuf(cb)
936 }
937 }
938
939 impl From<RegRef> for SrcRef {
from(reg: RegRef) -> SrcRef940 fn from(reg: RegRef) -> SrcRef {
941 SrcRef::Reg(reg)
942 }
943 }
944
945 impl<T: Into<SSARef>> From<T> for SrcRef {
from(ssa: T) -> SrcRef946 fn from(ssa: T) -> SrcRef {
947 SrcRef::SSA(ssa.into())
948 }
949 }
950
951 impl fmt::Display for SrcRef {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result952 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
953 match self {
954 SrcRef::Zero => write!(f, "rZ"),
955 SrcRef::True => write!(f, "pT"),
956 SrcRef::False => write!(f, "pF"),
957 SrcRef::Imm32(u) => write!(f, "{:#x}", u),
958 SrcRef::CBuf(c) => c.fmt(f),
959 SrcRef::SSA(v) => v.fmt(f),
960 SrcRef::Reg(r) => r.fmt(f),
961 }
962 }
963 }
964
/// A modifier applied to a source as it is read
///
/// `FAbs`, `FNeg` and `FNegAbs` are float modifiers, `INeg` is an integer
/// modifier and `BNot` is a bitwise/boolean modifier.  `None` belongs to
/// every family.  Methods of one family panic when called on a modifier
/// from another.
#[derive(Clone, Copy, PartialEq)]
pub enum SrcMod {
    None,
    FAbs,
    FNeg,
    FNegAbs,
    INeg,
    BNot,
}

impl SrcMod {
    /// Returns true if this is `SrcMod::None`
    pub fn is_none(&self) -> bool {
        match self {
            SrcMod::None => true,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::INeg
            | SrcMod::BNot => false,
        }
    }

    /// Returns true if this float modifier takes the absolute value
    pub fn has_fabs(&self) -> bool {
        match self {
            SrcMod::FAbs | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FNeg => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if this float modifier negates
    pub fn has_fneg(&self) -> bool {
        match self {
            SrcMod::FNeg | SrcMod::FNegAbs => true,
            SrcMod::None | SrcMod::FAbs => false,
            SrcMod::INeg | SrcMod::BNot => panic!("Not a float modifier"),
        }
    }

    /// Returns true if this integer modifier negates
    pub fn is_ineg(&self) -> bool {
        match self {
            SrcMod::INeg => true,
            SrcMod::None => false,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::BNot => panic!("Not an integer modifier"),
        }
    }

    /// Returns true if this bitwise modifier inverts
    pub fn is_bnot(&self) -> bool {
        match self {
            SrcMod::BNot => true,
            SrcMod::None => false,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::INeg => panic!("Not a bitwise modifier"),
        }
    }

    /// Returns the modifier for the absolute value of the modified source
    ///
    /// Any earlier negation is discarded, so the result is always `FAbs`.
    pub fn fabs(self) -> SrcMod {
        match self {
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
            SrcMod::None | SrcMod::FAbs | SrcMod::FNeg | SrcMod::FNegAbs => {
                SrcMod::FAbs
            }
        }
    }

    /// Returns the modifier for the float negation of the modified source
    pub fn fneg(self) -> SrcMod {
        match self {
            SrcMod::None => SrcMod::FNeg,
            SrcMod::FNeg => SrcMod::None,
            SrcMod::FAbs => SrcMod::FNegAbs,
            SrcMod::FNegAbs => SrcMod::FAbs,
            SrcMod::INeg | SrcMod::BNot => {
                panic!("Not a float source modifier")
            }
        }
    }

    /// Returns the modifier for the integer negation of the modified source
    pub fn ineg(self) -> SrcMod {
        match self {
            SrcMod::INeg => SrcMod::None,
            SrcMod::None => SrcMod::INeg,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::BNot => panic!("Not an integer source modifier"),
        }
    }

    /// Returns the modifier for the inversion of the modified source
    pub fn bnot(self) -> SrcMod {
        match self {
            SrcMod::BNot => SrcMod::None,
            SrcMod::None => SrcMod::BNot,
            SrcMod::FAbs
            | SrcMod::FNeg
            | SrcMod::FNegAbs
            | SrcMod::INeg => panic!("Not a boolean source modifier"),
        }
    }

    /// Returns the single modifier equivalent to applying `self` first and
    /// `other` on top of it
    pub fn modify(self, other: SrcMod) -> SrcMod {
        match other {
            SrcMod::None => self,
            SrcMod::FAbs => self.fabs(),
            SrcMod::FNeg => self.fneg(),
            SrcMod::FNegAbs => {
                let abs = self.fabs();
                abs.fneg()
            }
            SrcMod::INeg => self.ineg(),
            SrcMod::BNot => self.bnot(),
        }
    }
}
1058
/// A component swizzle applied to a source as it is read
#[derive(Clone, Copy, PartialEq)]
#[allow(dead_code)]
pub enum SrcSwizzle {
    None,
    Xx,
    Yy,
}

impl SrcSwizzle {
    /// Returns true if this is `SrcSwizzle::None`
    pub fn is_none(&self) -> bool {
        match self {
            SrcSwizzle::None => true,
            SrcSwizzle::Xx | SrcSwizzle::Yy => false,
        }
    }
}

impl fmt::Display for SrcSwizzle {
    /// Writes `.xx` or `.yy`; writes nothing for `SrcSwizzle::None`
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            SrcSwizzle::None => return Ok(()),
            SrcSwizzle::Xx => ".xx",
            SrcSwizzle::Yy => ".yy",
        };
        f.write_str(suffix)
    }
}
1082
1083 #[derive(Clone, Copy, PartialEq)]
1084 pub struct Src {
1085 pub src_ref: SrcRef,
1086 pub src_mod: SrcMod,
1087 pub src_swizzle: SrcSwizzle,
1088 }
1089
1090 impl Src {
new_zero() -> Src1091 pub fn new_zero() -> Src {
1092 SrcRef::Zero.into()
1093 }
1094
new_imm_u32(u: u32) -> Src1095 pub fn new_imm_u32(u: u32) -> Src {
1096 u.into()
1097 }
1098
new_imm_bool(b: bool) -> Src1099 pub fn new_imm_bool(b: bool) -> Src {
1100 b.into()
1101 }
1102
fabs(&self) -> Src1103 pub fn fabs(&self) -> Src {
1104 Src {
1105 src_ref: self.src_ref,
1106 src_mod: self.src_mod.fabs(),
1107 src_swizzle: self.src_swizzle,
1108 }
1109 }
1110
fneg(&self) -> Src1111 pub fn fneg(&self) -> Src {
1112 Src {
1113 src_ref: self.src_ref,
1114 src_mod: self.src_mod.fneg(),
1115 src_swizzle: self.src_swizzle,
1116 }
1117 }
1118
ineg(&self) -> Src1119 pub fn ineg(&self) -> Src {
1120 Src {
1121 src_ref: self.src_ref,
1122 src_mod: self.src_mod.ineg(),
1123 src_swizzle: self.src_swizzle,
1124 }
1125 }
1126
bnot(&self) -> Src1127 pub fn bnot(&self) -> Src {
1128 Src {
1129 src_ref: self.src_ref,
1130 src_mod: self.src_mod.bnot(),
1131 src_swizzle: self.src_swizzle,
1132 }
1133 }
1134
fold_imm(&self, src_type: SrcType) -> Src1135 pub fn fold_imm(&self, src_type: SrcType) -> Src {
1136 let SrcRef::Imm32(mut u) = self.src_ref else {
1137 return *self;
1138 };
1139
1140 if self.src_mod.is_none() && self.src_swizzle.is_none() {
1141 return *self;
1142 }
1143
1144 assert!(src_type == SrcType::F16v2 || self.src_swizzle.is_none());
1145
1146 // INeg affects more than just the 32 bits of input data so it can't be
1147 // trivially folded. In fact, -imm may not be representable as a 32-bit
1148 // immediate at all.
1149 if src_type == SrcType::I32 {
1150 return *self;
1151 }
1152
1153 u = match src_type {
1154 SrcType::F16 => {
1155 let low = u & 0xFFFF;
1156
1157 match self.src_mod {
1158 SrcMod::None => low,
1159 SrcMod::FAbs => low & !(1_u32 << 15),
1160 SrcMod::FNeg => low ^ (1_u32 << 15),
1161 SrcMod::FNegAbs => low | (1_u32 << 15),
1162 _ => panic!("Not a float source modifier"),
1163 }
1164 }
1165 SrcType::F16v2 => {
1166 let u = match self.src_swizzle {
1167 SrcSwizzle::None => u,
1168 SrcSwizzle::Xx => (u << 16) | (u & 0xffff),
1169 SrcSwizzle::Yy => (u & 0xffff0000) | (u >> 16),
1170 };
1171
1172 match self.src_mod {
1173 SrcMod::None => u,
1174 SrcMod::FAbs => u & 0x7FFF7FFF,
1175 SrcMod::FNeg => u ^ 0x80008000,
1176 SrcMod::FNegAbs => u | 0x80008000,
1177 _ => panic!("Not a float source modifier"),
1178 }
1179 }
1180 SrcType::F32 | SrcType::F64 => match self.src_mod {
1181 SrcMod::None => u,
1182 SrcMod::FAbs => u & !(1_u32 << 31),
1183 SrcMod::FNeg => u ^ (1_u32 << 31),
1184 SrcMod::FNegAbs => u | (1_u32 << 31),
1185 _ => panic!("Not a float source modifier"),
1186 },
1187 SrcType::I32 => match self.src_mod {
1188 SrcMod::None => u,
1189 SrcMod::INeg => -(u as i32) as u32,
1190 _ => panic!("Not an integer source modifier"),
1191 },
1192 SrcType::B32 => match self.src_mod {
1193 SrcMod::None => u,
1194 SrcMod::BNot => !u,
1195 _ => panic!("Not a bitwise source modifier"),
1196 },
1197 _ => {
1198 assert!(self.src_mod.is_none());
1199 u
1200 }
1201 };
1202
1203 Src {
1204 src_mod: SrcMod::None,
1205 src_ref: u.into(),
1206 src_swizzle: SrcSwizzle::None,
1207 }
1208 }
1209
as_ssa(&self) -> Option<&SSARef>1210 pub fn as_ssa(&self) -> Option<&SSARef> {
1211 if self.src_mod.is_none() {
1212 self.src_ref.as_ssa()
1213 } else {
1214 None
1215 }
1216 }
1217
as_bool(&self) -> Option<bool>1218 pub fn as_bool(&self) -> Option<bool> {
1219 match self.src_ref {
1220 SrcRef::True => Some(!self.src_mod.is_bnot()),
1221 SrcRef::False => Some(self.src_mod.is_bnot()),
1222 SrcRef::SSA(vec) => {
1223 assert!(vec.is_predicate() && vec.comps() == 1);
1224 None
1225 }
1226 SrcRef::Reg(reg) => {
1227 assert!(reg.is_predicate() && reg.comps() == 1);
1228 None
1229 }
1230 _ => panic!("Not a boolean source"),
1231 }
1232 }
1233
as_u32(&self) -> Option<u32>1234 pub fn as_u32(&self) -> Option<u32> {
1235 if self.src_mod.is_none() {
1236 self.src_ref.as_u32()
1237 } else {
1238 None
1239 }
1240 }
1241
as_imm_not_i20(&self) -> Option<u32>1242 pub fn as_imm_not_i20(&self) -> Option<u32> {
1243 match self.src_ref {
1244 SrcRef::Imm32(i) => {
1245 assert!(self.src_mod.is_none());
1246 let top = i & 0xfff80000;
1247 if top == 0 || top == 0xfff80000 {
1248 None
1249 } else {
1250 Some(i)
1251 }
1252 }
1253 _ => None,
1254 }
1255 }
1256
as_imm_not_f20(&self) -> Option<u32>1257 pub fn as_imm_not_f20(&self) -> Option<u32> {
1258 match self.src_ref {
1259 SrcRef::Imm32(i) => {
1260 assert!(self.src_mod.is_none());
1261 if (i & 0xfff) == 0 {
1262 None
1263 } else {
1264 Some(i)
1265 }
1266 }
1267 _ => None,
1268 }
1269 }
1270
/// Iterates over the SSA values of this source.
///
/// Forwards to `self.src_ref.iter_ssa()`; the modifier and swizzle play no
/// part.
pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
    self.src_ref.iter_ssa()
}
1274
/// Mutably iterates over the SSA values of this source.
///
/// Forwards to `self.src_ref.iter_ssa_mut()`; the modifier and swizzle
/// play no part.
pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
    self.src_ref.iter_ssa_mut()
}
1278
is_uniform(&self) -> bool1279 pub fn is_uniform(&self) -> bool {
1280 match self.src_ref {
1281 SrcRef::Zero
1282 | SrcRef::True
1283 | SrcRef::False
1284 | SrcRef::Imm32(_)
1285 | SrcRef::CBuf(_) => true,
1286 SrcRef::SSA(ssa) => ssa.is_uniform(),
1287 SrcRef::Reg(reg) => reg.is_uniform(),
1288 }
1289 }
1290
/// Returns whether the underlying reference is a predicate.
///
/// Forwards to `self.src_ref.is_predicate()`; the modifier is ignored.
pub fn is_predicate(&self) -> bool {
    self.src_ref.is_predicate()
}
1294
is_zero(&self) -> bool1295 pub fn is_zero(&self) -> bool {
1296 match self.src_ref {
1297 SrcRef::Zero | SrcRef::Imm32(0) => match self.src_mod {
1298 SrcMod::None | SrcMod::FAbs => true,
1299 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::BNot => false,
1300 // INeg affects more than just the 32 bits of input data so -0
1301 // may not be equivalent to 0.
1302 SrcMod::INeg => false,
1303 },
1304 _ => false,
1305 }
1306 }
1307
is_fneg_zero(&self, src_type: SrcType) -> bool1308 pub fn is_fneg_zero(&self, src_type: SrcType) -> bool {
1309 match self.fold_imm(src_type).src_ref {
1310 SrcRef::Imm32(0x00008000) => src_type == SrcType::F16,
1311 SrcRef::Imm32(0x80000000) => src_type == SrcType::F32,
1312 SrcRef::Imm32(0x80008000) => src_type == SrcType::F16v2,
1313 _ => false,
1314 }
1315 }
1316
1317 #[allow(dead_code)]
supports_type(&self, src_type: &SrcType) -> bool1318 pub fn supports_type(&self, src_type: &SrcType) -> bool {
1319 match src_type {
1320 SrcType::SSA => {
1321 if !self.src_mod.is_none() {
1322 return false;
1323 }
1324
1325 matches!(self.src_ref, SrcRef::SSA(_) | SrcRef::Reg(_))
1326 }
1327 SrcType::GPR => {
1328 if !self.src_mod.is_none() {
1329 return false;
1330 }
1331
1332 matches!(
1333 self.src_ref,
1334 SrcRef::Zero | SrcRef::SSA(_) | SrcRef::Reg(_)
1335 )
1336 }
1337 SrcType::ALU => self.src_mod.is_none() && self.src_ref.is_alu(),
1338 SrcType::F16 | SrcType::F32 | SrcType::F64 | SrcType::F16v2 => {
1339 match self.src_mod {
1340 SrcMod::None
1341 | SrcMod::FAbs
1342 | SrcMod::FNeg
1343 | SrcMod::FNegAbs => (),
1344 _ => return false,
1345 }
1346
1347 self.src_ref.is_alu()
1348 }
1349 SrcType::I32 => {
1350 match self.src_mod {
1351 SrcMod::None | SrcMod::INeg => (),
1352 _ => return false,
1353 }
1354
1355 self.src_ref.is_alu()
1356 }
1357 SrcType::B32 => {
1358 match self.src_mod {
1359 SrcMod::None | SrcMod::BNot => (),
1360 _ => return false,
1361 }
1362
1363 self.src_ref.is_alu()
1364 }
1365 SrcType::Pred => {
1366 match self.src_mod {
1367 SrcMod::None | SrcMod::BNot => (),
1368 _ => return false,
1369 }
1370
1371 self.src_ref.is_predicate()
1372 }
1373 SrcType::Carry => self.src_mod.is_none() && self.src_ref.is_carry(),
1374 SrcType::Bar => self.src_mod.is_none() && self.src_ref.is_barrier(),
1375 }
1376 }
1377 }
1378
1379 impl<T: Into<SrcRef>> From<T> for Src {
from(value: T) -> Src1380 fn from(value: T) -> Src {
1381 Src {
1382 src_ref: value.into(),
1383 src_mod: SrcMod::None,
1384 src_swizzle: SrcSwizzle::None,
1385 }
1386 }
1387 }
1388
1389 impl fmt::Display for Src {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1390 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1391 match self.src_mod {
1392 SrcMod::None => write!(f, "{}{}", self.src_ref, self.src_swizzle),
1393 SrcMod::FAbs => write!(f, "|{}{}|", self.src_ref, self.src_swizzle),
1394 SrcMod::FNeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1395 SrcMod::FNegAbs => {
1396 write!(f, "-|{}{}|", self.src_ref, self.src_swizzle)
1397 }
1398 SrcMod::INeg => write!(f, "-{}{}", self.src_ref, self.src_swizzle),
1399 SrcMod::BNot => write!(f, "!{}{}", self.src_ref, self.src_swizzle),
1400 }
1401 }
1402 }
1403
/// The kind of value an instruction expects in a given source slot.
///
/// The per-variant notes below summarise what `Src::supports_type`
/// accepts for each kind.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum SrcType {
    /// No modifier; the reference must be `SrcRef::SSA` or `SrcRef::Reg`.
    SSA,
    /// No modifier; the reference must be `SrcRef::Zero`, `SrcRef::SSA` or
    /// `SrcRef::Reg`.
    GPR,
    /// No modifier; the reference must satisfy `SrcRef::is_alu()`.
    ALU,
    /// ALU reference with an optional float modifier (`FAbs`, `FNeg`,
    /// `FNegAbs`).
    F16,
    /// ALU reference with an optional float modifier; the only type for
    /// which `Src::fold_imm` permits a swizzle.
    F16v2,
    /// ALU reference with an optional float modifier.
    F32,
    /// ALU reference with an optional float modifier.
    F64,
    /// ALU reference with an optional `INeg` modifier.
    I32,
    /// ALU reference with an optional `BNot` modifier.
    B32,
    /// Predicate reference with an optional `BNot` modifier.
    Pred,
    /// No modifier; the reference must satisfy `SrcRef::is_carry()`.
    Carry,
    /// No modifier; the reference must satisfy `SrcRef::is_barrier()`.
    Bar,
}

impl SrcType {
    // NOTE(review): presumably the type assumed for sources without an
    // explicit attribute; the consumer is not visible here, so confirm.
    const DEFAULT: SrcType = SrcType::GPR;
}

/// Attribute list holding one [`SrcType`] per source.
pub type SrcTypeList = AttrList<SrcType>;
1426
/// Source-flavoured accessors for any type that exposes its sources as an
/// `AsSlice<Src>` with [`SrcType`] attributes.
pub trait SrcsAsSlice: AsSlice<Src, Attr = SrcType> {
    /// Returns the sources as a shared slice (forwards to `as_slice`).
    fn srcs_as_slice(&self) -> &[Src] {
        self.as_slice()
    }

    /// Returns the sources as a mutable slice (forwards to `as_mut_slice`).
    fn srcs_as_mut_slice(&mut self) -> &mut [Src] {
        self.as_mut_slice()
    }

    /// Returns the per-source type attributes (forwards to `attrs`).
    fn src_types(&self) -> SrcTypeList {
        self.attrs()
    }

    /// Returns the index of `src` within `srcs_as_slice()`.
    ///
    /// `src` must be a reference to one of this object's own sources; it is
    /// identified by address, not by value.
    ///
    /// # Panics
    ///
    /// Panics if `src` does not point into the source slice.
    fn src_idx(&self, src: &Src) -> usize {
        let r = self.srcs_as_slice().as_ptr_range();
        assert!(r.contains(&(src as *const Src)));
        // SAFETY: the assert above established that `src` lies within the
        // pointer range of the source slice, so both pointers refer to the
        // same allocation and the distance is non-negative.  As `src` is a
        // valid `&Src` inside a `[Src]`, it is presumably exactly one of
        // the elements, so the distance is a whole number of elements.
        unsafe { (src as *const Src).offset_from(r.start) as usize }
    }
}

// Every type exposing `AsSlice<Src, Attr = SrcType>` gets the helpers above.
impl<T: AsSlice<Src, Attr = SrcType>> SrcsAsSlice for T {}
1448
all_dsts_uniform(dsts: &[Dst]) -> bool1449 fn all_dsts_uniform(dsts: &[Dst]) -> bool {
1450 let mut uniform = None;
1451 for dst in dsts {
1452 let dst_uniform = match dst {
1453 Dst::None => continue,
1454 Dst::Reg(r) => r.is_uniform(),
1455 Dst::SSA(r) => r.file().unwrap().is_uniform(),
1456 };
1457 assert!(uniform == None || uniform == Some(dst_uniform));
1458 uniform = Some(dst_uniform);
1459 }
1460 uniform == Some(true)
1461 }
1462
/// The kind of value an instruction writes to a given destination slot.
///
/// NOTE(review): the variant names mirror the register and value kinds
/// used for sources (predicate, GPR, float widths, carry, barrier, vector);
/// their exact constraints are not visible in this part of the file.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum DstType {
    Pred,
    GPR,
    F16,
    F16v2,
    F32,
    F64,
    Carry,
    Bar,
    Vec,
}

impl DstType {
    // NOTE(review): presumably the type assumed for destinations without an
    // explicit attribute; the consumer is not visible here, so confirm.
    const DEFAULT: DstType = DstType::Vec;
}

/// Attribute list holding one [`DstType`] per destination.
pub type DstTypeList = AttrList<DstType>;
1482
/// Destination-flavoured accessors for any type that exposes its
/// destinations as an `AsSlice<Dst>` with [`DstType`] attributes.
pub trait DstsAsSlice: AsSlice<Dst, Attr = DstType> {
    /// Returns the destinations as a shared slice (forwards to `as_slice`).
    fn dsts_as_slice(&self) -> &[Dst] {
        self.as_slice()
    }

    /// Returns the destinations as a mutable slice (forwards to
    /// `as_mut_slice`).
    fn dsts_as_mut_slice(&mut self) -> &mut [Dst] {
        self.as_mut_slice()
    }

    /// Returns the per-destination type attributes (forwards to `attrs`).
    // Currently only used by test code
    #[allow(dead_code)]
    fn dst_types(&self) -> DstTypeList {
        self.attrs()
    }

    /// Returns the index of `dst` within `dsts_as_slice()`.
    ///
    /// `dst` must be a reference to one of this object's own destinations;
    /// it is identified by address, not by value.
    ///
    /// # Panics
    ///
    /// Panics if `dst` does not point into the destination slice.
    fn dst_idx(&self, dst: &Dst) -> usize {
        let r = self.dsts_as_slice().as_ptr_range();
        assert!(r.contains(&(dst as *const Dst)));
        // SAFETY: the assert above established that `dst` lies within the
        // pointer range of the destination slice, so both pointers refer to
        // the same allocation and the distance is non-negative.  As `dst`
        // is a valid `&Dst` inside a `[Dst]`, it is presumably exactly one
        // of the elements, so the distance is a whole number of elements.
        unsafe { (dst as *const Dst).offset_from(r.start) as usize }
    }
}

// Every type exposing `AsSlice<Dst, Attr = DstType>` gets the helpers above.
impl<T: AsSlice<Dst, Attr = DstType>> DstsAsSlice for T {}
1506
/// Something that can report whether it is uniform.
pub trait IsUniform {
    /// Returns `true` if the value is uniform.
    fn is_uniform(&self) -> bool;
}

/// Anything with destinations is uniform exactly when `all_dsts_uniform`
/// says so for its destination slice.
impl<T: DstsAsSlice> IsUniform for T {
    fn is_uniform(&self) -> bool {
        all_dsts_uniform(self.dsts_as_slice())
    }
}
1516
fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result1517 fn fmt_dst_slice(f: &mut fmt::Formatter<'_>, dsts: &[Dst]) -> fmt::Result {
1518 if dsts.is_empty() {
1519 return Ok(());
1520 }
1521
1522 // Figure out the last non-null dst
1523 //
1524 // Note: By making the top inclusive and starting at 0, we ensure that
1525 // at least one dst always gets printed.
1526 let mut last_dst = 0;
1527 for (i, dst) in dsts.iter().enumerate() {
1528 if !dst.is_none() {
1529 last_dst = i;
1530 }
1531 }
1532
1533 for i in 0..(last_dst + 1) {
1534 if i != 0 {
1535 write!(f, " ")?;
1536 }
1537 write!(f, "{}", &dsts[i])?;
1538 }
1539 Ok(())
1540 }
1541
/// A concrete value for one source or destination during constant
/// folding (see `OpFoldData`).
#[allow(dead_code)]
#[derive(Clone, Copy)]
pub enum FoldData {
    /// A predicate value.
    Pred(bool),
    /// A carry-flag value.
    Carry(bool),
    /// A single 32-bit value.
    U32(u32),
    /// Two 32-bit values; `OpFoldData` stores an `f64` this way with the
    /// low word at index 0 and the high word at index 1.
    Vec2([u32; 2]),
}
1550
/// The values an op's `Foldable::fold` reads from and writes to.
///
/// Both slices are indexed by the op's own source and destination indices
/// (`src_idx` / `dst_idx`).
pub struct OpFoldData<'a> {
    /// Output slots, one per destination, written by the `set_*_dst`
    /// helpers.
    pub dsts: &'a mut [FoldData],
    /// Input values, one per source, read by the `get_*_src` helpers.
    pub srcs: &'a [FoldData],
}
1555
1556 impl OpFoldData<'_> {
get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1557 pub fn get_pred_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1558 let i = op.src_idx(src);
1559 let b = match src.src_ref {
1560 SrcRef::Zero | SrcRef::Imm32(_) => panic!("Expected a predicate"),
1561 SrcRef::True => true,
1562 SrcRef::False => false,
1563 _ => {
1564 if let FoldData::Pred(b) = self.srcs[i] {
1565 b
1566 } else {
1567 panic!("FoldData is not a predicate");
1568 }
1569 }
1570 };
1571 b ^ src.src_mod.is_bnot()
1572 }
1573
get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321574 pub fn get_u32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1575 let i = op.src_idx(src);
1576 match src.src_ref {
1577 SrcRef::Zero => 0,
1578 SrcRef::Imm32(imm) => imm,
1579 SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1580 _ => {
1581 if let FoldData::U32(u) = self.srcs[i] {
1582 u
1583 } else {
1584 panic!("FoldData is not a U32");
1585 }
1586 }
1587 }
1588 }
1589
get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u321590 pub fn get_u32_bnot_src(&self, op: &impl SrcsAsSlice, src: &Src) -> u32 {
1591 let x = self.get_u32_src(op, src);
1592 if src.src_mod.is_bnot() {
1593 !x
1594 } else {
1595 x
1596 }
1597 }
1598
get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool1599 pub fn get_carry_src(&self, op: &impl SrcsAsSlice, src: &Src) -> bool {
1600 assert!(src.src_ref.as_ssa().is_some());
1601 let i = op.src_idx(src);
1602 if let FoldData::Carry(b) = self.srcs[i] {
1603 b
1604 } else {
1605 panic!("FoldData is not a predicate");
1606 }
1607 }
1608
1609 #[allow(dead_code)]
get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f321610 pub fn get_f32_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f32 {
1611 f32::from_bits(self.get_u32_src(op, src))
1612 }
1613
1614 #[allow(dead_code)]
get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f641615 pub fn get_f64_src(&self, op: &impl SrcsAsSlice, src: &Src) -> f64 {
1616 let i = op.src_idx(src);
1617 match src.src_ref {
1618 SrcRef::Zero => 0.0,
1619 SrcRef::Imm32(imm) => f64::from_bits(u64::from(imm) << 32),
1620 SrcRef::True | SrcRef::False => panic!("Unexpected predicate"),
1621 _ => {
1622 if let FoldData::Vec2(v) = self.srcs[i] {
1623 let u = u64::from(v[0]) | (u64::from(v[1]) << 32);
1624 f64::from_bits(u)
1625 } else {
1626 panic!("FoldData is not a U32");
1627 }
1628 }
1629 }
1630 }
1631
set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1632 pub fn set_pred_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1633 self.dsts[op.dst_idx(dst)] = FoldData::Pred(b);
1634 }
1635
set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool)1636 pub fn set_carry_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, b: bool) {
1637 self.dsts[op.dst_idx(dst)] = FoldData::Carry(b);
1638 }
1639
set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32)1640 pub fn set_u32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, u: u32) {
1641 self.dsts[op.dst_idx(dst)] = FoldData::U32(u);
1642 }
1643
1644 #[allow(dead_code)]
set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32)1645 pub fn set_f32_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f32) {
1646 self.set_u32_dst(op, dst, f.to_bits());
1647 }
1648
1649 #[allow(dead_code)]
set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64)1650 pub fn set_f64_dst(&mut self, op: &impl DstsAsSlice, dst: &Dst, f: f64) {
1651 let u = f.to_bits();
1652 let v = [u as u32, (u >> 32) as u32];
1653 self.dsts[op.dst_idx(dst)] = FoldData::Vec2(v);
1654 }
1655 }
1656
/// An op whose result can be computed from concrete source values.
pub trait Foldable: SrcsAsSlice + DstsAsSlice {
    /// Evaluates the op for shader model `sm`, reading source values from
    /// `f` and writing destination values back into it.
    // Currently only used by test code
    #[allow(dead_code)]
    fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>);
}
1662
/// Formatting hooks for an op: its destinations and the operation itself
/// are printed separately.
pub trait DisplayOp: DstsAsSlice {
    /// Writes the op's destinations; by default this is `fmt_dst_slice`
    /// over `dsts_as_slice()`.
    fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt_dst_slice(f, self.dsts_as_slice())
    }

    /// Writes the operation part of the op.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
}
1670
// Hack struct so we can re-use Formatters. Shamelessly stolen from
// https://users.rust-lang.org/t/reusing-an-fmt-formatter/8531/4
/// Adapter that turns a formatting closure into something implementing
/// `Display`.
pub struct Fmt<F>(pub F)
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result;

impl<F> fmt::Display for Fmt<F>
where
    F: Fn(&mut fmt::Formatter) -> fmt::Result,
{
    /// Runs the wrapped closure against `f`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Fmt(render) = self;
        render(f)
    }
}
1685
/// Implements `Display` for an op type as `<dsts> = <op>`, leaving out the
/// `<dsts> = ` part when the destinations print as an empty string.
macro_rules! impl_display_for_op {
    ($op: ident) => {
        impl fmt::Display for $op {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // Render the destinations to a scratch buffer first so we
                // can tell whether there is anything to put before " = ".
                let mut dsts = String::new();
                write!(dsts, "{}", Fmt(|inner| self.fmt_dsts(inner)))?;
                if dsts.is_empty() {
                    return self.fmt_op(f);
                }
                write!(f, "{} = ", dsts)?;
                self.fmt_op(f)
            }
        }
    };
}
1700
1701 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1702 pub enum PredSetOp {
1703 And,
1704 Or,
1705 Xor,
1706 }
1707
1708 impl PredSetOp {
eval(&self, a: bool, b: bool) -> bool1709 pub fn eval(&self, a: bool, b: bool) -> bool {
1710 match self {
1711 PredSetOp::And => a & b,
1712 PredSetOp::Or => a | b,
1713 PredSetOp::Xor => a ^ b,
1714 }
1715 }
1716
is_trivial(&self, accum: &Src) -> bool1717 pub fn is_trivial(&self, accum: &Src) -> bool {
1718 if let Some(b) = accum.as_bool() {
1719 match self {
1720 PredSetOp::And => b,
1721 PredSetOp::Or => !b,
1722 PredSetOp::Xor => !b,
1723 }
1724 } else {
1725 false
1726 }
1727 }
1728 }
1729
1730 impl fmt::Display for PredSetOp {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1731 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1732 match self {
1733 PredSetOp::And => write!(f, ".and"),
1734 PredSetOp::Or => write!(f, ".or"),
1735 PredSetOp::Xor => write!(f, ".xor"),
1736 }
1737 }
1738 }
1739
/// Floating-point comparison operator.
///
/// The `Ord*` variants print as `.eq`, `.lt`, ...; the `Unord*` variants
/// print with a trailing `u` (`.equ`, `.ltu`, ...).  `IsNum` and `IsNan`
/// are treated as unary by [`flip`](FloatCmpOp::flip).
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatCmpOp {
    OrdEq,
    OrdNe,
    OrdLt,
    OrdLe,
    OrdGt,
    OrdGe,
    UnordEq,
    UnordNe,
    UnordLt,
    UnordLe,
    UnordGt,
    UnordGe,
    IsNum,
    IsNan,
}

impl FloatCmpOp {
    /// Returns the comparison to use once the two operands are swapped:
    /// equality tests are unchanged, `<` becomes `>` and `<=` becomes `>=`
    /// (and vice versa), keeping the ordered/unordered flavour.
    ///
    /// # Panics
    ///
    /// Panics for `IsNum` and `IsNan`.
    pub fn flip(self) -> FloatCmpOp {
        match self {
            // Symmetric comparisons.
            Self::OrdEq | Self::OrdNe | Self::UnordEq | Self::UnordNe => self,
            Self::OrdLt => Self::OrdGt,
            Self::OrdGt => Self::OrdLt,
            Self::OrdLe => Self::OrdGe,
            Self::OrdGe => Self::OrdLe,
            Self::UnordLt => Self::UnordGt,
            Self::UnordGt => Self::UnordLt,
            Self::UnordLe => Self::UnordGe,
            Self::UnordGe => Self::UnordLe,
            Self::IsNum | Self::IsNan => panic!("Cannot flip unop"),
        }
    }
}

impl fmt::Display for FloatCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::OrdEq => ".eq",
            Self::OrdNe => ".ne",
            Self::OrdLt => ".lt",
            Self::OrdLe => ".le",
            Self::OrdGt => ".gt",
            Self::OrdGe => ".ge",
            Self::UnordEq => ".equ",
            Self::UnordNe => ".neu",
            Self::UnordLt => ".ltu",
            Self::UnordLe => ".leu",
            Self::UnordGt => ".gtu",
            Self::UnordGe => ".geu",
            Self::IsNum => ".num",
            Self::IsNan => ".nan",
        };
        f.write_str(suffix)
    }
}
1797
/// Integer comparison operator, printed as `.eq`, `.ne`, `.lt`, `.le`,
/// `.gt` or `.ge`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpOp {
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,
}

impl IntCmpOp {
    /// Returns the comparison to use once the two operands are swapped:
    /// `Eq`/`Ne` are unchanged, `Lt` and `Gt` swap, `Le` and `Ge` swap.
    pub fn flip(self) -> IntCmpOp {
        match self {
            Self::Lt => Self::Gt,
            Self::Gt => Self::Lt,
            Self::Le => Self::Ge,
            Self::Ge => Self::Le,
            // Symmetric comparisons.
            Self::Eq | Self::Ne => self,
        }
    }
}

impl fmt::Display for IntCmpOp {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::Eq => ".eq",
            Self::Ne => ".ne",
            Self::Lt => ".lt",
            Self::Le => ".le",
            Self::Gt => ".gt",
            Self::Ge => ".ge",
        };
        f.write_str(suffix)
    }
}
1832
/// Signedness of a 32-bit integer comparison, printed as `.u32` or `.i32`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum IntCmpType {
    U32,
    I32,
}

impl IntCmpType {
    /// Returns `true` for `I32` and `false` for `U32`.
    #[allow(dead_code)]
    pub fn is_signed(&self) -> bool {
        matches!(self, Self::I32)
    }
}

impl fmt::Display for IntCmpType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::U32 => ".u32",
            Self::I32 => ".i32",
        };
        f.write_str(suffix)
    }
}
1857
1858 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
1859 pub enum LogicOp2 {
1860 And,
1861 Or,
1862 Xor,
1863 PassB,
1864 }
1865
1866 impl fmt::Display for LogicOp2 {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result1867 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1868 match self {
1869 LogicOp2::And => write!(f, "and"),
1870 LogicOp2::Or => write!(f, "or"),
1871 LogicOp2::Xor => write!(f, "xor"),
1872 LogicOp2::PassB => write!(f, "pass_b"),
1873 }
1874 }
1875 }
1876
1877 impl LogicOp2 {
to_lut(self) -> LogicOp31878 pub fn to_lut(self) -> LogicOp3 {
1879 match self {
1880 LogicOp2::And => LogicOp3::new_lut(&|x, y, _| x & y),
1881 LogicOp2::Or => LogicOp3::new_lut(&|x, y, _| x | y),
1882 LogicOp2::Xor => LogicOp3::new_lut(&|x, y, _| x ^ y),
1883 LogicOp2::PassB => LogicOp3::new_lut(&|_, b, _| b),
1884 }
1885 }
1886 }
1887
/// Three-input logic operation expressed as an 8-entry truth table.
///
/// Bit `i` of `lut` is the output for the input combination whose bits are
/// `i = (x << 2) | (y << 1) | z`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp3 {
    pub lut: u8,
}

impl LogicOp3 {
    /// For each source, the set of table entries in which that source is 1.
    pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];

    /// Builds the table for `f` by evaluating it on the three source masks.
    #[inline]
    pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp3 {
        let [x, y, z] = LogicOp3::SRC_MASKS;
        LogicOp3 { lut: f(x, y, z) }
    }

    /// Builds the table that always yields `val`.
    pub fn new_const(val: bool) -> LogicOp3 {
        let lut = if val { u8::MAX } else { 0 };
        LogicOp3 { lut }
    }

    /// Returns whether the output depends on source `src_idx`, i.e. whether
    /// the entries with that source set differ from those with it clear.
    ///
    /// Panics if `src_idx >= 3`.
    pub fn src_used(&self, src_idx: usize) -> bool {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        let when_clear = self.lut & !mask;
        let when_set = (self.lut >> shift) & !mask;
        when_clear != when_set
    }

    /// Specialises the table for source `src_idx` being the constant `val`;
    /// afterwards the output no longer depends on that source.
    ///
    /// Panics if `src_idx >= 3`.
    pub fn fix_src(&mut self, src_idx: usize, val: bool) {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        self.lut = if val {
            // Keep the "source set" half and mirror it into the other half.
            let set_half = self.lut & mask;
            set_half | (set_half >> shift)
        } else {
            // Keep the "source clear" half and mirror it into the other.
            let clear_half = self.lut & !mask;
            (clear_half << shift) | clear_half
        };
    }

    /// Rewrites the table so that it computes the same function of the
    /// inverted source `src_idx` (the two halves swap places).
    ///
    /// Panics if `src_idx >= 3`.
    pub fn invert_src(&mut self, src_idx: usize) {
        let mask = LogicOp3::SRC_MASKS[src_idx];
        let shift = mask.trailing_zeros();
        let set_half = self.lut & mask;
        let clear_half = self.lut & !mask;
        self.lut = (clear_half << shift) | (set_half >> shift);
    }

    /// Evaluates the table bitwise on `x`, `y` and `z`.
    ///
    /// The result is the OR of the minterms whose table bit is set.
    pub fn eval<
        T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
    >(
        &self,
        x: T,
        y: T,
        z: T,
    ) -> T {
        let mut acc = x & !x; // zero
        for entry in 0..8_u8 {
            if (self.lut & (1 << entry)) == 0 {
                continue;
            }
            // Bit 2 of the entry index selects x, bit 1 y and bit 0 z.
            let xs = if (entry & 4) != 0 { x } else { !x };
            let ys = if (entry & 2) != 0 { y } else { !y };
            let zs = if (entry & 1) != 0 { z } else { !z };
            acc = acc | (xs & ys & zs);
        }
        acc
    }
}

impl fmt::Display for LogicOp3 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let lut = self.lut;
        write!(f, "LUT[{lut:#x}]")
    }
}
1981
/// Width of a floating-point value, printed as `.f16`, `.f32` or `.f64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FloatType {
    F16,
    F32,
    F64,
}

impl FloatType {
    /// Returns the float type that is `bits` bits wide.
    ///
    /// The argument is a bit count (16, 32 or 64), matching what
    /// [`bits`](FloatType::bits) returns.  The parameter used to be called
    /// `bytes`, which did not match the values it accepts.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 16, 32 or 64.
    pub fn from_bits(bits: usize) -> FloatType {
        match bits {
            16 => FloatType::F16,
            32 => FloatType::F32,
            64 => FloatType::F64,
            _ => panic!("Invalid float type size"),
        }
    }

    /// Returns the width of the type in bits.
    pub fn bits(&self) -> usize {
        match self {
            FloatType::F16 => 16,
            FloatType::F32 => 32,
            FloatType::F64 => 64,
        }
    }
}

impl fmt::Display for FloatType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            FloatType::F16 => ".f16",
            FloatType::F32 => ".f32",
            FloatType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2017
/// Floating-point rounding mode.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum FRndMode {
    /// Printed as `.re`.
    NearestEven,
    /// Printed as `.rm`.
    NegInf,
    /// Printed as `.rp`.
    PosInf,
    /// Printed as `.rz`.
    Zero,
}

impl fmt::Display for FRndMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::NearestEven => ".re",
            Self::NegInf => ".rm",
            Self::PosInf => ".rp",
            Self::Zero => ".rz",
        };
        f.write_str(suffix)
    }
}
2036
/// Dimensionality of a texture access.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexDim {
    /// Printed as `.1d`.
    _1D,
    /// Printed as `.a1d`.
    Array1D,
    /// Printed as `.2d`.
    _2D,
    /// Printed as `.a2d`.
    Array2D,
    /// Printed as `.3d`.
    _3D,
    /// Printed as `.cube`.
    Cube,
    /// Printed as `.acube`.
    ArrayCube,
}

impl fmt::Display for TexDim {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::_1D => ".1d",
            Self::Array1D => ".a1d",
            Self::_2D => ".2d",
            Self::Array2D => ".a2d",
            Self::_3D => ".3d",
            Self::Cube => ".cube",
            Self::ArrayCube => ".acube",
        };
        f.write_str(suffix)
    }
}
2061
/// Level-of-detail mode of a texture access.
///
/// Unlike most modifiers in this file, the printed form has no leading dot.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexLodMode {
    /// Printed as `la`.
    Auto,
    /// Printed as `lz`.
    Zero,
    /// Printed as `lb`.
    Bias,
    /// Printed as `ll`.
    Lod,
    /// Printed as `lc`.
    Clamp,
    /// Printed as `lb.lc`.
    BiasClamp,
}

impl fmt::Display for TexLodMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Auto => "la",
            Self::Zero => "lz",
            Self::Bias => "lb",
            Self::Lod => "ll",
            Self::Clamp => "lc",
            Self::BiasClamp => "lb.lc",
        };
        f.write_str(name)
    }
}
2084
/// Offset mode of a `tld4` texture access.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum Tld4OffsetMode {
    /// Printed as `no_off`.
    None,
    /// Printed as `aoffi`.
    AddOffI,
    /// Printed as `ptp`.
    PerPx,
}

impl fmt::Display for Tld4OffsetMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::None => "no_off",
            Self::AddOffI => "aoffi",
            Self::PerPx => "ptp",
        };
        f.write_str(name)
    }
}
2101
/// What a texture query asks for.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum TexQuery {
    /// Printed as `dimension`.
    Dimension,
    /// Printed as `texture_type`.
    TextureType,
    /// Printed as `sampler_pos`.
    SamplerPos,
}

impl fmt::Display for TexQuery {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Dimension => "dimension",
            Self::TextureType => "texture_type",
            Self::SamplerPos => "sampler_pos",
        };
        f.write_str(name)
    }
}
2119
/// Dimensionality of an image access.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ImageDim {
    /// One coordinate; printed as `.1d`.
    _1D,
    /// One coordinate; printed as `.buf`.
    _1DBuffer,
    /// Two coordinates; printed as `.a1d`.
    _1DArray,
    /// Two coordinates; printed as `.2d`.
    _2D,
    /// Three coordinates; printed as `.a2d`.
    _2DArray,
    /// Three coordinates; printed as `.3d`.
    _3D,
}

impl ImageDim {
    /// Returns the number of coordinate components used to address an
    /// image of this dimensionality.
    pub fn coord_comps(&self) -> u8 {
        match self {
            Self::_1D | Self::_1DBuffer => 1,
            Self::_1DArray | Self::_2D => 2,
            Self::_2DArray | Self::_3D => 3,
        }
    }
}

impl fmt::Display for ImageDim {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::_1D => ".1d",
            Self::_1DBuffer => ".buf",
            Self::_1DArray => ".a1d",
            Self::_2D => ".2d",
            Self::_2DArray => ".a2d",
            Self::_3D => ".3d",
        };
        f.write_str(suffix)
    }
}
2155
/// Integer type: a width of 8, 16, 32 or 64 bits plus a signedness.
///
/// Printed as `.u8`, `.i8`, ..., `.u64`, `.i64`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum IntType {
    U8,
    I8,
    U16,
    I16,
    U32,
    I32,
    U64,
    I64,
}

impl IntType {
    /// Returns the integer type with the given width and signedness.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is not 8, 16, 32 or 64.
    pub fn from_bits(bits: usize, is_signed: bool) -> IntType {
        match (bits, is_signed) {
            (8, false) => IntType::U8,
            (8, true) => IntType::I8,
            (16, false) => IntType::U16,
            (16, true) => IntType::I16,
            (32, false) => IntType::U32,
            (32, true) => IntType::I32,
            (64, false) => IntType::U64,
            (64, true) => IntType::I64,
            _ => panic!("Invalid integer type size"),
        }
    }

    /// Returns `true` for the `I*` variants.
    pub fn is_signed(&self) -> bool {
        matches!(
            self,
            IntType::I8 | IntType::I16 | IntType::I32 | IntType::I64
        )
    }

    /// Returns the width of the type in bits.
    pub fn bits(&self) -> usize {
        match self {
            IntType::U8 | IntType::I8 => 8,
            IntType::U16 | IntType::I16 => 16,
            IntType::U32 | IntType::I32 => 32,
            IntType::U64 | IntType::I64 => 64,
        }
    }
}

impl fmt::Display for IntType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            IntType::U8 => ".u8",
            IntType::I8 => ".i8",
            IntType::U16 => ".u16",
            IntType::I16 => ".i16",
            IntType::U32 => ".u32",
            IntType::I32 => ".i32",
            IntType::U64 => ".u64",
            IntType::I64 => ".i64",
        };
        f.write_str(suffix)
    }
}
2234
/// Width of a memory address, printed as `.a32` or `.a64`.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemAddrType {
    A32,
    A64,
}

impl fmt::Display for MemAddrType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::A32 => ".a32",
            Self::A64 => ".a64",
        };
        f.write_str(suffix)
    }
}
2249
/// Data type of a memory load or store.
///
/// Sub-word accesses carry a signedness; 32-bit and wider accesses do not.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemType {
    U8,
    I8,
    U16,
    I16,
    B32,
    B64,
    B128,
}

impl MemType {
    /// Returns the memory type for an access of `size` bytes.
    ///
    /// `is_signed` only matters for 1- and 2-byte accesses.
    ///
    /// # Panics
    ///
    /// Panics if `size` is not 1, 2, 4, 8 or 16.
    pub fn from_size(size: u8, is_signed: bool) -> MemType {
        match (size, is_signed) {
            (1, false) => MemType::U8,
            (1, true) => MemType::I8,
            (2, false) => MemType::U16,
            (2, true) => MemType::I16,
            (4, _) => MemType::B32,
            (8, _) => MemType::B64,
            (16, _) => MemType::B128,
            _ => panic!("Invalid memory load/store size"),
        }
    }

    /// Returns the width of the access in bits.
    #[allow(dead_code)]
    pub fn bits(&self) -> usize {
        match self {
            MemType::U8 | MemType::I8 => 8,
            MemType::U16 | MemType::I16 => 16,
            MemType::B32 => 32,
            MemType::B64 => 64,
            MemType::B128 => 128,
        }
    }
}

impl fmt::Display for MemType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemType::U8 => ".u8",
            MemType::I8 => ".i8",
            MemType::U16 => ".u16",
            MemType::I16 => ".i16",
            MemType::B32 => ".b32",
            MemType::B64 => ".b64",
            MemType::B128 => ".b128",
        };
        f.write_str(suffix)
    }
}
2310
2311 #[allow(dead_code)]
2312 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2313 pub enum MemOrder {
2314 Constant,
2315 Weak,
2316 Strong(MemScope),
2317 }
2318
2319 impl fmt::Display for MemOrder {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2320 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2321 match self {
2322 MemOrder::Constant => write!(f, ".constant"),
2323 MemOrder::Weak => write!(f, ".weak"),
2324 MemOrder::Strong(scope) => write!(f, ".strong{}", scope),
2325 }
2326 }
2327 }
2328
/// Scope of a strongly ordered memory access.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemScope {
    /// Printed as `.cta`.
    CTA,
    /// Printed as `.gpu`.
    GPU,
    /// Printed as `.sys`.
    System,
}

impl fmt::Display for MemScope {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::CTA => ".cta",
            Self::GPU => ".gpu",
            Self::System => ".sys",
        };
        f.write_str(suffix)
    }
}
2345
2346 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
2347 pub enum MemSpace {
2348 Global(MemAddrType),
2349 Local,
2350 Shared,
2351 }
2352
2353 impl MemSpace {
addr_type(&self) -> MemAddrType2354 pub fn addr_type(&self) -> MemAddrType {
2355 match self {
2356 MemSpace::Global(t) => *t,
2357 MemSpace::Local => MemAddrType::A32,
2358 MemSpace::Shared => MemAddrType::A32,
2359 }
2360 }
2361 }
2362
2363 impl fmt::Display for MemSpace {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2364 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2365 match self {
2366 MemSpace::Global(t) => write!(f, ".global{t}"),
2367 MemSpace::Local => write!(f, ".local"),
2368 MemSpace::Shared => write!(f, ".shared"),
2369 }
2370 }
2371 }
2372
/// Eviction-priority qualifier of a memory access.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum MemEvictionPriority {
    /// Displayed as `.ef`.
    First,
    /// The default; displayed as nothing.
    Normal,
    /// Displayed as `.el`.
    Last,
    /// Displayed as `.lu`.
    Unchanged,
}

impl fmt::Display for MemEvictionPriority {
    /// Writes the dotted priority suffix, or nothing at all for `Normal`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MemEvictionPriority::Normal => return Ok(()),
            MemEvictionPriority::First => ".ef",
            MemEvictionPriority::Last => ".el",
            MemEvictionPriority::Unchanged => ".lu",
        };
        f.write_str(suffix)
    }
}
2392
2393 #[derive(Clone)]
2394 pub struct MemAccess {
2395 pub mem_type: MemType,
2396 pub space: MemSpace,
2397 pub order: MemOrder,
2398 pub eviction_priority: MemEvictionPriority,
2399 }
2400
2401 impl fmt::Display for MemAccess {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2402 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2403 write!(
2404 f,
2405 "{}{}{}{}",
2406 self.space, self.order, self.eviction_priority, self.mem_type,
2407 )
2408 }
2409 }
2410
/// Data type operated on by an atomic.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomType {
    F16x2,
    U32,
    I32,
    F32,
    U64,
    I64,
    F64,
}

impl AtomType {
    /// Returns the float atomic type with the given bit size.
    ///
    /// # Panics
    ///
    /// Panics for any size other than 32 or 64; 16 gets its own message
    /// because 16-bit float atomics are not supported yet.
    pub fn F(bits: u8) -> AtomType {
        match bits {
            64 => Self::F64,
            32 => Self::F32,
            16 => panic!("16-bit float atomics not yet supported"),
            _ => panic!("Invalid float atomic type"),
        }
    }

    /// Returns the unsigned integer atomic type with the given bit size.
    ///
    /// # Panics
    ///
    /// Panics for any size other than 32 or 64.
    pub fn U(bits: u8) -> AtomType {
        match bits {
            64 => Self::U64,
            32 => Self::U32,
            _ => panic!("Invalid uint atomic type"),
        }
    }

    /// Returns the signed integer atomic type with the given bit size.
    ///
    /// # Panics
    ///
    /// Panics for any size other than 32 or 64.
    pub fn I(bits: u8) -> AtomType {
        match bits {
            64 => Self::I64,
            32 => Self::I32,
            _ => panic!("Invalid int atomic type"),
        }
    }

    /// Returns the total size of the type in bits (`F16x2` counts as 32).
    pub fn bits(&self) -> usize {
        match self {
            Self::U64 | Self::I64 | Self::F64 => 64,
            Self::F16x2 | Self::U32 | Self::I32 | Self::F32 => 32,
        }
    }
}

impl fmt::Display for AtomType {
    /// Writes the dotted type suffix, e.g. `.u32`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomType::F16x2 => ".f16x2",
            AtomType::U32 => ".u32",
            AtomType::I32 => ".i32",
            AtomType::F32 => ".f32",
            AtomType::U64 => ".u64",
            AtomType::I64 => ".i64",
            AtomType::F64 => ".f64",
        };
        f.write_str(suffix)
    }
}
2471
/// Where a compare-exchange atomic finds its comparison value.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomCmpSrc {
    /// The cmpr value is passed as a separate source
    Separate,
    /// The cmpr value is packed in with the data with cmpr coming first
    Packed,
}

/// The operation performed by an atomic.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum AtomOp {
    Add,
    Min,
    Max,
    Inc,
    Dec,
    And,
    Or,
    Xor,
    Exch,
    CmpExch(AtomCmpSrc),
}

impl AtomOp {
    /// Returns `true` for every operation except `Exch` and `CmpExch`.
    pub fn is_reduction(&self) -> bool {
        match self {
            Self::Exch | Self::CmpExch(_) => false,
            Self::Add
            | Self::Min
            | Self::Max
            | Self::Inc
            | Self::Dec
            | Self::And
            | Self::Or
            | Self::Xor => true,
        }
    }
}

impl fmt::Display for AtomOp {
    /// Writes the dotted operation suffix, e.g. `.add` or
    /// `.cmpexch.packed`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            AtomOp::Add => ".add",
            AtomOp::Min => ".min",
            AtomOp::Max => ".max",
            AtomOp::Inc => ".inc",
            AtomOp::Dec => ".dec",
            AtomOp::And => ".and",
            AtomOp::Or => ".or",
            AtomOp::Xor => ".xor",
            AtomOp::Exch => ".exch",
            AtomOp::CmpExch(AtomCmpSrc::Separate) => ".cmpexch",
            AtomOp::CmpExch(AtomCmpSrc::Packed) => ".cmpexch.packed",
        };
        f.write_str(suffix)
    }
}
2528
/// Interpolation frequency qualifier.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpFreq {
    /// Displayed as `.pass`.
    Pass,
    /// Displayed as `.pass_mul_w`.
    PassMulW,
    /// Displayed as `.constant`.
    Constant,
    /// Displayed as `.state`.
    State,
}

impl fmt::Display for InterpFreq {
    /// Writes the dotted frequency suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            InterpFreq::Pass => ".pass",
            InterpFreq::PassMulW => ".pass_mul_w",
            InterpFreq::Constant => ".constant",
            InterpFreq::State => ".state",
        };
        f.write_str(suffix)
    }
}
/// Interpolation location qualifier.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum InterpLoc {
    /// The default; displayed as nothing.
    Default,
    /// Displayed as `.centroid`.
    Centroid,
    /// Displayed as `.offset`.
    Offset,
}

impl fmt::Display for InterpLoc {
    /// Writes the dotted location suffix, or nothing at all for `Default`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            InterpLoc::Default => return Ok(()),
            InterpLoc::Centroid => ".centroid",
            InterpLoc::Offset => ".offset",
        };
        f.write_str(suffix)
    }
}
2563
/// Parameters describing an attribute access.
///
/// NOTE(review): the field notes below are inferred from the field names
/// alone; confirm them against the ops that consume this struct.
pub struct AttrAccess {
    /// Attribute address (presumably a byte offset -- TODO confirm).
    pub addr: u16,
    /// Component count (presumably the number of components accessed --
    /// TODO confirm).
    pub comps: u8,
    /// Presumably selects per-patch attributes -- TODO confirm.
    pub patch: bool,
    /// Presumably selects output rather than input attributes -- TODO
    /// confirm.
    pub output: bool,
    /// Presumably marks `addr` as a physical address -- TODO confirm.
    pub phys: bool,
}
2571
2572 #[repr(C)]
2573 #[derive(SrcsAsSlice, DstsAsSlice)]
2574 pub struct OpFAdd {
2575 #[dst_type(F32)]
2576 pub dst: Dst,
2577
2578 #[src_type(F32)]
2579 pub srcs: [Src; 2],
2580
2581 pub saturate: bool,
2582 pub rnd_mode: FRndMode,
2583 pub ftz: bool,
2584 }
2585
2586 impl DisplayOp for OpFAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2587 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2588 let sat = if self.saturate { ".sat" } else { "" };
2589 write!(f, "fadd{sat}")?;
2590 if self.rnd_mode != FRndMode::NearestEven {
2591 write!(f, "{}", self.rnd_mode)?;
2592 }
2593 if self.ftz {
2594 write!(f, ".ftz")?;
2595 }
2596 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2597 }
2598 }
2599 impl_display_for_op!(OpFAdd);
2600
2601 #[repr(C)]
2602 #[derive(SrcsAsSlice, DstsAsSlice)]
2603 pub struct OpFFma {
2604 #[dst_type(F32)]
2605 pub dst: Dst,
2606
2607 #[src_type(F32)]
2608 pub srcs: [Src; 3],
2609
2610 pub saturate: bool,
2611 pub rnd_mode: FRndMode,
2612 pub ftz: bool,
2613 pub dnz: bool,
2614 }
2615
2616 impl DisplayOp for OpFFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2617 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2618 let sat = if self.saturate { ".sat" } else { "" };
2619 write!(f, "ffma{sat}")?;
2620 if self.rnd_mode != FRndMode::NearestEven {
2621 write!(f, "{}", self.rnd_mode)?;
2622 }
2623 if self.dnz {
2624 write!(f, ".dnz")?;
2625 } else if self.ftz {
2626 write!(f, ".ftz")?;
2627 }
2628 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2629 }
2630 }
2631 impl_display_for_op!(OpFFma);
2632
2633 #[repr(C)]
2634 #[derive(SrcsAsSlice, DstsAsSlice)]
2635 pub struct OpFMnMx {
2636 #[dst_type(F32)]
2637 pub dst: Dst,
2638
2639 #[src_type(F32)]
2640 pub srcs: [Src; 2],
2641
2642 #[src_type(Pred)]
2643 pub min: Src,
2644
2645 pub ftz: bool,
2646 }
2647
2648 impl DisplayOp for OpFMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2649 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2650 let ftz = if self.ftz { ".ftz" } else { "" };
2651 write!(
2652 f,
2653 "fmnmx{ftz} {} {} {}",
2654 self.srcs[0], self.srcs[1], self.min
2655 )
2656 }
2657 }
2658 impl_display_for_op!(OpFMnMx);
2659
2660 #[repr(C)]
2661 #[derive(SrcsAsSlice, DstsAsSlice)]
2662 pub struct OpFMul {
2663 #[dst_type(F32)]
2664 pub dst: Dst,
2665
2666 #[src_type(F32)]
2667 pub srcs: [Src; 2],
2668
2669 pub saturate: bool,
2670 pub rnd_mode: FRndMode,
2671 pub ftz: bool,
2672 pub dnz: bool,
2673 }
2674
2675 impl DisplayOp for OpFMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2676 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2677 let sat = if self.saturate { ".sat" } else { "" };
2678 write!(f, "fmul{sat}")?;
2679 if self.rnd_mode != FRndMode::NearestEven {
2680 write!(f, "{}", self.rnd_mode)?;
2681 }
2682 if self.dnz {
2683 write!(f, ".dnz")?;
2684 } else if self.ftz {
2685 write!(f, ".ftz")?;
2686 }
2687 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2688 }
2689 }
2690 impl_display_for_op!(OpFMul);
2691
2692 #[repr(C)]
2693 #[derive(SrcsAsSlice, DstsAsSlice)]
2694 pub struct OpFSet {
2695 #[dst_type(F32)]
2696 pub dst: Dst,
2697
2698 pub cmp_op: FloatCmpOp,
2699
2700 #[src_type(F32)]
2701 pub srcs: [Src; 2],
2702
2703 pub ftz: bool,
2704 }
2705
2706 impl DisplayOp for OpFSet {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2707 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2708 let ftz = if self.ftz { ".ftz" } else { "" };
2709 write!(
2710 f,
2711 "fset{}{ftz} {} {}",
2712 self.cmp_op, self.srcs[0], self.srcs[1]
2713 )
2714 }
2715 }
2716 impl_display_for_op!(OpFSet);
2717
2718 #[repr(C)]
2719 #[derive(SrcsAsSlice, DstsAsSlice)]
2720 pub struct OpFSetP {
2721 #[dst_type(Pred)]
2722 pub dst: Dst,
2723
2724 pub set_op: PredSetOp,
2725 pub cmp_op: FloatCmpOp,
2726
2727 #[src_type(F32)]
2728 pub srcs: [Src; 2],
2729
2730 #[src_type(Pred)]
2731 pub accum: Src,
2732
2733 pub ftz: bool,
2734 }
2735
2736 impl DisplayOp for OpFSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2737 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2738 let ftz = if self.ftz { ".ftz" } else { "" };
2739 write!(f, "fsetp{}{ftz}", self.cmp_op)?;
2740 if !self.set_op.is_trivial(&self.accum) {
2741 write!(f, "{}", self.set_op)?;
2742 }
2743 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
2744 if !self.set_op.is_trivial(&self.accum) {
2745 write!(f, " {}", self.accum)?;
2746 }
2747 Ok(())
2748 }
2749 }
2750 impl_display_for_op!(OpFSetP);
2751
/// One of the four per-entry operations of an [`OpFSwzAdd`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum FSwzAddOp {
    /// Displayed as `add`.
    Add,
    /// Displayed as `subr`.
    SubRight,
    /// Displayed as `sub`.
    SubLeft,
    /// Displayed as `mov2`.
    MoveLeft,
}

impl fmt::Display for FSwzAddOp {
    /// Writes the operation name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            FSwzAddOp::Add => "add",
            FSwzAddOp::SubRight => "subr",
            FSwzAddOp::SubLeft => "sub",
            FSwzAddOp::MoveLeft => "mov2",
        };
        f.write_str(name)
    }
}
2771
2772 #[repr(C)]
2773 #[derive(SrcsAsSlice, DstsAsSlice)]
2774 pub struct OpFSwzAdd {
2775 #[dst_type(F32)]
2776 pub dst: Dst,
2777
2778 #[src_type(GPR)]
2779 pub srcs: [Src; 2],
2780
2781 pub rnd_mode: FRndMode,
2782 pub ftz: bool,
2783
2784 pub ops: [FSwzAddOp; 4],
2785 }
2786
2787 impl DisplayOp for OpFSwzAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2788 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2789 write!(f, "fswzadd",)?;
2790 if self.rnd_mode != FRndMode::NearestEven {
2791 write!(f, "{}", self.rnd_mode)?;
2792 }
2793 if self.ftz {
2794 write!(f, ".ftz")?;
2795 }
2796 write!(
2797 f,
2798 " {} {} [{}, {}, {}, {}]",
2799 self.srcs[0],
2800 self.srcs[1],
2801 self.ops[0],
2802 self.ops[1],
2803 self.ops[2],
2804 self.ops[3],
2805 )
2806 }
2807 }
2808 impl_display_for_op!(OpFSwzAdd);
2809
/// Selects which function an [`OpRro`] reduces the range for.
pub enum RroOp {
    /// Displayed as `.sincos`.
    SinCos,
    /// Displayed as `.exp2`.
    Exp2,
}

impl fmt::Display for RroOp {
    /// Writes the dotted operation suffix.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            RroOp::SinCos => ".sincos",
            RroOp::Exp2 => ".exp2",
        };
        f.write_str(suffix)
    }
}
2823
2824 /// MuFu range reduction operator
2825 ///
2826 /// Not available on SM70+
2827 #[repr(C)]
2828 #[derive(SrcsAsSlice, DstsAsSlice)]
2829 pub struct OpRro {
2830 #[dst_type(F32)]
2831 pub dst: Dst,
2832
2833 pub op: RroOp,
2834
2835 #[src_type(F32)]
2836 pub src: Src,
2837 }
2838
2839 impl DisplayOp for OpRro {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2840 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2841 write!(f, "rro{} {}", self.op, self.src)
2842 }
2843 }
2844 impl_display_for_op!(OpRro);
2845
/// The function evaluated by an [`OpMuFu`].
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum MuFuOp {
    Cos,
    Sin,
    Exp2,
    Log2,
    Rcp,
    Rsq,
    Rcp64H,
    Rsq64H,
    Sqrt,
    Tanh,
}

impl fmt::Display for MuFuOp {
    /// Writes the lower-case function name without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            MuFuOp::Cos => "cos",
            MuFuOp::Sin => "sin",
            MuFuOp::Exp2 => "exp2",
            MuFuOp::Log2 => "log2",
            MuFuOp::Rcp => "rcp",
            MuFuOp::Rsq => "rsq",
            MuFuOp::Rcp64H => "rcp64h",
            MuFuOp::Rsq64H => "rsq64h",
            MuFuOp::Sqrt => "sqrt",
            MuFuOp::Tanh => "tanh",
        };
        f.write_str(name)
    }
}
2877
2878 #[repr(C)]
2879 #[derive(SrcsAsSlice, DstsAsSlice)]
2880 pub struct OpMuFu {
2881 #[dst_type(F32)]
2882 pub dst: Dst,
2883
2884 pub op: MuFuOp,
2885
2886 #[src_type(F32)]
2887 pub src: Src,
2888 }
2889
2890 impl DisplayOp for OpMuFu {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2891 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2892 write!(f, "mufu.{} {}", self.op, self.src)
2893 }
2894 }
2895 impl_display_for_op!(OpMuFu);
2896
2897 #[repr(C)]
2898 #[derive(SrcsAsSlice, DstsAsSlice)]
2899 pub struct OpDAdd {
2900 #[dst_type(F64)]
2901 pub dst: Dst,
2902
2903 #[src_type(F64)]
2904 pub srcs: [Src; 2],
2905
2906 pub rnd_mode: FRndMode,
2907 }
2908
2909 impl DisplayOp for OpDAdd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2910 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2911 write!(f, "dadd")?;
2912 if self.rnd_mode != FRndMode::NearestEven {
2913 write!(f, "{}", self.rnd_mode)?;
2914 }
2915 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2916 }
2917 }
2918 impl_display_for_op!(OpDAdd);
2919
2920 #[repr(C)]
2921 #[derive(SrcsAsSlice, DstsAsSlice)]
2922 pub struct OpDMul {
2923 #[dst_type(F64)]
2924 pub dst: Dst,
2925
2926 #[src_type(F64)]
2927 pub srcs: [Src; 2],
2928
2929 pub rnd_mode: FRndMode,
2930 }
2931
2932 impl DisplayOp for OpDMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2933 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2934 write!(f, "dmul")?;
2935 if self.rnd_mode != FRndMode::NearestEven {
2936 write!(f, "{}", self.rnd_mode)?;
2937 }
2938 write!(f, " {} {}", self.srcs[0], self.srcs[1],)
2939 }
2940 }
2941 impl_display_for_op!(OpDMul);
2942
2943 #[repr(C)]
2944 #[derive(SrcsAsSlice, DstsAsSlice)]
2945 pub struct OpDFma {
2946 #[dst_type(F64)]
2947 pub dst: Dst,
2948
2949 #[src_type(F64)]
2950 pub srcs: [Src; 3],
2951
2952 pub rnd_mode: FRndMode,
2953 }
2954
2955 impl DisplayOp for OpDFma {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2956 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2957 write!(f, "dfma")?;
2958 if self.rnd_mode != FRndMode::NearestEven {
2959 write!(f, "{}", self.rnd_mode)?;
2960 }
2961 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
2962 }
2963 }
2964 impl_display_for_op!(OpDFma);
2965
2966 #[repr(C)]
2967 #[derive(SrcsAsSlice, DstsAsSlice)]
2968 pub struct OpDMnMx {
2969 #[dst_type(F64)]
2970 pub dst: Dst,
2971
2972 #[src_type(F64)]
2973 pub srcs: [Src; 2],
2974
2975 #[src_type(Pred)]
2976 pub min: Src,
2977 }
2978
2979 impl DisplayOp for OpDMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result2980 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2981 write!(f, "dmnmx {} {} {}", self.srcs[0], self.srcs[1], self.min)
2982 }
2983 }
2984 impl_display_for_op!(OpDMnMx);
2985
2986 #[repr(C)]
2987 #[derive(SrcsAsSlice, DstsAsSlice)]
2988 pub struct OpDSetP {
2989 #[dst_type(Pred)]
2990 pub dst: Dst,
2991
2992 pub set_op: PredSetOp,
2993 pub cmp_op: FloatCmpOp,
2994
2995 #[src_type(F64)]
2996 pub srcs: [Src; 2],
2997
2998 #[src_type(Pred)]
2999 pub accum: Src,
3000 }
3001
3002 impl DisplayOp for OpDSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3003 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3004 write!(f, "dsetp{}", self.cmp_op)?;
3005 if !self.set_op.is_trivial(&self.accum) {
3006 write!(f, "{}", self.set_op)?;
3007 }
3008 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3009 if !self.set_op.is_trivial(&self.accum) {
3010 write!(f, " {}", self.accum)?;
3011 }
3012 Ok(())
3013 }
3014 }
3015 impl_display_for_op!(OpDSetP);
3016
3017 #[repr(C)]
3018 #[derive(SrcsAsSlice, DstsAsSlice)]
3019 pub struct OpHAdd2 {
3020 #[dst_type(F16v2)]
3021 pub dst: Dst,
3022
3023 #[src_type(F16v2)]
3024 pub srcs: [Src; 2],
3025
3026 pub saturate: bool,
3027 pub ftz: bool,
3028 pub f32: bool,
3029 }
3030
3031 impl DisplayOp for OpHAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3032 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3033 let sat = if self.saturate { ".sat" } else { "" };
3034 let f32 = if self.f32 { ".f32" } else { "" };
3035 write!(f, "hadd2{sat}{f32}")?;
3036 if self.ftz {
3037 write!(f, ".ftz")?;
3038 }
3039 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3040 }
3041 }
3042 impl_display_for_op!(OpHAdd2);
3043
3044 #[repr(C)]
3045 #[derive(SrcsAsSlice, DstsAsSlice)]
3046 pub struct OpHSet2 {
3047 #[dst_type(F16v2)]
3048 pub dst: Dst,
3049
3050 pub set_op: PredSetOp,
3051 pub cmp_op: FloatCmpOp,
3052
3053 #[src_type(F16v2)]
3054 pub srcs: [Src; 2],
3055
3056 #[src_type(Pred)]
3057 pub accum: Src,
3058
3059 pub ftz: bool,
3060 }
3061
3062 impl DisplayOp for OpHSet2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3063 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3064 let ftz = if self.ftz { ".ftz" } else { "" };
3065 write!(f, "hset2{}{ftz}", self.cmp_op)?;
3066 if !self.set_op.is_trivial(&self.accum) {
3067 write!(f, "{}", self.set_op)?;
3068 }
3069 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3070 if !self.set_op.is_trivial(&self.accum) {
3071 write!(f, " {}", self.accum)?;
3072 }
3073 Ok(())
3074 }
3075 }
3076 impl_display_for_op!(OpHSet2);
3077
3078 #[repr(C)]
3079 #[derive(SrcsAsSlice, DstsAsSlice)]
3080 pub struct OpHSetP2 {
3081 #[dst_type(Pred)]
3082 pub dsts: [Dst; 2],
3083
3084 pub set_op: PredSetOp,
3085 pub cmp_op: FloatCmpOp,
3086
3087 #[src_type(F16v2)]
3088 pub srcs: [Src; 2],
3089
3090 #[src_type(Pred)]
3091 pub accum: Src,
3092
3093 pub ftz: bool,
3094
3095 // When not set, each dsts get the result of each lanes.
3096 // When set, the first dst gets the result of both lanes (res0 && res1)
3097 // and the second dst gets the negation !(res0 && res1)
3098 // before applying the accumulator.
3099 pub horizontal: bool,
3100 }
3101
3102 impl DisplayOp for OpHSetP2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3103 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3104 let ftz = if self.ftz { ".ftz" } else { "" };
3105 write!(f, "hsetp2{}{ftz}", self.cmp_op)?;
3106 if !self.set_op.is_trivial(&self.accum) {
3107 write!(f, "{}", self.set_op)?;
3108 }
3109 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3110 if !self.set_op.is_trivial(&self.accum) {
3111 write!(f, " {}", self.accum)?;
3112 }
3113 Ok(())
3114 }
3115 }
3116 impl_display_for_op!(OpHSetP2);
3117
3118 #[repr(C)]
3119 #[derive(SrcsAsSlice, DstsAsSlice)]
3120 pub struct OpHMul2 {
3121 #[dst_type(F16v2)]
3122 pub dst: Dst,
3123
3124 #[src_type(F16v2)]
3125 pub srcs: [Src; 2],
3126
3127 pub saturate: bool,
3128 pub ftz: bool,
3129 pub dnz: bool,
3130 }
3131
3132 impl DisplayOp for OpHMul2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3133 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3134 let sat = if self.saturate { ".sat" } else { "" };
3135 write!(f, "hmul2{sat}")?;
3136 if self.dnz {
3137 write!(f, ".dnz")?;
3138 } else if self.ftz {
3139 write!(f, ".ftz")?;
3140 }
3141 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3142 }
3143 }
3144 impl_display_for_op!(OpHMul2);
3145
3146 #[repr(C)]
3147 #[derive(SrcsAsSlice, DstsAsSlice)]
3148 pub struct OpHFma2 {
3149 #[dst_type(F16v2)]
3150 pub dst: Dst,
3151
3152 #[src_type(F16v2)]
3153 pub srcs: [Src; 3],
3154
3155 pub saturate: bool,
3156 pub ftz: bool,
3157 pub dnz: bool,
3158 pub f32: bool,
3159 }
3160
3161 impl DisplayOp for OpHFma2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3162 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3163 let sat = if self.saturate { ".sat" } else { "" };
3164 let f32 = if self.f32 { ".f32" } else { "" };
3165 write!(f, "hfma2{sat}{f32}")?;
3166 if self.dnz {
3167 write!(f, ".dnz")?;
3168 } else if self.ftz {
3169 write!(f, ".ftz")?;
3170 }
3171 write!(f, " {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2])
3172 }
3173 }
3174 impl_display_for_op!(OpHFma2);
3175
3176 #[repr(C)]
3177 #[derive(SrcsAsSlice, DstsAsSlice)]
3178 pub struct OpHMnMx2 {
3179 #[dst_type(F16v2)]
3180 pub dst: Dst,
3181
3182 #[src_type(F16v2)]
3183 pub srcs: [Src; 2],
3184
3185 #[src_type(Pred)]
3186 pub min: Src,
3187
3188 pub ftz: bool,
3189 }
3190
3191 impl DisplayOp for OpHMnMx2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3192 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3193 let ftz = if self.ftz { ".ftz" } else { "" };
3194 write!(
3195 f,
3196 "hmnmx2{ftz} {} {} {}",
3197 self.srcs[0], self.srcs[1], self.min
3198 )
3199 }
3200 }
3201 impl_display_for_op!(OpHMnMx2);
3202
3203 #[repr(C)]
3204 #[derive(SrcsAsSlice, DstsAsSlice)]
3205 pub struct OpBMsk {
3206 #[dst_type(GPR)]
3207 pub dst: Dst,
3208
3209 #[src_type(ALU)]
3210 pub pos: Src,
3211
3212 #[src_type(ALU)]
3213 pub width: Src,
3214
3215 pub wrap: bool,
3216 }
3217
3218 impl DisplayOp for OpBMsk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3219 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3220 let wrap = if self.wrap { ".wrap" } else { ".clamp" };
3221 write!(f, "bmsk{} {} {}", wrap, self.pos, self.width)
3222 }
3223 }
3224 impl_display_for_op!(OpBMsk);
3225
3226 #[repr(C)]
3227 #[derive(SrcsAsSlice, DstsAsSlice)]
3228 pub struct OpBRev {
3229 #[dst_type(GPR)]
3230 pub dst: Dst,
3231
3232 #[src_type(ALU)]
3233 pub src: Src,
3234 }
3235
3236 impl DisplayOp for OpBRev {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3237 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3238 write!(f, "brev {}", self.src)
3239 }
3240 }
3241 impl_display_for_op!(OpBRev);
3242
3243 /// Bitfield extract. Extracts all bits from `base` starting at `offset` into
3244 /// `dst`.
3245 #[repr(C)]
3246 #[derive(SrcsAsSlice, DstsAsSlice)]
3247 pub struct OpBfe {
3248 /// Where to insert the bits.
3249 #[dst_type(GPR)]
3250 pub dst: Dst,
3251
3252 /// The source of bits to extract.
3253 #[src_type(ALU)]
3254 pub base: Src,
3255
3256 /// The range of bits to extract. This source is interpreted as four
3257 /// separate bytes, [b0, b1, b2, b3].
3258 ///
3259 /// b0 and b1: unused
3260 /// b2: the number of bits to extract.
3261 /// b3: the offset of the first bit to extract.
3262 ///
3263 /// This matches the way the hardware works.
3264 #[src_type(ALU)]
3265 pub range: Src,
3266
3267 /// Whether the output is signed
3268 pub signed: bool,
3269
3270 /// Whether to reverse the bits before inserting them into `dst`.
3271 pub reverse: bool,
3272 }
3273
3274 impl DisplayOp for OpBfe {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3275 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3276 write!(f, "bfe")?;
3277 if self.signed {
3278 write!(f, ".s")?;
3279 }
3280 if self.reverse {
3281 write!(f, ".rev")?;
3282 }
3283 write!(f, " {} {}", self.base, self.range,)
3284 }
3285 }
3286 impl_display_for_op!(OpBfe);
3287
/// Bit-scan op displayed with the `flo` mnemonic.
///
/// Constant folding derives the result from the number of leading zero
/// bits of the source.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpFlo {
    /// Receives the bit position or shift amount.
    #[dst_type(GPR)]
    pub dst: Dst,

    /// The value to scan.
    #[src_type(ALU)]
    pub src: Src,

    /// When set and the source has its top bit set, folding scans the
    /// bitwise complement of the source instead of the source itself.
    pub signed: bool,
    /// Selects the leading-zero count as the result instead of the bit
    /// position (`31 - count`); displayed as `.samt`.
    pub return_shift_amount: bool,
}
3300
3301 impl Foldable for OpFlo {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3302 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3303 let src = f.get_u32_src(self, &self.src);
3304 let leading = if self.signed && (src & 0x80000000) != 0 {
3305 (!src).leading_zeros()
3306 } else {
3307 src.leading_zeros()
3308 };
3309 let dst = if self.return_shift_amount {
3310 leading
3311 } else {
3312 31 - leading
3313 };
3314 f.set_u32_dst(self, &self.dst, dst);
3315 }
3316 }
3317
3318 impl DisplayOp for OpFlo {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3319 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3320 write!(f, "flo")?;
3321 if self.return_shift_amount {
3322 write!(f, ".samt")?;
3323 }
3324 write!(f, " {}", self.src)
3325 }
3326 }
3327 impl_display_for_op!(OpFlo);
3328
/// Integer absolute value, displayed with the `iabs` mnemonic.
///
/// Constant folding treats the source as a signed 32-bit integer.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpIAbs {
    /// Receives the absolute value.
    #[dst_type(GPR)]
    pub dst: Dst,

    /// The value whose absolute value is taken.
    #[src_type(ALU)]
    pub src: Src,
}
3338
3339 impl Foldable for OpIAbs {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3340 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3341 let src = f.get_u32_src(self, &self.src);
3342 let dst = (src as i32).abs() as u32;
3343 f.set_u32_dst(self, &self.dst, dst);
3344 }
3345 }
3346
3347 impl DisplayOp for OpIAbs {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3348 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3349 write!(f, "iabs {}", self.src)
3350 }
3351 }
3352 impl_display_for_op!(OpIAbs);
3353
3354 /// Only used on SM50
3355 #[repr(C)]
3356 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3357 pub struct OpIAdd2 {
3358 #[dst_type(GPR)]
3359 pub dst: Dst,
3360 #[dst_type(Carry)]
3361 pub carry_out: Dst,
3362
3363 #[src_type(I32)]
3364 pub srcs: [Src; 2],
3365 }
3366
3367 impl Foldable for OpIAdd2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3368 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3369 let srcs = [
3370 f.get_u32_src(self, &self.srcs[0]),
3371 f.get_u32_src(self, &self.srcs[1]),
3372 ];
3373
3374 let mut sum = 0_u64;
3375 for i in 0..2 {
3376 if self.srcs[i].src_mod.is_ineg() {
3377 // This is a very literal interpretation of 2's compliment.
3378 // This is not -u64::from(src) or u64::from(-src).
3379 sum += u64::from(!srcs[i]) + 1;
3380 } else {
3381 sum += u64::from(srcs[i]);
3382 }
3383 }
3384
3385 f.set_u32_dst(self, &self.dst, sum as u32);
3386 f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3387 }
3388 }
3389
3390 impl DisplayOp for OpIAdd2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3391 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3392 write!(f, "iadd2 {} {}", self.srcs[0], self.srcs[1])
3393 }
3394 }
3395
3396 /// Only used on SM50
3397 #[repr(C)]
3398 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3399 pub struct OpIAdd2X {
3400 #[dst_type(GPR)]
3401 pub dst: Dst,
3402 #[dst_type(Carry)]
3403 pub carry_out: Dst,
3404
3405 #[src_type(B32)]
3406 pub srcs: [Src; 2],
3407 #[src_type(Carry)]
3408 pub carry_in: Src,
3409 }
3410
3411 impl Foldable for OpIAdd2X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3412 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3413 let srcs = [
3414 f.get_u32_bnot_src(self, &self.srcs[0]),
3415 f.get_u32_bnot_src(self, &self.srcs[1]),
3416 ];
3417 let carry_in = f.get_carry_src(self, &self.carry_in);
3418
3419 let sum = u64::from(srcs[0]) + u64::from(srcs[1]) + u64::from(carry_in);
3420
3421 f.set_u32_dst(self, &self.dst, sum as u32);
3422 f.set_carry_dst(self, &self.carry_out, sum >= (1 << 32));
3423 }
3424 }
3425
3426 impl DisplayOp for OpIAdd2X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3427 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3428 write!(f, "iadd2.x {} {}", self.srcs[0], self.srcs[1])?;
3429 if !self.carry_in.is_zero() {
3430 write!(f, " {}", self.carry_in)?;
3431 }
3432 Ok(())
3433 }
3434 }
3435
3436 #[repr(C)]
3437 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3438 pub struct OpIAdd3 {
3439 #[dst_type(GPR)]
3440 pub dst: Dst,
3441
3442 #[dst_type(Pred)]
3443 pub overflow: [Dst; 2],
3444
3445 #[src_type(I32)]
3446 pub srcs: [Src; 3],
3447 }
3448
3449 impl Foldable for OpIAdd3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3450 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3451 let srcs = [
3452 f.get_u32_src(self, &self.srcs[0]),
3453 f.get_u32_src(self, &self.srcs[1]),
3454 f.get_u32_src(self, &self.srcs[2]),
3455 ];
3456
3457 let mut sum = 0_u64;
3458 for i in 0..3 {
3459 if self.srcs[i].src_mod.is_ineg() {
3460 // This is a very literal interpretation of 2's compliment.
3461 // This is not -u64::from(src) or u64::from(-src).
3462 sum += u64::from(!srcs[i]) + 1;
3463 } else {
3464 sum += u64::from(srcs[i]);
3465 }
3466 }
3467
3468 f.set_u32_dst(self, &self.dst, sum as u32);
3469 f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3470 f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3471 }
3472 }
3473
3474 impl DisplayOp for OpIAdd3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3475 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3476 write!(
3477 f,
3478 "iadd3 {} {} {}",
3479 self.srcs[0], self.srcs[1], self.srcs[2],
3480 )
3481 }
3482 }
3483 impl_display_for_op!(OpIAdd3);
3484
3485 #[repr(C)]
3486 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
3487 pub struct OpIAdd3X {
3488 #[dst_type(GPR)]
3489 pub dst: Dst,
3490
3491 #[dst_type(Pred)]
3492 pub overflow: [Dst; 2],
3493
3494 #[src_type(B32)]
3495 pub srcs: [Src; 3],
3496
3497 #[src_type(Pred)]
3498 pub carry: [Src; 2],
3499 }
3500
3501 impl Foldable for OpIAdd3X {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3502 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3503 let srcs = [
3504 f.get_u32_bnot_src(self, &self.srcs[0]),
3505 f.get_u32_bnot_src(self, &self.srcs[1]),
3506 f.get_u32_bnot_src(self, &self.srcs[2]),
3507 ];
3508 let carry = [
3509 f.get_pred_src(self, &self.carry[0]),
3510 f.get_pred_src(self, &self.carry[1]),
3511 ];
3512
3513 let mut sum = 0_u64;
3514 for i in 0..3 {
3515 sum += u64::from(srcs[i]);
3516 }
3517
3518 for i in 0..2 {
3519 sum += u64::from(carry[i]);
3520 }
3521
3522 f.set_u32_dst(self, &self.dst, sum as u32);
3523 f.set_pred_dst(self, &self.overflow[0], sum >= 1_u64 << 32);
3524 f.set_pred_dst(self, &self.overflow[1], sum >= 2_u64 << 32);
3525 }
3526 }
3527
3528 impl DisplayOp for OpIAdd3X {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3529 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3530 write!(
3531 f,
3532 "iadd3.x {} {} {} {} {}",
3533 self.srcs[0],
3534 self.srcs[1],
3535 self.srcs[2],
3536 self.carry[0],
3537 self.carry[1]
3538 )
3539 }
3540 }
3541 impl_display_for_op!(OpIAdd3X);
3542
3543 #[repr(C)]
3544 #[derive(SrcsAsSlice, DstsAsSlice)]
3545 pub struct OpIDp4 {
3546 #[dst_type(GPR)]
3547 pub dst: Dst,
3548
3549 pub src_types: [IntType; 2],
3550
3551 #[src_type(I32)]
3552 pub srcs: [Src; 3],
3553 }
3554
3555 impl DisplayOp for OpIDp4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3556 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3557 write!(
3558 f,
3559 "idp4{}{} {} {} {}",
3560 self.src_types[0],
3561 self.src_types[1],
3562 self.srcs[0],
3563 self.srcs[1],
3564 self.srcs[2],
3565 )
3566 }
3567 }
3568 impl_display_for_op!(OpIDp4);
3569
/// The `imad` operation.
///
/// Reads three ALU-typed sources and writes one GPR-typed destination.
///
/// NOTE(review): presumably `srcs[0] * srcs[1] + srcs[2]` -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMad {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 3],

    // Not included in the printed form of the op.
    pub signed: bool,
}
3581
3582 impl DisplayOp for OpIMad {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3583 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3584 write!(f, "imad {} {} {}", self.srcs[0], self.srcs[1], self.srcs[2],)
3585 }
3586 }
3587 impl_display_for_op!(OpIMad);
3588
/// The `imul` operation.
///
/// Only used on SM50
///
/// Reads two ALU-typed sources and writes one GPR-typed destination.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMul {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    // Per-source signedness; printed as `.s32` (true) or `.u32` (false).
    pub signed: [bool; 2],
    // Printed as the `.hi` suffix.
    // NOTE(review): presumably selects the upper half of the product --
    // TODO confirm.
    pub high: bool,
}
3602
3603 impl DisplayOp for OpIMul {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3604 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3605 write!(f, "imul")?;
3606 if self.high {
3607 write!(f, ".hi")?;
3608 }
3609 let src_type = |signed| if signed { ".s32" } else { ".u32" };
3610 write!(
3611 f,
3612 "{}{}",
3613 src_type(self.signed[0]),
3614 src_type(self.signed[1])
3615 )?;
3616 write!(f, " {} {}", self.srcs[0], self.srcs[1])
3617 }
3618 }
3619
/// The `imad64` operation.
///
/// Reads three ALU-typed sources and writes a `Vec`-typed destination.
///
/// NOTE(review): presumably a multiply-add producing a 64-bit result held in
/// a register pair -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMad64 {
    #[dst_type(Vec)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 3],

    // Not included in the printed form of the op.
    pub signed: bool,
}
3631
3632 impl DisplayOp for OpIMad64 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3633 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3634 write!(
3635 f,
3636 "imad64 {} {} {}",
3637 self.srcs[0], self.srcs[1], self.srcs[2],
3638 )
3639 }
3640 }
3641 impl_display_for_op!(OpIMad64);
3642
/// The `imnmx` operation.
///
/// Reads two ALU-typed sources plus one predicate source and writes one
/// GPR-typed destination.  `cmp_type` is printed as a mnemonic suffix.
///
/// NOTE(review): presumably `min` selects between minimum and maximum of the
/// two sources -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpIMnMx {
    #[dst_type(GPR)]
    pub dst: Dst,

    pub cmp_type: IntCmpType,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    #[src_type(Pred)]
    pub min: Src,
}
3657
3658 impl DisplayOp for OpIMnMx {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3659 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3660 write!(
3661 f,
3662 "imnmx{} {} {} {}",
3663 self.cmp_type, self.srcs[0], self.srcs[1], self.min
3664 )
3665 }
3666 }
3667 impl_display_for_op!(OpIMnMx);
3668
/// The `isetp` operation: integer compare writing a predicate.
///
/// `srcs[0]` and `srcs[1]` are compared with `cmp_op`, as signed or unsigned
/// values depending on `cmp_type`.  The comparison result is combined with
/// `accum` through `set_op` and written to `dst`.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpISetP {
    #[dst_type(Pred)]
    pub dst: Dst,

    pub set_op: PredSetOp,
    pub cmp_op: IntCmpOp,
    pub cmp_type: IntCmpType,
    // Printed as `.ex`.  When set and both sources are equal, constant
    // folding uses `low_cmp` in place of the comparison result.
    pub ex: bool,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    #[src_type(Pred)]
    pub accum: Src,

    #[src_type(Pred)]
    pub low_cmp: Src,
}
3689
3690 impl Foldable for OpISetP {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3691 fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3692 let x = f.get_u32_src(self, &self.srcs[0]);
3693 let y = f.get_u32_src(self, &self.srcs[1]);
3694 let accum = f.get_pred_src(self, &self.accum);
3695 let low_cmp = f.get_pred_src(self, &self.low_cmp);
3696
3697 let cmp = if self.cmp_type.is_signed() {
3698 let x = x as i32;
3699 let y = y as i32;
3700 match &self.cmp_op {
3701 IntCmpOp::Eq => x == y,
3702 IntCmpOp::Ne => x != y,
3703 IntCmpOp::Lt => x < y,
3704 IntCmpOp::Le => x <= y,
3705 IntCmpOp::Gt => x > y,
3706 IntCmpOp::Ge => x >= y,
3707 }
3708 } else {
3709 match &self.cmp_op {
3710 IntCmpOp::Eq => x == y,
3711 IntCmpOp::Ne => x != y,
3712 IntCmpOp::Lt => x < y,
3713 IntCmpOp::Le => x <= y,
3714 IntCmpOp::Gt => x > y,
3715 IntCmpOp::Ge => x >= y,
3716 }
3717 };
3718
3719 let cmp = if self.ex && x == y {
3720 // Pre-Volta, isetp.x takes the accumulator into account. If we
3721 // want to support this, we need to take an an accumulator into
3722 // account. Disallow it for now.
3723 assert!(sm.sm() >= 70);
3724 low_cmp
3725 } else {
3726 cmp
3727 };
3728
3729 let dst = self.set_op.eval(cmp, accum);
3730
3731 f.set_pred_dst(self, &self.dst, dst);
3732 }
3733 }
3734
3735 impl DisplayOp for OpISetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3736 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3737 write!(f, "isetp{}{}", self.cmp_op, self.cmp_type)?;
3738 if !self.set_op.is_trivial(&self.accum) {
3739 write!(f, "{}", self.set_op)?;
3740 }
3741 if self.ex {
3742 write!(f, ".ex")?;
3743 }
3744 write!(f, " {} {}", self.srcs[0], self.srcs[1])?;
3745 if !self.set_op.is_trivial(&self.accum) {
3746 write!(f, " {}", self.accum)?;
3747 }
3748 if self.ex {
3749 write!(f, " {}", self.low_cmp)?;
3750 }
3751 Ok(())
3752 }
3753 }
3754 impl_display_for_op!(OpISetP);
3755
/// The `lop2` operation: a two-input bitwise logic op.
///
/// Combines the two `B32`-typed sources according to `op` (and, or, xor, or
/// pass-through of the second source) and writes the result to `dst`.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpLop2 {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(B32)]
    pub srcs: [Src; 2],

    pub op: LogicOp2,
}
3767
3768 impl DisplayOp for OpLop2 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3769 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3770 write!(f, "lop2.{} {} {}", self.op, self.srcs[0], self.srcs[1],)
3771 }
3772 }
3773
3774 impl Foldable for OpLop2 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3775 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3776 let srcs = [
3777 f.get_u32_bnot_src(self, &self.srcs[0]),
3778 f.get_u32_bnot_src(self, &self.srcs[1]),
3779 ];
3780 let dst = match self.op {
3781 LogicOp2::And => srcs[0] & srcs[1],
3782 LogicOp2::Or => srcs[0] | srcs[1],
3783 LogicOp2::Xor => srcs[0] ^ srcs[1],
3784 LogicOp2::PassB => srcs[1],
3785 };
3786 f.set_u32_dst(self, &self.dst, dst);
3787 }
3788 }
3789
/// The `lop3` operation: a three-input bitwise logic op.
///
/// The result written to `dst` is `op` evaluated on the three ALU-typed
/// sources.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpLop3 {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 3],

    pub op: LogicOp3,
}
3801
3802 impl Foldable for OpLop3 {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3803 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3804 let srcs = [
3805 f.get_u32_bnot_src(self, &self.srcs[0]),
3806 f.get_u32_bnot_src(self, &self.srcs[1]),
3807 f.get_u32_bnot_src(self, &self.srcs[2]),
3808 ];
3809 let dst = self.op.eval(srcs[0], srcs[1], srcs[2]);
3810 f.set_u32_dst(self, &self.dst, dst);
3811 }
3812 }
3813
3814 impl DisplayOp for OpLop3 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3815 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3816 write!(
3817 f,
3818 "lop3.{} {} {} {}",
3819 self.op, self.srcs[0], self.srcs[1], self.srcs[2],
3820 )
3821 }
3822 }
3823 impl_display_for_op!(OpLop3);
3824
/// Shuffle mode of a `shfl` operation.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum ShflOp {
    Idx,
    Up,
    Down,
    Bfly,
}

impl fmt::Display for ShflOp {
    /// Writes the lower-case mode name (`idx`, `up`, `down` or `bfly`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            ShflOp::Idx => "idx",
            ShflOp::Up => "up",
            ShflOp::Down => "down",
            ShflOp::Bfly => "bfly",
        };
        f.write_str(name)
    }
}
3843
/// The `shf` operation: a shift of the 64-bit value formed by `high:low`.
///
/// Constant folding treats `low` as bits 0..32 and `high` as bits 32..64 of
/// the value being shifted, shifts it by `shift`, and writes one 32-bit half
/// of the result to `dst`.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpShf {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(GPR)]
    pub low: Src,

    #[src_type(ALU)]
    pub high: Src,

    #[src_type(ALU)]
    pub shift: Src,

    // Shift right when true (printed `.r`), left otherwise (printed `.l`).
    pub right: bool,
    // When true the shift amount is masked to the data type's bit width;
    // otherwise it is clamped to that width.  Printed as `.w`.
    pub wrap: bool,
    pub data_type: IntType,
    // When true the upper 32 bits of the shifted value are returned.
    // Printed as `.hi`.
    pub dst_high: bool,
}
3864
3865 impl Foldable for OpShf {
fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)3866 fn fold(&self, sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
3867 let low = f.get_u32_src(self, &self.low);
3868 let high = f.get_u32_src(self, &self.high);
3869 let shift = f.get_u32_src(self, &self.shift);
3870
3871 let bits: u32 = self.data_type.bits().try_into().unwrap();
3872 let shift = if self.wrap {
3873 shift & (bits - 1)
3874 } else {
3875 min(shift, bits)
3876 };
3877
3878 let x = u64::from(low) | (u64::from(high) << 32);
3879 let shifted = if sm.sm() < 70
3880 && self.dst_high
3881 && self.data_type != IntType::I64
3882 {
3883 if self.right {
3884 x.checked_shr(shift).unwrap_or(0) as u64
3885 } else {
3886 x.checked_shl(shift).unwrap_or(0) as u64
3887 }
3888 } else if self.data_type.is_signed() {
3889 if self.right {
3890 (x as i64).checked_shr(shift).unwrap_or(0) as u64
3891 } else {
3892 (x as i64).checked_shl(shift).unwrap_or(0) as u64
3893 }
3894 } else {
3895 if self.right {
3896 x.checked_shr(shift).unwrap_or(0) as u64
3897 } else {
3898 x.checked_shl(shift).unwrap_or(0) as u64
3899 }
3900 };
3901
3902 let dst = if (sm.sm() < 70 && !self.right) || self.dst_high {
3903 (shifted >> 32) as u32
3904 } else {
3905 shifted as u32
3906 };
3907
3908 f.set_u32_dst(self, &self.dst, dst);
3909 }
3910 }
3911
3912 impl DisplayOp for OpShf {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3913 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3914 write!(f, "shf")?;
3915 if self.right {
3916 write!(f, ".r")?;
3917 } else {
3918 write!(f, ".l")?;
3919 }
3920 if self.wrap {
3921 write!(f, ".w")?;
3922 }
3923 write!(f, "{}", self.data_type)?;
3924 if self.dst_high {
3925 write!(f, ".hi")?;
3926 }
3927 write!(f, " {} {} {}", self.low, self.high, self.shift)
3928 }
3929 }
3930 impl_display_for_op!(OpShf);
3931
/// The `shl` operation.
///
/// Only used on SM50
///
/// NOTE(review): presumably shifts `src` left by `shift` -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpShl {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(GPR)]
    pub src: Src,

    #[src_type(ALU)]
    pub shift: Src,

    // Printed as the `.w` suffix.
    pub wrap: bool,
}
3947
3948 impl DisplayOp for OpShl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3949 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3950 write!(f, "shl")?;
3951 if self.wrap {
3952 write!(f, ".w")?;
3953 }
3954 write!(f, " {} {}", self.src, self.shift)
3955 }
3956 }
3957
/// The `shr` operation.
///
/// Only used on SM50
///
/// NOTE(review): presumably shifts `src` right by `shift`, arithmetically
/// when `signed` is set -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpShr {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(GPR)]
    pub src: Src,

    #[src_type(ALU)]
    pub shift: Src,

    // Printed as the `.w` suffix.
    pub wrap: bool,
    // When false the op is printed with a `.u32` suffix.
    pub signed: bool,
}
3974
3975 impl DisplayOp for OpShr {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result3976 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3977 write!(f, "shr")?;
3978 if self.wrap {
3979 write!(f, ".w")?;
3980 }
3981 if !self.signed {
3982 write!(f, ".u32")?;
3983 }
3984 write!(f, " {} {}", self.src, self.shift)
3985 }
3986 }
3987
/// The `f2f` operation: conversion from one float type to another.
///
/// The source and destination slices are implemented by hand because their
/// types depend on `src_type` and `dst_type`.
#[repr(C)]
pub struct OpF2F {
    pub dst: Dst,
    pub src: Src,

    /// Float type of `src`
    pub src_type: FloatType,
    /// Float type of `dst`
    pub dst_type: FloatType,
    pub rnd_mode: FRndMode,
    /// Printed as the `.ftz` suffix
    pub ftz: bool,
    /// For 16-bit up-conversions, take the high 16 bits of the source register.
    /// For 16-bit down-conversions, place the result into the upper 16 bits of
    /// the destination register
    pub high: bool,
    /// Round to the nearest integer rather than nearest float
    ///
    /// Not available on SM70+
    pub integer_rnd: bool,
}
4006
4007 impl AsSlice<Src> for OpF2F {
4008 type Attr = SrcType;
4009
as_slice(&self) -> &[Src]4010 fn as_slice(&self) -> &[Src] {
4011 std::slice::from_ref(&self.src)
4012 }
4013
as_mut_slice(&mut self) -> &mut [Src]4014 fn as_mut_slice(&mut self) -> &mut [Src] {
4015 std::slice::from_mut(&mut self.src)
4016 }
4017
attrs(&self) -> SrcTypeList4018 fn attrs(&self) -> SrcTypeList {
4019 let src_type = match self.src_type {
4020 FloatType::F16 => SrcType::F16,
4021 FloatType::F32 => SrcType::F32,
4022 FloatType::F64 => SrcType::F64,
4023 };
4024 SrcTypeList::Uniform(src_type)
4025 }
4026 }
4027
4028 impl AsSlice<Dst> for OpF2F {
4029 type Attr = DstType;
4030
as_slice(&self) -> &[Dst]4031 fn as_slice(&self) -> &[Dst] {
4032 std::slice::from_ref(&self.dst)
4033 }
4034
as_mut_slice(&mut self) -> &mut [Dst]4035 fn as_mut_slice(&mut self) -> &mut [Dst] {
4036 std::slice::from_mut(&mut self.dst)
4037 }
4038
attrs(&self) -> DstTypeList4039 fn attrs(&self) -> DstTypeList {
4040 let dst_type = match self.dst_type {
4041 FloatType::F16 => DstType::F16,
4042 FloatType::F32 => DstType::F32,
4043 FloatType::F64 => DstType::F64,
4044 };
4045 DstTypeList::Uniform(dst_type)
4046 }
4047 }
4048
4049 impl DisplayOp for OpF2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4050 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4051 write!(f, "f2f")?;
4052 if self.ftz {
4053 write!(f, ".ftz")?;
4054 }
4055 if self.integer_rnd {
4056 write!(f, ".int")?;
4057 }
4058 write!(
4059 f,
4060 "{}{}{} {}",
4061 self.dst_type, self.src_type, self.rnd_mode, self.src,
4062 )
4063 }
4064 }
4065 impl_display_for_op!(OpF2F);
4066
/// The `f2fp.pack_ab` operation.
///
/// Reads two ALU-typed sources and writes one GPR-typed destination.
///
/// NOTE(review): presumably converts both sources to a narrower float type
/// and packs them into one register -- TODO confirm.
#[repr(C)]
#[derive(DstsAsSlice, SrcsAsSlice)]
pub struct OpF2FP {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    // Only printed when it differs from `FRndMode::NearestEven`.
    pub rnd_mode: FRndMode,
}
4078
4079 impl DisplayOp for OpF2FP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4080 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4081 write!(f, "f2fp.pack_ab")?;
4082 if self.rnd_mode != FRndMode::NearestEven {
4083 write!(f, "{}", self.rnd_mode)?;
4084 }
4085 write!(f, " {}, {}", self.srcs[0], self.srcs[1],)
4086 }
4087 }
4088 impl_display_for_op!(OpF2FP);
4089
/// The `f2i` operation: conversion from a float type to an integer type.
///
/// The source slice is implemented by hand because the source type depends
/// on `src_type`.
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpF2I {
    #[dst_type(GPR)]
    pub dst: Dst,

    pub src: Src,

    pub src_type: FloatType,
    pub dst_type: IntType,
    pub rnd_mode: FRndMode,
    // Printed as the `.ftz` suffix.
    pub ftz: bool,
}
4103
4104 impl AsSlice<Src> for OpF2I {
4105 type Attr = SrcType;
4106
as_slice(&self) -> &[Src]4107 fn as_slice(&self) -> &[Src] {
4108 std::slice::from_ref(&self.src)
4109 }
4110
as_mut_slice(&mut self) -> &mut [Src]4111 fn as_mut_slice(&mut self) -> &mut [Src] {
4112 std::slice::from_mut(&mut self.src)
4113 }
4114
attrs(&self) -> SrcTypeList4115 fn attrs(&self) -> SrcTypeList {
4116 let src_type = match self.src_type {
4117 FloatType::F16 => SrcType::F16,
4118 FloatType::F32 => SrcType::F32,
4119 FloatType::F64 => SrcType::F64,
4120 };
4121 SrcTypeList::Uniform(src_type)
4122 }
4123 }
4124
4125 impl DisplayOp for OpF2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4126 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4127 let ftz = if self.ftz { ".ftz" } else { "" };
4128 write!(
4129 f,
4130 "f2i{}{}{}{ftz} {}",
4131 self.dst_type, self.src_type, self.rnd_mode, self.src,
4132 )
4133 }
4134 }
4135 impl_display_for_op!(OpF2I);
4136
/// The `i2f` operation: conversion from an integer type to a float type.
///
/// The source and destination slices are implemented by hand because their
/// types depend on `src_type` and `dst_type`.
#[repr(C)]
pub struct OpI2F {
    pub dst: Dst,
    pub src: Src,

    /// Float type of `dst`
    pub dst_type: FloatType,
    /// Integer type of `src`
    pub src_type: IntType,
    pub rnd_mode: FRndMode,
}
4146
4147 impl AsSlice<Src> for OpI2F {
4148 type Attr = SrcType;
4149
as_slice(&self) -> &[Src]4150 fn as_slice(&self) -> &[Src] {
4151 std::slice::from_ref(&self.src)
4152 }
4153
as_mut_slice(&mut self) -> &mut [Src]4154 fn as_mut_slice(&mut self) -> &mut [Src] {
4155 std::slice::from_mut(&mut self.src)
4156 }
4157
attrs(&self) -> SrcTypeList4158 fn attrs(&self) -> SrcTypeList {
4159 if self.src_type.bits() <= 32 {
4160 SrcTypeList::Uniform(SrcType::ALU)
4161 } else {
4162 SrcTypeList::Uniform(SrcType::GPR)
4163 }
4164 }
4165 }
4166
4167 impl AsSlice<Dst> for OpI2F {
4168 type Attr = DstType;
4169
as_slice(&self) -> &[Dst]4170 fn as_slice(&self) -> &[Dst] {
4171 std::slice::from_ref(&self.dst)
4172 }
4173
as_mut_slice(&mut self) -> &mut [Dst]4174 fn as_mut_slice(&mut self) -> &mut [Dst] {
4175 std::slice::from_mut(&mut self.dst)
4176 }
4177
attrs(&self) -> DstTypeList4178 fn attrs(&self) -> DstTypeList {
4179 let dst_type = match self.dst_type {
4180 FloatType::F16 => DstType::F16,
4181 FloatType::F32 => DstType::F32,
4182 FloatType::F64 => DstType::F64,
4183 };
4184 DstTypeList::Uniform(dst_type)
4185 }
4186 }
4187
4188 impl DisplayOp for OpI2F {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4189 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4190 write!(
4191 f,
4192 "i2f{}{}{} {}",
4193 self.dst_type, self.src_type, self.rnd_mode, self.src,
4194 )
4195 }
4196 }
4197 impl_display_for_op!(OpI2F);
4198
/// The `i2i` operation: conversion from one integer type to another.
///
/// Not used on SM70+
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpI2I {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub src: Src,

    pub src_type: IntType,
    pub dst_type: IntType,

    // Printed as the `.sat` suffix.
    pub saturate: bool,
    // Printed as `.abs` after the source.
    pub abs: bool,
    // Printed as `.neg` after the source.
    pub neg: bool,
}
4216
4217 impl DisplayOp for OpI2I {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4218 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4219 write!(f, "i2i")?;
4220 if self.saturate {
4221 write!(f, ".sat ")?;
4222 }
4223 write!(f, "{}{} {}", self.dst_type, self.src_type, self.src,)?;
4224 if self.abs {
4225 write!(f, ".abs")?;
4226 }
4227 if self.neg {
4228 write!(f, ".neg")?;
4229 }
4230 Ok(())
4231 }
4232 }
4233 impl_display_for_op!(OpI2I);
4234
/// The `frnd` operation.
///
/// The source slice is implemented by hand because the source type depends
/// on `src_type`.
///
/// NOTE(review): presumably rounds a float to an integral value according to
/// `rnd_mode` -- TODO confirm.
#[repr(C)]
#[derive(DstsAsSlice)]
pub struct OpFRnd {
    #[dst_type(F32)]
    pub dst: Dst,

    pub src: Src,

    pub dst_type: FloatType,
    pub src_type: FloatType,
    pub rnd_mode: FRndMode,
    // Printed as the `.ftz` suffix.
    pub ftz: bool,
}
4248
4249 impl AsSlice<Src> for OpFRnd {
4250 type Attr = SrcType;
4251
as_slice(&self) -> &[Src]4252 fn as_slice(&self) -> &[Src] {
4253 std::slice::from_ref(&self.src)
4254 }
4255
as_mut_slice(&mut self) -> &mut [Src]4256 fn as_mut_slice(&mut self) -> &mut [Src] {
4257 std::slice::from_mut(&mut self.src)
4258 }
4259
attrs(&self) -> SrcTypeList4260 fn attrs(&self) -> SrcTypeList {
4261 let src_type = match self.src_type {
4262 FloatType::F16 => SrcType::F16,
4263 FloatType::F32 => SrcType::F32,
4264 FloatType::F64 => SrcType::F64,
4265 };
4266 SrcTypeList::Uniform(src_type)
4267 }
4268 }
4269
4270 impl DisplayOp for OpFRnd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4271 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4272 let ftz = if self.ftz { ".ftz" } else { "" };
4273 write!(
4274 f,
4275 "frnd{}{}{}{ftz} {}",
4276 self.dst_type, self.src_type, self.rnd_mode, self.src,
4277 )
4278 }
4279 }
4280 impl_display_for_op!(OpFRnd);
4281
/// The `mov` operation: copies one ALU-typed source to a GPR-typed
/// destination.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpMov {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub src: Src,

    // Printed in hex as `mov[..]` unless it equals 0xf.
    // NOTE(review): presumably a 4-bit mask of the lanes in each quad that
    // perform the move -- TODO confirm.
    pub quad_lanes: u8,
}
4293
4294 impl DisplayOp for OpMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4295 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4296 if self.quad_lanes == 0xf {
4297 write!(f, "mov {}", self.src)
4298 } else {
4299 write!(f, "mov[{:#x}] {}", self.quad_lanes, self.src)
4300 }
4301 }
4302 }
4303 impl_display_for_op!(OpMov);
4304
/// One 4-bit byte selector of a permute selection.
///
/// Bits 0..2 hold the byte index within a source, bit 2 holds the source
/// index and bit 3 is the "msb" flag.
#[derive(Copy, Clone)]
pub struct PrmtSelByte(u8);

impl PrmtSelByte {
    /// A selector with every bit set; it does not fit in four bits.
    pub const INVALID: PrmtSelByte = PrmtSelByte(u8::MAX);

    /// Builds a selector for byte `byte_idx` of source `src_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `src_idx` is not below 2 or `byte_idx` is not below 4.
    pub fn new(src_idx: usize, byte_idx: usize, msb: bool) -> PrmtSelByte {
        assert!(src_idx < 2);
        assert!(byte_idx < 4);

        let msb_bit: u8 = if msb { 0x8 } else { 0x0 };
        PrmtSelByte(msb_bit | ((src_idx as u8) << 2) | (byte_idx as u8))
    }

    /// Index of the selected source (0 or 1).
    pub fn src(&self) -> usize {
        usize::from((self.0 >> 2) & 0x1)
    }

    /// Index of the selected byte within the source (0 through 3).
    pub fn byte(&self) -> usize {
        usize::from(self.0 & 0x3)
    }

    /// Whether the "msb" flag is set.
    pub fn msb(&self) -> bool {
        self.0 & 0x8 != 0
    }

    /// Applies this selector to the source value `u`.
    ///
    /// Returns the selected byte of `u`, or, when the "msb" flag is set, that
    /// byte's top bit replicated across all eight bits.
    pub fn fold_u32(&self, u: u32) -> u8 {
        let selected = (u >> (self.byte() * 8)) as u8;
        if !self.msb() {
            return selected;
        }
        if selected & 0x80 != 0 {
            0xff
        } else {
            0x00
        }
    }
}
4344
4345 #[derive(Clone, Copy, Eq, Hash, PartialEq)]
4346 pub struct PrmtSel(pub u16);
4347
4348 impl PrmtSel {
new(bytes: [PrmtSelByte; 4]) -> PrmtSel4349 pub fn new(bytes: [PrmtSelByte; 4]) -> PrmtSel {
4350 let mut sel = 0;
4351 for i in 0..4 {
4352 assert!(bytes[i].0 <= 0xf);
4353 sel |= u16::from(bytes[i].0) << (i * 4);
4354 }
4355 PrmtSel(sel)
4356 }
4357
get(&self, byte_idx: usize) -> PrmtSelByte4358 pub fn get(&self, byte_idx: usize) -> PrmtSelByte {
4359 assert!(byte_idx < 4);
4360 PrmtSelByte(((self.0 >> (byte_idx * 4)) & 0xf) as u8)
4361 }
4362 }
4363
/// Mode of a `prmt` operation.
///
/// Each mode is printed as a mnemonic suffix; `Index` prints nothing.
// Not every mode is constructed by the compiler, hence the allow.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PrmtMode {
    Index,
    Forward4Extract,
    Backward4Extract,
    Replicate8,
    EdgeClampLeft,
    EdgeClampRight,
    Replicate16,
}

impl fmt::Display for PrmtMode {
    /// Writes the mode suffix (`.f4e`, `.b4e`, `.rc8`, `.ecl`, `.ecr` or
    /// `.rc16`); `Index` is the default mode and writes nothing.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PrmtMode::Index => Ok(()),
            PrmtMode::Forward4Extract => write!(f, ".f4e"),
            PrmtMode::Backward4Extract => write!(f, ".b4e"),
            PrmtMode::Replicate8 => write!(f, ".rc8"),
            PrmtMode::EdgeClampLeft => write!(f, ".ecl"),
            // Must differ from EdgeClampLeft so the two modes can be told
            // apart in printed IR.
            PrmtMode::EdgeClampRight => write!(f, ".ecr"),
            PrmtMode::Replicate16 => write!(f, ".rc16"),
        }
    }
}
4389
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
/// Permutes `srcs` into `dst` using `selection`.
///
/// In `PrmtMode::Index` the low 16 bits of `sel` are interpreted as a
/// `PrmtSel`: one 4-bit selector per destination byte, each picking a byte
/// from one of the two sources.
pub struct OpPrmt {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(ALU)]
    pub srcs: [Src; 2],

    #[src_type(ALU)]
    pub sel: Src,

    pub mode: PrmtMode,
}
4405
4406 impl OpPrmt {
get_sel(&self) -> Option<PrmtSel>4407 pub fn get_sel(&self) -> Option<PrmtSel> {
4408 // TODO: We could construct a PrmtSel for the other modes but we don't
4409 // use them right now because they're kinda pointless.
4410 if self.mode != PrmtMode::Index {
4411 return None;
4412 }
4413
4414 if let Some(sel) = self.sel.as_u32() {
4415 // The top 16 bits are ignored
4416 Some(PrmtSel(sel as u16))
4417 } else {
4418 None
4419 }
4420 }
4421
as_u32(&self) -> Option<u32>4422 pub fn as_u32(&self) -> Option<u32> {
4423 let Some(sel) = self.get_sel() else {
4424 return None;
4425 };
4426
4427 let mut imm = 0_u32;
4428 for b in 0..4 {
4429 let sel_byte = sel.get(b);
4430 let Some(src_u32) = self.srcs[sel_byte.src()].as_u32() else {
4431 return None;
4432 };
4433
4434 let sb = sel_byte.fold_u32(src_u32);
4435 imm |= u32::from(sb) << (b * 8);
4436 }
4437 Some(imm)
4438 }
4439 }
4440
4441 impl Foldable for OpPrmt {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4442 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4443 let srcs = [
4444 f.get_u32_src(self, &self.srcs[0]),
4445 f.get_u32_src(self, &self.srcs[1]),
4446 ];
4447 let sel = f.get_u32_src(self, &self.sel);
4448
4449 assert!(self.mode == PrmtMode::Index);
4450 let sel = PrmtSel(sel as u16);
4451
4452 let mut dst = 0_u32;
4453 for b in 0..4 {
4454 let sel_byte = sel.get(b);
4455 let src = srcs[sel_byte.src()];
4456 let sb = sel_byte.fold_u32(src);
4457 dst |= u32::from(sb) << (b * 8);
4458 }
4459
4460 f.set_u32_dst(self, &self.dst, dst);
4461 }
4462 }
4463
4464 impl DisplayOp for OpPrmt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4465 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4466 write!(
4467 f,
4468 "prmt{} {} [{}] {}",
4469 self.mode, self.srcs[0], self.sel, self.srcs[1],
4470 )
4471 }
4472 }
4473 impl_display_for_op!(OpPrmt);
4474
/// The `sel` operation.
///
/// Reads a predicate `cond` and two ALU-typed sources and writes one
/// GPR-typed destination.
///
/// NOTE(review): presumably `dst = cond ? srcs[0] : srcs[1]` -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSel {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(Pred)]
    pub cond: Src,

    #[src_type(ALU)]
    pub srcs: [Src; 2],
}
4487
4488 impl DisplayOp for OpSel {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4489 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4490 write!(f, "sel {} {} {}", self.cond, self.srcs[0], self.srcs[1],)
4491 }
4492 }
4493 impl_display_for_op!(OpSel);
4494
/// The `shfl` operation.
///
/// Writes a GPR-typed `dst` and a predicate `in_bounds`; reads an SSA-typed
/// `src` and two ALU-typed operands, `lane` and `c`.  `op` selects the
/// shuffle mode and is printed as a mnemonic suffix.
///
/// NOTE(review): presumably a cross-lane shuffle where `in_bounds` reports
/// whether the source lane was valid -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpShfl {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[dst_type(Pred)]
    pub in_bounds: Dst,

    #[src_type(SSA)]
    pub src: Src,

    #[src_type(ALU)]
    pub lane: Src,

    #[src_type(ALU)]
    pub c: Src,

    pub op: ShflOp,
}
4515
4516 impl DisplayOp for OpShfl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4517 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4518 write!(f, "shfl.{} {} {} {}", self.op, self.src, self.lane, self.c)
4519 }
4520 }
4521 impl_display_for_op!(OpShfl);
4522
/// The `plop3` operation: three-input logic on predicates.
///
/// Reads three predicate sources and writes two predicate destinations.
///
/// NOTE(review): presumably `dsts[i]` is `ops[i]` evaluated on the three
/// sources -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpPLop3 {
    #[dst_type(Pred)]
    pub dsts: [Dst; 2],

    #[src_type(Pred)]
    pub srcs: [Src; 3],

    pub ops: [LogicOp3; 2],
}
4534
4535 impl DisplayOp for OpPLop3 {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4536 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4537 write!(f, "{} {}", self.dsts[0], self.dsts[1])
4538 }
4539
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4540 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4541 write!(
4542 f,
4543 "plop3 {} {} {} {} {}",
4544 self.srcs[0], self.srcs[1], self.srcs[2], self.ops[0], self.ops[1],
4545 )
4546 }
4547 }
4548 impl_display_for_op!(OpPLop3);
4549
/// The `psetp` operation: combines three predicates into two.
///
/// `dsts[0]` is `ops[1]` applied to `srcs[2]` and the result of `ops[0]` on
/// `srcs[0]` and `srcs[1]`.  `dsts[1]` is computed the same way but with
/// `srcs[0]` negated.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpPSetP {
    #[dst_type(Pred)]
    pub dsts: [Dst; 2],

    pub ops: [PredSetOp; 2],

    #[src_type(Pred)]
    pub srcs: [Src; 3],
}
4561
4562 impl Foldable for OpPSetP {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4563 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4564 let srcs = [
4565 f.get_pred_src(self, &self.srcs[0]),
4566 f.get_pred_src(self, &self.srcs[1]),
4567 f.get_pred_src(self, &self.srcs[2]),
4568 ];
4569
4570 let tmp = self.ops[0].eval(srcs[0], srcs[1]);
4571 let dst0 = self.ops[1].eval(srcs[2], tmp);
4572
4573 let tmp = self.ops[0].eval(!srcs[0], srcs[1]);
4574 let dst1 = self.ops[1].eval(srcs[2], tmp);
4575
4576 f.set_pred_dst(self, &self.dsts[0], dst0);
4577 f.set_pred_dst(self, &self.dsts[1], dst1);
4578 }
4579 }
4580
4581 impl DisplayOp for OpPSetP {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4582 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4583 write!(
4584 f,
4585 "psetp{}{} {} {} {}",
4586 self.ops[0], self.ops[1], self.srcs[0], self.srcs[1], self.srcs[2],
4587 )
4588 }
4589 }
4590
/// The `popc` operation: counts the set bits of its source.
#[repr(C)]
#[derive(Clone, SrcsAsSlice, DstsAsSlice)]
pub struct OpPopC {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(B32)]
    pub src: Src,
}
4600
4601 impl Foldable for OpPopC {
fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>)4602 fn fold(&self, _sm: &dyn ShaderModel, f: &mut OpFoldData<'_>) {
4603 let src = f.get_u32_bnot_src(self, &self.src);
4604 let dst = src.count_ones();
4605 f.set_u32_dst(self, &self.dst, dst);
4606 }
4607 }
4608
4609 impl DisplayOp for OpPopC {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4610 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4611 write!(f, "popc {}", self.src,)
4612 }
4613 }
4614 impl_display_for_op!(OpPopC);
4615
/// The `r2ur` operation.
///
/// Reads one GPR-typed source and writes one destination.
///
/// NOTE(review): presumably moves a per-lane register into a uniform
/// register -- TODO confirm, including whether `GPR` is the intended
/// destination type.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpR2UR {
    #[dst_type(GPR)]
    pub dst: Dst,

    #[src_type(GPR)]
    pub src: Src,
}
4625
4626 impl DisplayOp for OpR2UR {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4627 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4628 write!(f, "r2ur {}", self.src)
4629 }
4630 }
4631 impl_display_for_op!(OpR2UR);
4632
/// The `tex.b` operation.
///
/// Writes up to two result destinations plus a `fault` destination and reads
/// two SSA-typed sources.
///
/// NOTE(review): presumably a bindless texture sample -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTex {
    pub dsts: [Dst; 2],
    pub fault: Dst,

    #[src_type(SSA)]
    pub srcs: [Src; 2],

    pub dim: TexDim,
    // Printed as a suffix unless it is `TexLodMode::Auto`.
    pub lod_mode: TexLodMode,
    // Printed as the `.dc` suffix.
    pub z_cmpr: bool,
    // Printed as the `.aoffi` suffix.
    pub offset: bool,
    // NOTE(review): presumably a mask of the result components written --
    // TODO confirm.
    pub mask: u8,
}
4648
4649 impl DisplayOp for OpTex {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4650 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4651 write!(f, "tex.b{}", self.dim)?;
4652 if self.lod_mode != TexLodMode::Auto {
4653 write!(f, ".{}", self.lod_mode)?;
4654 }
4655 if self.offset {
4656 write!(f, ".aoffi")?;
4657 }
4658 if self.z_cmpr {
4659 write!(f, ".dc")?;
4660 }
4661 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4662 }
4663 }
4664 impl_display_for_op!(OpTex);
4665
/// The `tld.b` operation.
///
/// Writes up to two result destinations plus a `fault` destination and reads
/// two SSA-typed sources.
///
/// NOTE(review): presumably a bindless texel load (fetch without
/// filtering) -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTld {
    pub dsts: [Dst; 2],
    pub fault: Dst,

    #[src_type(SSA)]
    pub srcs: [Src; 2],

    pub dim: TexDim,
    // Printed as the `.ms` suffix.
    pub is_ms: bool,
    // Printed as a suffix unless it is `TexLodMode::Auto`.
    pub lod_mode: TexLodMode,
    // Printed as the `.aoffi` suffix.
    pub offset: bool,
    // NOTE(review): presumably a mask of the result components written --
    // TODO confirm.
    pub mask: u8,
}
4681
4682 impl DisplayOp for OpTld {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4683 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4684 write!(f, "tld.b{}", self.dim)?;
4685 if self.lod_mode != TexLodMode::Auto {
4686 write!(f, ".{}", self.lod_mode)?;
4687 }
4688 if self.offset {
4689 write!(f, ".aoffi")?;
4690 }
4691 if self.is_ms {
4692 write!(f, ".ms")?;
4693 }
4694 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4695 }
4696 }
4697 impl_display_for_op!(OpTld);
4698
/// The `tld4.g.b` operation.
///
/// Writes up to two result destinations plus a `fault` destination and reads
/// two SSA-typed sources.
///
/// NOTE(review): presumably a bindless texture gather of component `comp` --
/// TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTld4 {
    pub dsts: [Dst; 2],
    pub fault: Dst,

    #[src_type(SSA)]
    pub srcs: [Src; 2],

    pub dim: TexDim,
    // Not included in the printed form of the op.
    pub comp: u8,
    // Printed as a suffix unless it is `Tld4OffsetMode::None`.
    pub offset_mode: Tld4OffsetMode,
    // Not included in the printed form of the op.
    pub z_cmpr: bool,
    // NOTE(review): presumably a mask of the result components written --
    // TODO confirm.
    pub mask: u8,
}
4714
4715 impl DisplayOp for OpTld4 {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4716 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4717 write!(f, "tld4.g.b{}", self.dim)?;
4718 if self.offset_mode != Tld4OffsetMode::None {
4719 write!(f, ".{}", self.offset_mode)?;
4720 }
4721 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4722 }
4723 }
4724 impl_display_for_op!(OpTld4);
4725
/// The `tmml.b.lod` operation.
///
/// Writes up to two result destinations and reads two SSA-typed sources.
///
/// NOTE(review): presumably a bindless texture level-of-detail query -- TODO
/// confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTmml {
    pub dsts: [Dst; 2],

    #[src_type(SSA)]
    pub srcs: [Src; 2],

    pub dim: TexDim,
    // NOTE(review): presumably a mask of the result components written --
    // TODO confirm.
    pub mask: u8,
}
4737
4738 impl DisplayOp for OpTmml {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4739 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4740 write!(
4741 f,
4742 "tmml.b.lod{} {} {}",
4743 self.dim, self.srcs[0], self.srcs[1]
4744 )
4745 }
4746 }
4747 impl_display_for_op!(OpTmml);
4748
/// The `txd.b` operation.
///
/// Writes up to two result destinations plus a `fault` destination and reads
/// two SSA-typed sources.
///
/// NOTE(review): presumably a bindless texture sample with explicit
/// derivatives -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTxd {
    pub dsts: [Dst; 2],
    pub fault: Dst,

    #[src_type(SSA)]
    pub srcs: [Src; 2],

    pub dim: TexDim,
    // Printed as the `.aoffi` suffix.
    pub offset: bool,
    // NOTE(review): presumably a mask of the result components written --
    // TODO confirm.
    pub mask: u8,
}
4762
4763 impl DisplayOp for OpTxd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4764 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4765 write!(f, "txd.b{}", self.dim)?;
4766 if self.offset {
4767 write!(f, ".aoffi")?;
4768 }
4769 write!(f, " {} {}", self.srcs[0], self.srcs[1])
4770 }
4771 }
4772 impl_display_for_op!(OpTxd);
4773
/// The `txq.b` operation.
///
/// Writes up to two result destinations and reads one SSA-typed source.
/// `query` selects what is queried and is printed after the source.
///
/// NOTE(review): presumably a bindless texture property query -- TODO
/// confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpTxq {
    pub dsts: [Dst; 2],

    #[src_type(SSA)]
    pub src: Src,

    pub query: TexQuery,
    // NOTE(review): presumably a mask of the result components written --
    // TODO confirm.
    pub mask: u8,
}
4785
4786 impl DisplayOp for OpTxq {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4787 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4788 write!(f, "txq.b {} {}", self.src, self.query)
4789 }
4790 }
4791 impl_display_for_op!(OpTxq);
4792
/// The `suld.p` operation.
///
/// Writes a result destination and a `fault` destination; reads a GPR-typed
/// `handle` and an SSA-typed `coord`.
///
/// NOTE(review): presumably a surface (image) load -- TODO confirm.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSuLd {
    pub dst: Dst,
    pub fault: Dst,

    pub image_dim: ImageDim,
    pub mem_order: MemOrder,
    pub mem_eviction_priority: MemEvictionPriority,
    // Not included in the printed form of the op.
    pub mask: u8,

    #[src_type(GPR)]
    pub handle: Src,

    #[src_type(SSA)]
    pub coord: Src,
}
4810
4811 impl DisplayOp for OpSuLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4812 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4813 write!(
4814 f,
4815 "suld.p{}{}{} [{}] {}",
4816 self.image_dim,
4817 self.mem_order,
4818 self.mem_eviction_priority,
4819 self.coord,
4820 self.handle,
4821 )
4822 }
4823 }
4824 impl_display_for_op!(OpSuLd);
4825
4826 #[repr(C)]
4827 #[derive(SrcsAsSlice, DstsAsSlice)]
4828 pub struct OpSuSt {
4829 pub image_dim: ImageDim,
4830 pub mem_order: MemOrder,
4831 pub mem_eviction_priority: MemEvictionPriority,
4832 pub mask: u8,
4833
4834 #[src_type(GPR)]
4835 pub handle: Src,
4836
4837 #[src_type(SSA)]
4838 pub coord: Src,
4839
4840 #[src_type(SSA)]
4841 pub data: Src,
4842 }
4843
4844 impl DisplayOp for OpSuSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4845 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4846 write!(
4847 f,
4848 "sust.p{}{}{} [{}] {} {}",
4849 self.image_dim,
4850 self.mem_order,
4851 self.mem_eviction_priority,
4852 self.coord,
4853 self.data,
4854 self.handle,
4855 )
4856 }
4857 }
4858 impl_display_for_op!(OpSuSt);
4859
4860 #[repr(C)]
4861 #[derive(SrcsAsSlice, DstsAsSlice)]
4862 pub struct OpSuAtom {
4863 pub dst: Dst,
4864 pub fault: Dst,
4865
4866 pub image_dim: ImageDim,
4867
4868 pub atom_op: AtomOp,
4869 pub atom_type: AtomType,
4870
4871 pub mem_order: MemOrder,
4872 pub mem_eviction_priority: MemEvictionPriority,
4873
4874 #[src_type(GPR)]
4875 pub handle: Src,
4876
4877 #[src_type(SSA)]
4878 pub coord: Src,
4879
4880 #[src_type(SSA)]
4881 pub data: Src,
4882 }
4883
4884 impl DisplayOp for OpSuAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4885 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4886 write!(
4887 f,
4888 "suatom.p{}{}{}{}{} [{}] {} {}",
4889 self.image_dim,
4890 self.atom_op,
4891 self.atom_type,
4892 self.mem_order,
4893 self.mem_eviction_priority,
4894 self.coord,
4895 self.data,
4896 self.handle,
4897 )
4898 }
4899 }
4900 impl_display_for_op!(OpSuAtom);
4901
4902 #[repr(C)]
4903 #[derive(SrcsAsSlice, DstsAsSlice)]
4904 pub struct OpLd {
4905 pub dst: Dst,
4906
4907 #[src_type(GPR)]
4908 pub addr: Src,
4909
4910 pub offset: i32,
4911 pub access: MemAccess,
4912 }
4913
4914 impl DisplayOp for OpLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4915 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4916 write!(f, "ld{} [{}", self.access, self.addr)?;
4917 if self.offset > 0 {
4918 write!(f, "+{:#x}", self.offset)?;
4919 }
4920 write!(f, "]")
4921 }
4922 }
4923 impl_display_for_op!(OpLd);
4924
/// Mode selector carried by [`OpLdc`]; displayed as a mnemonic suffix.
#[allow(dead_code)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum LdcMode {
    Indexed,
    IndexedLinear,
    IndexedSegmented,
    IndexedSegmentedLinear,
}

impl fmt::Display for LdcMode {
    /// Writes the suffix for this mode: nothing for `Indexed`, otherwise
    /// `.il`, `.is` or `.isl`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            // The plain indexed mode has no suffix at all.
            Self::Indexed => return Ok(()),
            Self::IndexedLinear => ".il",
            Self::IndexedSegmented => ".is",
            Self::IndexedSegmentedLinear => ".isl",
        };
        f.write_str(suffix)
    }
}
4944
4945 #[repr(C)]
4946 #[derive(SrcsAsSlice, DstsAsSlice)]
4947 pub struct OpLdc {
4948 pub dst: Dst,
4949
4950 #[src_type(ALU)]
4951 pub cb: Src,
4952
4953 #[src_type(GPR)]
4954 pub offset: Src,
4955
4956 pub mode: LdcMode,
4957 pub mem_type: MemType,
4958 }
4959
4960 impl DisplayOp for OpLdc {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4961 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4962 let SrcRef::CBuf(cb) = self.cb.src_ref else {
4963 panic!("Not a cbuf");
4964 };
4965 write!(f, "ldc{}{} {}[", self.mode, self.mem_type, cb.buf)?;
4966 if self.offset.is_zero() {
4967 write!(f, "+{:#x}", cb.offset)?;
4968 } else if cb.offset == 0 {
4969 write!(f, "{}", self.offset)?;
4970 } else {
4971 write!(f, "{}+{:#x}", self.offset, cb.offset)?;
4972 }
4973 write!(f, "]")
4974 }
4975 }
4976 impl_display_for_op!(OpLdc);
4977
4978 #[repr(C)]
4979 #[derive(SrcsAsSlice, DstsAsSlice)]
4980 pub struct OpSt {
4981 #[src_type(GPR)]
4982 pub addr: Src,
4983
4984 #[src_type(SSA)]
4985 pub data: Src,
4986
4987 pub offset: i32,
4988 pub access: MemAccess,
4989 }
4990
4991 impl DisplayOp for OpSt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result4992 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
4993 write!(f, "st{} [{}", self.access, self.addr)?;
4994 if self.offset > 0 {
4995 write!(f, "+{:#x}", self.offset)?;
4996 }
4997 write!(f, "] {}", self.data)
4998 }
4999 }
5000 impl_display_for_op!(OpSt);
5001
5002 #[repr(C)]
5003 #[derive(SrcsAsSlice, DstsAsSlice)]
5004 pub struct OpAtom {
5005 pub dst: Dst,
5006
5007 #[src_type(GPR)]
5008 pub addr: Src,
5009
5010 #[src_type(GPR)]
5011 pub cmpr: Src,
5012
5013 #[src_type(SSA)]
5014 pub data: Src,
5015
5016 pub atom_op: AtomOp,
5017 pub atom_type: AtomType,
5018
5019 pub addr_offset: i32,
5020
5021 pub mem_space: MemSpace,
5022 pub mem_order: MemOrder,
5023 pub mem_eviction_priority: MemEvictionPriority,
5024 }
5025
5026 impl DisplayOp for OpAtom {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5027 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5028 write!(
5029 f,
5030 "atom{}{}{}{}{}",
5031 self.atom_op,
5032 self.atom_type,
5033 self.mem_space,
5034 self.mem_order,
5035 self.mem_eviction_priority,
5036 )?;
5037 write!(f, " [")?;
5038 if !self.addr.is_zero() {
5039 write!(f, "{}", self.addr)?;
5040 }
5041 if self.addr_offset > 0 {
5042 if !self.addr.is_zero() {
5043 write!(f, "+")?;
5044 }
5045 write!(f, "{:#x}", self.addr_offset)?;
5046 }
5047 write!(f, "]")?;
5048 if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
5049 write!(f, " {}", self.cmpr)?;
5050 }
5051 write!(f, " {}", self.data)
5052 }
5053 }
5054 impl_display_for_op!(OpAtom);
5055
5056 #[repr(C)]
5057 #[derive(SrcsAsSlice, DstsAsSlice)]
5058 pub struct OpAL2P {
5059 pub dst: Dst,
5060
5061 #[src_type(GPR)]
5062 pub offset: Src,
5063
5064 pub access: AttrAccess,
5065 }
5066
5067 impl DisplayOp for OpAL2P {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5068 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5069 write!(f, "al2p")?;
5070 if self.access.output {
5071 write!(f, ".o")?;
5072 }
5073 if self.access.patch {
5074 write!(f, ".p")?;
5075 }
5076 write!(f, " a[{:#x}", self.access.addr)?;
5077 if !self.offset.is_zero() {
5078 write!(f, "+{}", self.offset)?;
5079 }
5080 write!(f, "]")
5081 }
5082 }
5083 impl_display_for_op!(OpAL2P);
5084
5085 #[repr(C)]
5086 #[derive(SrcsAsSlice, DstsAsSlice)]
5087 pub struct OpALd {
5088 pub dst: Dst,
5089
5090 #[src_type(GPR)]
5091 pub vtx: Src,
5092
5093 #[src_type(GPR)]
5094 pub offset: Src,
5095
5096 pub access: AttrAccess,
5097 }
5098
5099 impl DisplayOp for OpALd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5100 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5101 write!(f, "ald")?;
5102 if self.access.output {
5103 write!(f, ".o")?;
5104 }
5105 if self.access.patch {
5106 write!(f, ".p")?;
5107 }
5108 if self.access.phys {
5109 write!(f, ".phys")?;
5110 }
5111 write!(f, " a")?;
5112 if !self.vtx.is_zero() {
5113 write!(f, "[{}]", self.vtx)?;
5114 }
5115 write!(f, "[{:#x}", self.access.addr)?;
5116 if !self.offset.is_zero() {
5117 write!(f, "+{}", self.offset)?;
5118 }
5119 write!(f, "]")
5120 }
5121 }
5122 impl_display_for_op!(OpALd);
5123
5124 #[repr(C)]
5125 #[derive(SrcsAsSlice, DstsAsSlice)]
5126 pub struct OpASt {
5127 #[src_type(GPR)]
5128 pub vtx: Src,
5129
5130 #[src_type(GPR)]
5131 pub offset: Src,
5132
5133 #[src_type(SSA)]
5134 pub data: Src,
5135
5136 pub access: AttrAccess,
5137 }
5138
5139 impl DisplayOp for OpASt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5140 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5141 write!(f, "ast")?;
5142 if self.access.patch {
5143 write!(f, ".p")?;
5144 }
5145 if self.access.phys {
5146 write!(f, ".phys")?;
5147 }
5148 write!(f, " a")?;
5149 if !self.vtx.is_zero() {
5150 write!(f, "[{}]", self.vtx)?;
5151 }
5152 write!(f, "[{:#x}", self.access.addr)?;
5153 if !self.offset.is_zero() {
5154 write!(f, "+{}", self.offset)?;
5155 }
5156 write!(f, "] {}", self.data)
5157 }
5158 }
5159 impl_display_for_op!(OpASt);
5160
5161 #[repr(C)]
5162 #[derive(SrcsAsSlice, DstsAsSlice)]
5163 pub struct OpIpa {
5164 pub dst: Dst,
5165 pub addr: u16,
5166 pub freq: InterpFreq,
5167 pub loc: InterpLoc,
5168 pub inv_w: Src,
5169 pub offset: Src,
5170 }
5171
5172 impl DisplayOp for OpIpa {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5173 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5174 write!(
5175 f,
5176 "ipa{}{} a[{:#x}] {}",
5177 self.freq, self.loc, self.addr, self.inv_w
5178 )?;
5179 if self.loc == InterpLoc::Offset {
5180 write!(f, " {}", self.offset)?;
5181 }
5182 Ok(())
5183 }
5184 }
5185 impl_display_for_op!(OpIpa);
5186
5187 #[repr(C)]
5188 #[derive(SrcsAsSlice, DstsAsSlice)]
5189 pub struct OpLdTram {
5190 pub dst: Dst,
5191 pub addr: u16,
5192 pub use_c: bool,
5193 }
5194
5195 impl DisplayOp for OpLdTram {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5196 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5197 write!(f, "ldtram")?;
5198 if self.use_c {
5199 write!(f, ".c")?;
5200 } else {
5201 write!(f, ".ab")?;
5202 }
5203 write!(f, " a[{:#x}]", self.addr)?;
5204 Ok(())
5205 }
5206 }
5207 impl_display_for_op!(OpLdTram);
5208
/// Operation selector carried by [`OpCCtl`].
#[allow(dead_code)]
#[derive(Copy, Clone, Debug)]
pub enum CCtlOp {
    Qry1, // Only available pre-Volta
    PF1,
    PF1_5, // Only available pre-Volta
    PF2,
    WB,
    IV,
    IVAll,
    RS,
    RSLB,   // Only available pre-Volta
    IVAllP, // Only available on Volta+
    WBAll,  // Only available on Volta+
    WBAllP, // Only available on Volta+
}

impl CCtlOp {
    /// Returns `true` for the `*All` / `*AllP` variants and `false` for
    /// every other variant.
    pub fn is_all(&self) -> bool {
        // Kept exhaustive on purpose so that adding a variant forces a
        // decision here.
        match *self {
            Self::IVAll | Self::IVAllP | Self::WBAll | Self::WBAllP => true,
            Self::Qry1
            | Self::PF1
            | Self::PF1_5
            | Self::PF2
            | Self::WB
            | Self::IV
            | Self::RS
            | Self::RSLB => false,
        }
    }
}

impl fmt::Display for CCtlOp {
    /// Writes the lower-case name of the operation (e.g. `pf1.5`, `ivall`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            Self::Qry1 => "qry1",
            Self::PF1 => "pf1",
            Self::PF1_5 => "pf1.5",
            Self::PF2 => "pf2",
            Self::WB => "wb",
            Self::IV => "iv",
            Self::IVAll => "ivall",
            Self::RS => "rs",
            Self::RSLB => "rslb",
            Self::IVAllP => "ivallp",
            Self::WBAll => "wball",
            Self::WBAllP => "wballp",
        };
        f.write_str(name)
    }
}
5262
5263 #[repr(C)]
5264 #[derive(SrcsAsSlice, DstsAsSlice)]
5265 pub struct OpCCtl {
5266 pub op: CCtlOp,
5267
5268 pub mem_space: MemSpace,
5269
5270 #[src_type(GPR)]
5271 pub addr: Src,
5272
5273 pub addr_offset: i32,
5274 }
5275
5276 impl DisplayOp for OpCCtl {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5277 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5278 write!(f, "cctl{}", self.mem_space)?;
5279 if !self.op.is_all() {
5280 write!(f, " [{}", self.addr)?;
5281 if self.addr_offset > 0 {
5282 write!(f, "+{:#x}", self.addr_offset)?;
5283 }
5284 write!(f, "]")?;
5285 }
5286 Ok(())
5287 }
5288 }
5289 impl_display_for_op!(OpCCtl);
5290
5291 #[repr(C)]
5292 #[derive(SrcsAsSlice, DstsAsSlice)]
5293 pub struct OpMemBar {
5294 pub scope: MemScope,
5295 }
5296
5297 impl DisplayOp for OpMemBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5298 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5299 write!(f, "membar.sc.{}", self.scope)
5300 }
5301 }
5302 impl_display_for_op!(OpMemBar);
5303
5304 #[repr(C)]
5305 #[derive(SrcsAsSlice, DstsAsSlice)]
5306 pub struct OpBClear {
5307 pub dst: Dst,
5308 }
5309
5310 impl DisplayOp for OpBClear {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5311 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5312 write!(f, "bclear")
5313 }
5314 }
5315 impl_display_for_op!(OpBClear);
5316
5317 #[repr(C)]
5318 #[derive(SrcsAsSlice, DstsAsSlice)]
5319 pub struct OpBMov {
5320 pub dst: Dst,
5321 pub src: Src,
5322 pub clear: bool,
5323 }
5324
5325 impl DisplayOp for OpBMov {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5326 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5327 write!(f, "bmov.32")?;
5328 if self.clear {
5329 write!(f, ".clear")?;
5330 }
5331 write!(f, " {}", self.src)
5332 }
5333 }
5334 impl_display_for_op!(OpBMov);
5335
5336 #[repr(C)]
5337 #[derive(SrcsAsSlice, DstsAsSlice)]
5338 pub struct OpBreak {
5339 #[dst_type(Bar)]
5340 pub bar_out: Dst,
5341
5342 #[src_type(Bar)]
5343 pub bar_in: Src,
5344
5345 #[src_type(Pred)]
5346 pub cond: Src,
5347 }
5348
5349 impl DisplayOp for OpBreak {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5350 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5351 write!(f, "break {} {}", self.bar_in, self.cond)
5352 }
5353 }
5354 impl_display_for_op!(OpBreak);
5355
5356 #[repr(C)]
5357 #[derive(SrcsAsSlice, DstsAsSlice)]
5358 pub struct OpBSSy {
5359 #[dst_type(Bar)]
5360 pub bar_out: Dst,
5361
5362 #[src_type(Pred)]
5363 pub bar_in: Src,
5364
5365 #[src_type(Pred)]
5366 pub cond: Src,
5367
5368 pub target: Label,
5369 }
5370
5371 impl DisplayOp for OpBSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5372 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5373 write!(f, "bssy {} {} {}", self.bar_in, self.cond, self.target)
5374 }
5375 }
5376 impl_display_for_op!(OpBSSy);
5377
5378 #[repr(C)]
5379 #[derive(SrcsAsSlice, DstsAsSlice)]
5380 pub struct OpBSync {
5381 #[src_type(Bar)]
5382 pub bar: Src,
5383
5384 #[src_type(Pred)]
5385 pub cond: Src,
5386 }
5387
5388 impl DisplayOp for OpBSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5389 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5390 write!(f, "bsync {} {}", self.bar, self.cond)
5391 }
5392 }
5393 impl_display_for_op!(OpBSync);
5394
5395 #[repr(C)]
5396 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5397 pub struct OpBra {
5398 pub target: Label,
5399 }
5400
5401 impl DisplayOp for OpBra {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5402 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5403 write!(f, "bra {}", self.target)
5404 }
5405 }
5406 impl_display_for_op!(OpBra);
5407
5408 #[repr(C)]
5409 #[derive(SrcsAsSlice, DstsAsSlice)]
5410 pub struct OpSSy {
5411 pub target: Label,
5412 }
5413
5414 impl DisplayOp for OpSSy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5415 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5416 write!(f, "ssy {}", self.target)
5417 }
5418 }
5419 impl_display_for_op!(OpSSy);
5420
5421 #[repr(C)]
5422 #[derive(SrcsAsSlice, DstsAsSlice)]
5423 pub struct OpSync {
5424 pub target: Label,
5425 }
5426
5427 impl DisplayOp for OpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5428 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5429 write!(f, "sync {}", self.target)
5430 }
5431 }
5432 impl_display_for_op!(OpSync);
5433
5434 #[repr(C)]
5435 #[derive(SrcsAsSlice, DstsAsSlice)]
5436 pub struct OpBrk {
5437 pub target: Label,
5438 }
5439
5440 impl DisplayOp for OpBrk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5441 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5442 write!(f, "brk {}", self.target)
5443 }
5444 }
5445 impl_display_for_op!(OpBrk);
5446
5447 #[repr(C)]
5448 #[derive(SrcsAsSlice, DstsAsSlice)]
5449 pub struct OpPBk {
5450 pub target: Label,
5451 }
5452
5453 impl DisplayOp for OpPBk {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5454 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5455 write!(f, "pbk {}", self.target)
5456 }
5457 }
5458 impl_display_for_op!(OpPBk);
5459
5460 #[repr(C)]
5461 #[derive(SrcsAsSlice, DstsAsSlice)]
5462 pub struct OpCont {
5463 pub target: Label,
5464 }
5465
5466 impl DisplayOp for OpCont {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5467 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5468 write!(f, "cont {}", self.target)
5469 }
5470 }
5471 impl_display_for_op!(OpCont);
5472
5473 #[repr(C)]
5474 #[derive(SrcsAsSlice, DstsAsSlice)]
5475 pub struct OpPCnt {
5476 pub target: Label,
5477 }
5478
5479 impl DisplayOp for OpPCnt {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5480 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5481 write!(f, "pcnt {}", self.target)
5482 }
5483 }
5484 impl_display_for_op!(OpPCnt);
5485
5486 #[repr(C)]
5487 #[derive(Clone, SrcsAsSlice, DstsAsSlice)]
5488 pub struct OpExit {}
5489
5490 impl DisplayOp for OpExit {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5491 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5492 write!(f, "exit")
5493 }
5494 }
5495 impl_display_for_op!(OpExit);
5496
5497 #[repr(C)]
5498 #[derive(SrcsAsSlice, DstsAsSlice)]
5499 pub struct OpWarpSync {
5500 pub mask: u32,
5501 }
5502
5503 impl DisplayOp for OpWarpSync {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5504 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5505 write!(f, "warpsync 0x{:x}", self.mask)
5506 }
5507 }
5508 impl_display_for_op!(OpWarpSync);
5509
5510 #[repr(C)]
5511 #[derive(SrcsAsSlice, DstsAsSlice)]
5512 pub struct OpBar {}
5513
5514 impl DisplayOp for OpBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5515 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5516 write!(f, "bar.sync")
5517 }
5518 }
5519 impl_display_for_op!(OpBar);
5520
5521 #[repr(C)]
5522 #[derive(SrcsAsSlice, DstsAsSlice)]
5523 pub struct OpCS2R {
5524 pub dst: Dst,
5525 pub idx: u8,
5526 }
5527
5528 impl DisplayOp for OpCS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5529 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5530 write!(f, "cs2r sr[{:#x}]", self.idx)
5531 }
5532 }
5533 impl_display_for_op!(OpCS2R);
5534
5535 #[repr(C)]
5536 #[derive(SrcsAsSlice, DstsAsSlice)]
5537 pub struct OpIsberd {
5538 #[dst_type(GPR)]
5539 pub dst: Dst,
5540
5541 #[src_type(SSA)]
5542 pub idx: Src,
5543 }
5544
5545 impl DisplayOp for OpIsberd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5546 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5547 write!(f, "isberd [{}]", self.idx)
5548 }
5549 }
5550 impl_display_for_op!(OpIsberd);
5551
5552 #[repr(C)]
5553 #[derive(SrcsAsSlice, DstsAsSlice)]
5554 pub struct OpKill {}
5555
5556 impl DisplayOp for OpKill {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5557 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5558 write!(f, "kill")
5559 }
5560 }
5561 impl_display_for_op!(OpKill);
5562
5563 #[repr(C)]
5564 #[derive(SrcsAsSlice, DstsAsSlice)]
5565 pub struct OpNop {
5566 pub label: Option<Label>,
5567 }
5568
5569 impl DisplayOp for OpNop {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5570 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5571 write!(f, "nop")?;
5572 if let Some(label) = &self.label {
5573 write!(f, " {}", label)?;
5574 }
5575 Ok(())
5576 }
5577 }
5578 impl_display_for_op!(OpNop);
5579
/// Value selector carried by [`OpPixLd`]; displayed as a mnemonic suffix.
#[allow(dead_code)]
pub enum PixVal {
    MsCount,
    CovMask,
    Covered,
    Offset,
    CentroidOffset,
    MyIndex,
    InnerCoverage,
}

impl fmt::Display for PixVal {
    /// Writes the dot-prefixed, lower-case suffix for this value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            Self::MsCount => ".mscount",
            Self::CovMask => ".covmask",
            Self::Covered => ".covered",
            Self::Offset => ".offset",
            Self::CentroidOffset => ".centroid_offset",
            Self::MyIndex => ".my_index",
            Self::InnerCoverage => ".inner_coverage",
        };
        f.write_str(suffix)
    }
}
5604
5605 #[repr(C)]
5606 #[derive(SrcsAsSlice, DstsAsSlice)]
5607 pub struct OpPixLd {
5608 pub dst: Dst,
5609 pub val: PixVal,
5610 }
5611
5612 impl DisplayOp for OpPixLd {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5613 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5614 write!(f, "pixld{}", self.val)
5615 }
5616 }
5617 impl_display_for_op!(OpPixLd);
5618
5619 #[repr(C)]
5620 #[derive(SrcsAsSlice, DstsAsSlice)]
5621 pub struct OpS2R {
5622 pub dst: Dst,
5623 pub idx: u8,
5624 }
5625
5626 impl DisplayOp for OpS2R {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5627 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5628 write!(f, "s2r sr[{:#x}]", self.idx)
5629 }
5630 }
5631 impl_display_for_op!(OpS2R);
5632
/// Operation selector carried by [`OpVote`].
pub enum VoteOp {
    Any,
    All,
    Eq,
}

impl fmt::Display for VoteOp {
    /// Writes the lower-case name of the operation without a leading dot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            Self::Any => "any",
            Self::All => "all",
            Self::Eq => "eq",
        };
        f.write_str(name)
    }
}
5648
5649 #[repr(C)]
5650 #[derive(SrcsAsSlice, DstsAsSlice)]
5651 pub struct OpVote {
5652 pub op: VoteOp,
5653
5654 #[dst_type(GPR)]
5655 pub ballot: Dst,
5656
5657 #[dst_type(Pred)]
5658 pub vote: Dst,
5659
5660 #[src_type(Pred)]
5661 pub pred: Src,
5662 }
5663
5664 impl DisplayOp for OpVote {
fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5665 fn fmt_dsts(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5666 if self.ballot.is_none() && self.vote.is_none() {
5667 write!(f, "none")
5668 } else {
5669 if !self.ballot.is_none() {
5670 write!(f, "{}", self.ballot)?;
5671 }
5672 if !self.vote.is_none() {
5673 write!(f, "{}", self.vote)?;
5674 }
5675 Ok(())
5676 }
5677 }
5678
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5679 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5680 write!(f, "vote.{} {}", self.op, self.pred)
5681 }
5682 }
5683 impl_display_for_op!(OpVote);
5684
5685 #[repr(C)]
5686 #[derive(SrcsAsSlice, DstsAsSlice)]
5687 pub struct OpUndef {
5688 pub dst: Dst,
5689 }
5690
5691 impl DisplayOp for OpUndef {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5692 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5693 write!(f, "undef {}", self.dst)
5694 }
5695 }
5696 impl_display_for_op!(OpUndef);
5697
5698 #[repr(C)]
5699 #[derive(SrcsAsSlice, DstsAsSlice)]
5700 pub struct OpSrcBar {
5701 pub src: Src,
5702 }
5703
5704 impl DisplayOp for OpSrcBar {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5705 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5706 write!(f, "src_bar {}", self.src)
5707 }
5708 }
5709 impl_display_for_op!(OpSrcBar);
5710
/// Two parallel vectors that are always kept at the same length.
///
/// Element `i` of `a` is paired with element `i` of `b`; both halves stay
/// available as plain slices.
pub struct VecPair<A, B> {
    a: Vec<A>,
    b: Vec<B>,
}

impl<A, B> VecPair<A, B> {
    /// Debug-only check that both halves hold the same number of elements.
    #[inline]
    fn debug_check_lens(&self) {
        debug_assert!(self.a.len() == self.b.len());
    }

    /// Moves every pair out of `other` onto the end of `self`, leaving
    /// `other` empty.
    pub fn append(&mut self, other: &mut VecPair<A, B>) {
        let VecPair { a, b } = other;
        self.a.append(a);
        self.b.append(b);
    }

    /// Returns `true` when no pairs are stored.
    pub fn is_empty(&self) -> bool {
        self.debug_check_lens();
        self.a.is_empty()
    }

    /// Iterates over the pairs by shared reference.
    pub fn iter(&self) -> Zip<slice::Iter<'_, A>, slice::Iter<'_, B>> {
        self.debug_check_lens();
        self.a.iter().zip(&self.b)
    }

    /// Iterates over the pairs by mutable reference.
    pub fn iter_mut(
        &mut self,
    ) -> Zip<slice::IterMut<'_, A>, slice::IterMut<'_, B>> {
        self.debug_check_lens();
        self.a.iter_mut().zip(&mut self.b)
    }

    /// Returns the number of stored pairs.
    pub fn len(&self) -> usize {
        self.debug_check_lens();
        self.a.len()
    }

    /// Creates an empty pair of vectors.
    pub fn new() -> Self {
        VecPair {
            a: Vec::new(),
            b: Vec::new(),
        }
    }

    /// Appends the pair `(a, b)`.
    pub fn push(&mut self, a: A, b: B) {
        self.debug_check_lens();
        self.a.push(a);
        self.b.push(b);
    }
}

impl<A: Clone, B: Clone> VecPair<A, B> {
    /// Keeps only the pairs for which `f` returns `true`, preserving order.
    ///
    /// `f` is called exactly once per pair, in index order.  Survivors that
    /// have to move towards the front are cloned into their new slot, which
    /// is why both element types must be `Clone`.
    pub fn retain(&mut self, mut f: impl FnMut(&A, &B) -> bool) {
        self.debug_check_lens();
        let total = self.a.len();

        // `kept` is both the number of survivors so far and the slot the
        // next survivor belongs in.
        let mut kept = 0_usize;
        for read in 0..total {
            if !f(&self.a[read], &self.b[read]) {
                continue;
            }
            // While nothing has been dropped yet the element is already in
            // place and no clone is needed.
            if kept != read {
                self.a[kept] = self.a[read].clone();
                self.b[kept] = self.b[read].clone();
            }
            kept += 1;
        }

        if kept < total {
            self.a.truncate(kept);
            self.b.truncate(kept);
        }
    }
}
5793
/// Hands out consecutive `u32` ids, starting at zero.
pub struct PhiAllocator {
    count: u32,
}

impl PhiAllocator {
    /// Creates an allocator whose first id will be `0`.
    pub fn new() -> PhiAllocator {
        Self { count: 0 }
    }

    /// Returns the next unused id and advances the counter by one.
    pub fn alloc(&mut self) -> u32 {
        let next = self.count;
        self.count += 1;
        next
    }
}
5809
5810 #[repr(C)]
5811 #[derive(DstsAsSlice)]
5812 pub struct OpPhiSrcs {
5813 pub srcs: VecPair<u32, Src>,
5814 }
5815
5816 impl OpPhiSrcs {
new() -> OpPhiSrcs5817 pub fn new() -> OpPhiSrcs {
5818 OpPhiSrcs {
5819 srcs: VecPair::new(),
5820 }
5821 }
5822 }
5823
5824 impl AsSlice<Src> for OpPhiSrcs {
5825 type Attr = SrcType;
5826
as_slice(&self) -> &[Src]5827 fn as_slice(&self) -> &[Src] {
5828 &self.srcs.b
5829 }
5830
as_mut_slice(&mut self) -> &mut [Src]5831 fn as_mut_slice(&mut self) -> &mut [Src] {
5832 &mut self.srcs.b
5833 }
5834
attrs(&self) -> SrcTypeList5835 fn attrs(&self) -> SrcTypeList {
5836 SrcTypeList::Uniform(SrcType::GPR)
5837 }
5838 }
5839
5840 impl DisplayOp for OpPhiSrcs {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5841 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5842 Ok(())
5843 }
5844
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5845 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5846 write!(f, "phi_src ")?;
5847 for (i, (id, src)) in self.srcs.iter().enumerate() {
5848 if i > 0 {
5849 write!(f, ", ")?;
5850 }
5851 write!(f, "φ{} = {}", id, src)?;
5852 }
5853 Ok(())
5854 }
5855 }
5856 impl_display_for_op!(OpPhiSrcs);
5857
5858 #[repr(C)]
5859 #[derive(SrcsAsSlice)]
5860 pub struct OpPhiDsts {
5861 pub dsts: VecPair<u32, Dst>,
5862 }
5863
5864 impl OpPhiDsts {
new() -> OpPhiDsts5865 pub fn new() -> OpPhiDsts {
5866 OpPhiDsts {
5867 dsts: VecPair::new(),
5868 }
5869 }
5870 }
5871
5872 impl AsSlice<Dst> for OpPhiDsts {
5873 type Attr = DstType;
5874
as_slice(&self) -> &[Dst]5875 fn as_slice(&self) -> &[Dst] {
5876 &self.dsts.b
5877 }
5878
as_mut_slice(&mut self) -> &mut [Dst]5879 fn as_mut_slice(&mut self) -> &mut [Dst] {
5880 &mut self.dsts.b
5881 }
5882
attrs(&self) -> DstTypeList5883 fn attrs(&self) -> DstTypeList {
5884 DstTypeList::Uniform(DstType::Vec)
5885 }
5886 }
5887
5888 impl DisplayOp for OpPhiDsts {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result5889 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
5890 Ok(())
5891 }
5892
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5893 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5894 write!(f, "phi_dst ")?;
5895 for (i, (id, dst)) in self.dsts.iter().enumerate() {
5896 if i > 0 {
5897 write!(f, ", ")?;
5898 }
5899 write!(f, "{} = φ{}", dst, id)?;
5900 }
5901 Ok(())
5902 }
5903 }
5904 impl_display_for_op!(OpPhiDsts);
5905
5906 #[repr(C)]
5907 #[derive(SrcsAsSlice, DstsAsSlice)]
5908 pub struct OpCopy {
5909 pub dst: Dst,
5910 pub src: Src,
5911 }
5912
5913 impl DisplayOp for OpCopy {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5914 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5915 write!(f, "copy {}", self.src)
5916 }
5917 }
5918 impl_display_for_op!(OpCopy);
5919
5920 #[repr(C)]
5921 #[derive(SrcsAsSlice, DstsAsSlice)]
5922 /// Copies a value and pins its destination in the register file
5923 pub struct OpPin {
5924 pub dst: Dst,
5925 #[src_type(SSA)]
5926 pub src: Src,
5927 }
5928
5929 impl DisplayOp for OpPin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5930 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5931 write!(f, "pin {}", self.src)
5932 }
5933 }
5934 impl_display_for_op!(OpPin);
5935
5936 #[repr(C)]
5937 #[derive(SrcsAsSlice, DstsAsSlice)]
5938 /// Copies a pinned value to an unpinned value
5939 pub struct OpUnpin {
5940 pub dst: Dst,
5941 #[src_type(SSA)]
5942 pub src: Src,
5943 }
5944
5945 impl DisplayOp for OpUnpin {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5946 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5947 write!(f, "unpin {}", self.src)
5948 }
5949 }
5950 impl_display_for_op!(OpUnpin);
5951
5952 #[repr(C)]
5953 #[derive(SrcsAsSlice, DstsAsSlice)]
5954 pub struct OpSwap {
5955 pub dsts: [Dst; 2],
5956 pub srcs: [Src; 2],
5957 }
5958
5959 impl DisplayOp for OpSwap {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result5960 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
5961 write!(f, "swap {} {}", self.srcs[0], self.srcs[1])
5962 }
5963 }
5964 impl_display_for_op!(OpSwap);
5965
5966 #[repr(C)]
5967 pub struct OpParCopy {
5968 pub dsts_srcs: VecPair<Dst, Src>,
5969 pub tmp: Option<RegRef>,
5970 }
5971
5972 impl OpParCopy {
new() -> OpParCopy5973 pub fn new() -> OpParCopy {
5974 OpParCopy {
5975 dsts_srcs: VecPair::new(),
5976 tmp: None,
5977 }
5978 }
5979
is_empty(&self) -> bool5980 pub fn is_empty(&self) -> bool {
5981 self.dsts_srcs.is_empty()
5982 }
5983
push(&mut self, dst: Dst, src: Src)5984 pub fn push(&mut self, dst: Dst, src: Src) {
5985 self.dsts_srcs.push(dst, src);
5986 }
5987 }
5988
5989 impl AsSlice<Src> for OpParCopy {
5990 type Attr = SrcType;
5991
as_slice(&self) -> &[Src]5992 fn as_slice(&self) -> &[Src] {
5993 &self.dsts_srcs.b
5994 }
5995
as_mut_slice(&mut self) -> &mut [Src]5996 fn as_mut_slice(&mut self) -> &mut [Src] {
5997 &mut self.dsts_srcs.b
5998 }
5999
attrs(&self) -> SrcTypeList6000 fn attrs(&self) -> SrcTypeList {
6001 SrcTypeList::Uniform(SrcType::GPR)
6002 }
6003 }
6004
6005 impl AsSlice<Dst> for OpParCopy {
6006 type Attr = DstType;
6007
as_slice(&self) -> &[Dst]6008 fn as_slice(&self) -> &[Dst] {
6009 &self.dsts_srcs.a
6010 }
6011
as_mut_slice(&mut self) -> &mut [Dst]6012 fn as_mut_slice(&mut self) -> &mut [Dst] {
6013 &mut self.dsts_srcs.a
6014 }
6015
attrs(&self) -> DstTypeList6016 fn attrs(&self) -> DstTypeList {
6017 DstTypeList::Uniform(DstType::Vec)
6018 }
6019 }
6020
6021 impl DisplayOp for OpParCopy {
fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result6022 fn fmt_dsts(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
6023 Ok(())
6024 }
6025
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6026 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6027 write!(f, "par_copy")?;
6028 for (i, (dst, src)) in self.dsts_srcs.iter().enumerate() {
6029 if i > 0 {
6030 write!(f, ",")?;
6031 }
6032 write!(f, " {} = {}", dst, src)?;
6033 }
6034 Ok(())
6035 }
6036 }
6037 impl_display_for_op!(OpParCopy);
6038
6039 #[repr(C)]
6040 #[derive(DstsAsSlice)]
6041 pub struct OpRegOut {
6042 pub srcs: Vec<Src>,
6043 }
6044
/// Exposes the sources of an `OpRegOut` as a slice of `Src`
impl AsSlice<Src> for OpRegOut {
    type Attr = SrcType;

    /// Returns all sources in order
    fn as_slice(&self) -> &[Src] {
        &self.srcs
    }

    /// Mutable counterpart of `as_slice`
    fn as_mut_slice(&mut self) -> &mut [Src] {
        &mut self.srcs
    }

    /// Reports every source as `SrcType::GPR`
    fn attrs(&self) -> SrcTypeList {
        SrcTypeList::Uniform(SrcType::GPR)
    }
}
6060
6061 impl DisplayOp for OpRegOut {
fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6062 fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6063 write!(f, "reg_out {{")?;
6064 for (i, src) in self.srcs.iter().enumerate() {
6065 if i > 0 {
6066 write!(f, ",")?;
6067 }
6068 write!(f, " {}", src)?;
6069 }
6070 write!(f, " }}")
6071 }
6072 }
6073 impl_display_for_op!(OpRegOut);
6074
/// The flavor of an `OpOut` instruction
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum OutType {
    /// Printed as `emit`
    Emit,
    /// Printed as `cut`
    Cut,
    /// Printed as `emit_then_cut`
    EmitThenCut,
}

impl fmt::Display for OutType {
    /// Writes the lowercase mnemonic used as the `out.` suffix
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mnemonic = match self {
            OutType::Emit => "emit",
            OutType::Cut => "cut",
            OutType::EmitThenCut => "emit_then_cut",
        };
        f.write_str(mnemonic)
    }
}
6091
/// The `out` op, printed as `out.<out_type> <handle> <stream>`
///
/// `Instr::has_fixed_latency` lists it under the geometry ops.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpOut {
    /// The single destination of the op
    pub dst: Dst,

    /// Handle source (SSA-typed)
    #[src_type(SSA)]
    pub handle: Src,

    /// Stream source (ALU-typed)
    #[src_type(ALU)]
    pub stream: Src,

    /// Which flavor of `out` this is (emit, cut, or both)
    pub out_type: OutType,
}

impl DisplayOp for OpOut {
    /// Prints the mnemonic with its `out_type` suffix, then both sources
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "out.{} {} {}", self.out_type, self.handle, self.stream)
    }
}
impl_display_for_op!(OpOut);
6112
/// The `out.final` op, which takes a single handle source and has no
/// destination
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpOutFinal {
    /// Handle source (SSA-typed)
    #[src_type(SSA)]
    pub handle: Src,
}

impl DisplayOp for OpOutFinal {
    /// Prints `out.final { <handle> }`
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "out.final {{ {} }}", self.handle)
    }
}
impl_display_for_op!(OpOutFinal);
6126
/// Describes an annotation on an instruction.
///
/// Annotations are printed as `// <text>` comments and have neither sources
/// nor destinations.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpAnnotate {
    /// The annotation
    pub annotation: String,
}

impl DisplayOp for OpAnnotate {
    /// Prints the annotation text as a `//` comment
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "// {}", self.annotation)
    }
}

// Implemented by hand rather than through `impl_display_for_op!`: the output
// is exactly what `fmt_op` writes.
impl fmt::Display for OpAnnotate {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.fmt_op(f)
    }
}
6146
/// Every operation an [`Instr`] can carry, one variant per op struct
///
/// Each variant wraps the struct holding that op's sources, destinations and
/// modifiers.  The derives forward `DisplayOp` and the source/destination
/// slice accessors to the wrapped struct, and `FromVariants` is what lets an
/// op struct be passed wherever `impl Into<Op>` is expected.
#[derive(DisplayOp, DstsAsSlice, SrcsAsSlice, FromVariants)]
pub enum Op {
    FAdd(OpFAdd),
    FFma(OpFFma),
    FMnMx(OpFMnMx),
    FMul(OpFMul),
    Rro(OpRro),
    MuFu(OpMuFu),
    FSet(OpFSet),
    FSetP(OpFSetP),
    FSwzAdd(OpFSwzAdd),
    DAdd(OpDAdd),
    DFma(OpDFma),
    DMnMx(OpDMnMx),
    DMul(OpDMul),
    DSetP(OpDSetP),
    HAdd2(OpHAdd2),
    HFma2(OpHFma2),
    HMul2(OpHMul2),
    HSet2(OpHSet2),
    HSetP2(OpHSetP2),
    HMnMx2(OpHMnMx2),
    BMsk(OpBMsk),
    BRev(OpBRev),
    Bfe(OpBfe),
    Flo(OpFlo),
    IAbs(OpIAbs),
    IAdd2(OpIAdd2),
    IAdd2X(OpIAdd2X),
    IAdd3(OpIAdd3),
    IAdd3X(OpIAdd3X),
    IDp4(OpIDp4),
    IMad(OpIMad),
    IMad64(OpIMad64),
    IMul(OpIMul),
    IMnMx(OpIMnMx),
    ISetP(OpISetP),
    Lop2(OpLop2),
    Lop3(OpLop3),
    PopC(OpPopC),
    Shf(OpShf),
    Shl(OpShl),
    Shr(OpShr),
    F2F(OpF2F),
    F2FP(OpF2FP),
    F2I(OpF2I),
    I2F(OpI2F),
    I2I(OpI2I),
    FRnd(OpFRnd),
    Mov(OpMov),
    Prmt(OpPrmt),
    Sel(OpSel),
    Shfl(OpShfl),
    PLop3(OpPLop3),
    PSetP(OpPSetP),
    R2UR(OpR2UR),
    Tex(OpTex),
    Tld(OpTld),
    Tld4(OpTld4),
    Tmml(OpTmml),
    Txd(OpTxd),
    Txq(OpTxq),
    SuLd(OpSuLd),
    SuSt(OpSuSt),
    SuAtom(OpSuAtom),
    Ld(OpLd),
    Ldc(OpLdc),
    St(OpSt),
    Atom(OpAtom),
    AL2P(OpAL2P),
    ALd(OpALd),
    ASt(OpASt),
    Ipa(OpIpa),
    LdTram(OpLdTram),
    CCtl(OpCCtl),
    MemBar(OpMemBar),
    BClear(OpBClear),
    BMov(OpBMov),
    Break(OpBreak),
    BSSy(OpBSSy),
    BSync(OpBSync),
    Bra(OpBra),
    SSy(OpSSy),
    Sync(OpSync),
    Brk(OpBrk),
    PBk(OpPBk),
    Cont(OpCont),
    PCnt(OpPCnt),
    Exit(OpExit),
    WarpSync(OpWarpSync),
    Bar(OpBar),
    CS2R(OpCS2R),
    Isberd(OpIsberd),
    Kill(OpKill),
    Nop(OpNop),
    PixLd(OpPixLd),
    S2R(OpS2R),
    Vote(OpVote),
    Undef(OpUndef),
    SrcBar(OpSrcBar),
    PhiSrcs(OpPhiSrcs),
    PhiDsts(OpPhiDsts),
    Copy(OpCopy),
    Pin(OpPin),
    Unpin(OpUnpin),
    Swap(OpSwap),
    ParCopy(OpParCopy),
    RegOut(OpRegOut),
    Out(OpOut),
    OutFinal(OpOutFinal),
    Annotate(OpAnnotate),
}
impl_display_for_op!(Op);
6260
6261 impl Op {
is_branch(&self) -> bool6262 pub fn is_branch(&self) -> bool {
6263 match self {
6264 Op::Bra(_)
6265 | Op::Sync(_)
6266 | Op::Brk(_)
6267 | Op::Cont(_)
6268 | Op::Exit(_) => true,
6269 _ => false,
6270 }
6271 }
6272 }
6273
/// What a predicate refers to
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum PredRef {
    /// No predicate register or value; printed as `pT`
    None,
    /// An SSA value
    SSA(SSAValue),
    /// A register
    Reg(RegRef),
}
6280
6281 impl PredRef {
6282 #[allow(dead_code)]
as_reg(&self) -> Option<&RegRef>6283 pub fn as_reg(&self) -> Option<&RegRef> {
6284 match self {
6285 PredRef::Reg(r) => Some(r),
6286 _ => None,
6287 }
6288 }
6289
6290 #[allow(dead_code)]
as_ssa(&self) -> Option<&SSAValue>6291 pub fn as_ssa(&self) -> Option<&SSAValue> {
6292 match self {
6293 PredRef::SSA(r) => Some(r),
6294 _ => None,
6295 }
6296 }
6297
is_none(&self) -> bool6298 pub fn is_none(&self) -> bool {
6299 matches!(self, PredRef::None)
6300 }
6301
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6302 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6303 match self {
6304 PredRef::None | PredRef::Reg(_) => &[],
6305 PredRef::SSA(ssa) => slice::from_ref(ssa),
6306 }
6307 .iter()
6308 }
6309
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6310 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6311 match self {
6312 PredRef::None | PredRef::Reg(_) => &mut [],
6313 PredRef::SSA(ssa) => slice::from_mut(ssa),
6314 }
6315 .iter_mut()
6316 }
6317 }
6318
impl From<RegRef> for PredRef {
    /// Wraps a register reference as `PredRef::Reg`
    fn from(reg: RegRef) -> PredRef {
        PredRef::Reg(reg)
    }
}
6324
impl From<SSAValue> for PredRef {
    /// Wraps an SSA value as `PredRef::SSA`
    fn from(ssa: SSAValue) -> PredRef {
        PredRef::SSA(ssa)
    }
}
6330
impl fmt::Display for PredRef {
    /// Prints `pT` for `PredRef::None`; SSA values go through
    /// `SSAValue::fmt_plain` and registers through their own `fmt`
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            PredRef::None => write!(f, "pT"),
            PredRef::SSA(ssa) => ssa.fmt_plain(f),
            PredRef::Reg(reg) => reg.fmt(f),
        }
    }
}
6340
/// A predicate: a [`PredRef`] plus an optional inversion
#[derive(Clone, Copy)]
pub struct Pred {
    /// What the predicate refers to
    pub pred_ref: PredRef,
    /// If true, the predicate is negated (printed with a leading `!`)
    pub pred_inv: bool,
}
6346
6347 impl Pred {
is_true(&self) -> bool6348 pub fn is_true(&self) -> bool {
6349 self.pred_ref.is_none() && !self.pred_inv
6350 }
6351
is_false(&self) -> bool6352 pub fn is_false(&self) -> bool {
6353 self.pred_ref.is_none() && self.pred_inv
6354 }
6355
iter_ssa(&self) -> slice::Iter<'_, SSAValue>6356 pub fn iter_ssa(&self) -> slice::Iter<'_, SSAValue> {
6357 self.pred_ref.iter_ssa()
6358 }
6359
iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue>6360 pub fn iter_ssa_mut(&mut self) -> slice::IterMut<'_, SSAValue> {
6361 self.pred_ref.iter_ssa_mut()
6362 }
6363
bnot(self) -> Self6364 pub fn bnot(self) -> Self {
6365 Pred {
6366 pred_ref: self.pred_ref,
6367 pred_inv: !self.pred_inv,
6368 }
6369 }
6370 }
6371
impl From<bool> for Pred {
    /// Builds a constant predicate: `true` is the un-inverted
    /// `PredRef::None`, `false` is the inverted one
    fn from(b: bool) -> Self {
        Pred {
            pred_ref: PredRef::None,
            pred_inv: !b,
        }
    }
}
6380
impl<T: Into<PredRef>> From<T> for Pred {
    /// Builds a non-inverted predicate from anything convertible to a
    /// `PredRef`
    fn from(p: T) -> Self {
        Pred {
            pred_ref: p.into(),
            pred_inv: false,
        }
    }
}
6389
impl fmt::Display for Pred {
    /// Prints the predicate reference, prefixed with `!` when inverted
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.pred_inv {
            write!(f, "!")?;
        }
        self.pred_ref.fmt(f)
    }
}
6398
/// Smallest instruction delay
pub const MIN_INSTR_DELAY: u8 = 1;
/// Largest instruction delay accepted by [`InstrDeps::set_delay`]
pub const MAX_INSTR_DELAY: u8 = 15;

/// Scheduling and dependency information attached to an instruction
pub struct InstrDeps {
    /// Delay value, at most [`MAX_INSTR_DELAY`] when set via `set_delay`
    pub delay: u8,
    /// Yield flag
    pub yld: bool,
    /// Write barrier index, or -1 when none is assigned
    wr_bar: i8,
    /// Read barrier index, or -1 when none is assigned
    rd_bar: i8,
    /// Bitmask of barriers to wait on, one bit per barrier index
    pub wt_bar_mask: u8,
    /// Bitmask of reuse slots, one bit per index
    pub reuse_mask: u8,
}

impl InstrDeps {
    /// Creates dependency info with zero delay, no yield, no barriers and
    /// empty masks
    pub fn new() -> InstrDeps {
        InstrDeps {
            delay: 0,
            yld: false,
            wr_bar: -1,
            rd_bar: -1,
            wt_bar_mask: 0,
            reuse_mask: 0,
        }
    }

    /// Returns the read barrier index, or `None` if none has been set
    pub fn rd_bar(&self) -> Option<u8> {
        if self.rd_bar < 0 {
            None
        } else {
            Some(self.rd_bar.try_into().unwrap())
        }
    }

    /// Returns the write barrier index, or `None` if none has been set
    pub fn wr_bar(&self) -> Option<u8> {
        if self.wr_bar < 0 {
            None
        } else {
            Some(self.wr_bar.try_into().unwrap())
        }
    }

    /// Sets the delay
    ///
    /// # Panics
    ///
    /// Panics if `delay` exceeds [`MAX_INSTR_DELAY`].
    pub fn set_delay(&mut self, delay: u8) {
        assert!(delay <= MAX_INSTR_DELAY);
        self.delay = delay;
    }

    /// Sets the yield flag
    pub fn set_yield(&mut self, yld: bool) {
        self.yld = yld;
    }

    /// Sets the read barrier index
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not below 6.
    pub fn set_rd_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.rd_bar = idx.try_into().unwrap();
    }

    /// Sets the write barrier index
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not below 6.
    pub fn set_wr_bar(&mut self, idx: u8) {
        assert!(idx < 6);
        self.wr_bar = idx.try_into().unwrap();
    }

    /// Adds barrier `idx` to the set of barriers to wait on
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not below 6.
    pub fn add_wt_bar(&mut self, idx: u8) {
        // Check the index before shifting.  Without this, an index of 8 or
        // more overflows the u8 shift: a confusing panic with overflow
        // checks on, and a silently wrong bit with them off.
        assert!(idx < 6);
        self.add_wt_bar_mask(1 << idx);
    }

    /// Adds every barrier in `bar_mask` to the set of barriers to wait on
    ///
    /// # Panics
    ///
    /// Panics if `bar_mask` has any bit set at position 6 or above.
    pub fn add_wt_bar_mask(&mut self, bar_mask: u8) {
        assert!(bar_mask < 1 << 6);
        self.wt_bar_mask |= bar_mask;
    }

    /// Sets bit `idx` in the reuse mask
    ///
    /// # Panics
    ///
    /// Panics if `idx` is not below 6.
    #[allow(dead_code)]
    pub fn add_reuse(&mut self, idx: u8) {
        assert!(idx < 6);
        self.reuse_mask |= 1_u8 << idx;
    }
}

impl fmt::Display for InstrDeps {
    /// Prints only the fields that are set, each preceded by a space, in the
    /// order delay, wait mask, read barrier, write barrier, reuse mask, yield
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.delay > 0 {
            write!(f, " delay={}", self.delay)?;
        }
        if self.wt_bar_mask != 0 {
            write!(f, " wt={:06b}", self.wt_bar_mask)?;
        }
        if self.rd_bar >= 0 {
            write!(f, " rd:{}", self.rd_bar)?;
        }
        if self.wr_bar >= 0 {
            write!(f, " wr:{}", self.wr_bar)?;
        }
        if self.reuse_mask != 0 {
            write!(f, " reuse={:06b}", self.reuse_mask)?;
        }
        if self.yld {
            write!(f, " yld")?;
        }
        Ok(())
    }
}
6497
/// A single IR instruction: an operation plus its predicate and dependency
/// information
pub struct Instr {
    /// Predicate attached to the instruction; `Instr::new` sets it to the
    /// constant true
    pub pred: Pred,
    /// The operation itself
    pub op: Op,
    /// Scheduling and dependency information
    pub deps: InstrDeps,
}
6503
6504 impl Instr {
new(op: impl Into<Op>) -> Instr6505 pub fn new(op: impl Into<Op>) -> Instr {
6506 Instr {
6507 op: op.into(),
6508 pred: true.into(),
6509 deps: InstrDeps::new(),
6510 }
6511 }
6512
new_boxed(op: impl Into<Op>) -> Box<Self>6513 pub fn new_boxed(op: impl Into<Op>) -> Box<Self> {
6514 Box::new(Instr::new(op))
6515 }
6516
dsts(&self) -> &[Dst]6517 pub fn dsts(&self) -> &[Dst] {
6518 self.op.dsts_as_slice()
6519 }
6520
dsts_mut(&mut self) -> &mut [Dst]6521 pub fn dsts_mut(&mut self) -> &mut [Dst] {
6522 self.op.dsts_as_mut_slice()
6523 }
6524
srcs(&self) -> &[Src]6525 pub fn srcs(&self) -> &[Src] {
6526 self.op.srcs_as_slice()
6527 }
6528
srcs_mut(&mut self) -> &mut [Src]6529 pub fn srcs_mut(&mut self) -> &mut [Src] {
6530 self.op.srcs_as_mut_slice()
6531 }
6532
src_types(&self) -> SrcTypeList6533 pub fn src_types(&self) -> SrcTypeList {
6534 self.op.src_types()
6535 }
6536
for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue))6537 pub fn for_each_ssa_use(&self, mut f: impl FnMut(&SSAValue)) {
6538 for ssa in self.pred.iter_ssa() {
6539 f(ssa);
6540 }
6541 for src in self.srcs() {
6542 for ssa in src.iter_ssa() {
6543 f(ssa);
6544 }
6545 }
6546 }
6547
for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6548 pub fn for_each_ssa_use_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6549 for ssa in self.pred.iter_ssa_mut() {
6550 f(ssa);
6551 }
6552 for src in self.srcs_mut() {
6553 for ssa in src.iter_ssa_mut() {
6554 f(ssa);
6555 }
6556 }
6557 }
6558
for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue))6559 pub fn for_each_ssa_def(&self, mut f: impl FnMut(&SSAValue)) {
6560 for dst in self.dsts() {
6561 for ssa in dst.iter_ssa() {
6562 f(ssa);
6563 }
6564 }
6565 }
6566
for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue))6567 pub fn for_each_ssa_def_mut(&mut self, mut f: impl FnMut(&mut SSAValue)) {
6568 for dst in self.dsts_mut() {
6569 for ssa in dst.iter_ssa_mut() {
6570 f(ssa);
6571 }
6572 }
6573 }
6574
is_branch(&self) -> bool6575 pub fn is_branch(&self) -> bool {
6576 self.op.is_branch()
6577 }
6578
uses_global_mem(&self) -> bool6579 pub fn uses_global_mem(&self) -> bool {
6580 match &self.op {
6581 Op::Atom(op) => op.mem_space != MemSpace::Local,
6582 Op::Ld(op) => op.access.space != MemSpace::Local,
6583 Op::St(op) => op.access.space != MemSpace::Local,
6584 Op::SuAtom(_) | Op::SuLd(_) | Op::SuSt(_) => true,
6585 _ => false,
6586 }
6587 }
6588
writes_global_mem(&self) -> bool6589 pub fn writes_global_mem(&self) -> bool {
6590 match &self.op {
6591 Op::Atom(op) => matches!(op.mem_space, MemSpace::Global(_)),
6592 Op::St(op) => matches!(op.access.space, MemSpace::Global(_)),
6593 Op::SuAtom(_) | Op::SuSt(_) => true,
6594 _ => false,
6595 }
6596 }
6597
can_eliminate(&self) -> bool6598 pub fn can_eliminate(&self) -> bool {
6599 match &self.op {
6600 Op::ASt(_)
6601 | Op::SuSt(_)
6602 | Op::SuAtom(_)
6603 | Op::St(_)
6604 | Op::Atom(_)
6605 | Op::CCtl(_)
6606 | Op::MemBar(_)
6607 | Op::Kill(_)
6608 | Op::Nop(_)
6609 | Op::BSync(_)
6610 | Op::Bra(_)
6611 | Op::SSy(_)
6612 | Op::Sync(_)
6613 | Op::Brk(_)
6614 | Op::PBk(_)
6615 | Op::Cont(_)
6616 | Op::PCnt(_)
6617 | Op::Exit(_)
6618 | Op::WarpSync(_)
6619 | Op::Bar(_)
6620 | Op::RegOut(_)
6621 | Op::Out(_)
6622 | Op::OutFinal(_)
6623 | Op::Annotate(_) => false,
6624 Op::BMov(op) => !op.clear,
6625 _ => true,
6626 }
6627 }
6628
is_uniform(&self) -> bool6629 pub fn is_uniform(&self) -> bool {
6630 match &self.op {
6631 Op::PhiDsts(_) => false,
6632 op => op.is_uniform(),
6633 }
6634 }
6635
has_fixed_latency(&self, sm: u8) -> bool6636 pub fn has_fixed_latency(&self, sm: u8) -> bool {
6637 match &self.op {
6638 // Float ALU
6639 Op::F2FP(_)
6640 | Op::FAdd(_)
6641 | Op::FFma(_)
6642 | Op::FMnMx(_)
6643 | Op::FMul(_)
6644 | Op::FSet(_)
6645 | Op::FSetP(_)
6646 | Op::HAdd2(_)
6647 | Op::HFma2(_)
6648 | Op::HMul2(_)
6649 | Op::HSet2(_)
6650 | Op::HSetP2(_)
6651 | Op::HMnMx2(_)
6652 | Op::FSwzAdd(_) => true,
6653
6654 // Multi-function unit is variable latency
6655 Op::Rro(_) | Op::MuFu(_) => false,
6656
6657 // Double-precision float ALU
6658 Op::DAdd(_)
6659 | Op::DFma(_)
6660 | Op::DMnMx(_)
6661 | Op::DMul(_)
6662 | Op::DSetP(_) => false,
6663
6664 // Integer ALU
6665 Op::BRev(_) | Op::Flo(_) | Op::PopC(_) => false,
6666 Op::IMad(_) | Op::IMul(_) => sm >= 70,
6667 Op::BMsk(_)
6668 | Op::IAbs(_)
6669 | Op::IAdd2(_)
6670 | Op::IAdd2X(_)
6671 | Op::IAdd3(_)
6672 | Op::IAdd3X(_)
6673 | Op::IDp4(_)
6674 | Op::IMad64(_)
6675 | Op::IMnMx(_)
6676 | Op::ISetP(_)
6677 | Op::Lop2(_)
6678 | Op::Lop3(_)
6679 | Op::Shf(_)
6680 | Op::Shl(_)
6681 | Op::Shr(_)
6682 | Op::Bfe(_) => true,
6683
6684 // Conversions are variable latency?!?
6685 Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
6686 false
6687 }
6688
6689 // Move ops
6690 Op::Mov(_) | Op::Prmt(_) | Op::Sel(_) => true,
6691 Op::Shfl(_) => false,
6692
6693 // Predicate ops
6694 Op::PLop3(_) | Op::PSetP(_) => true,
6695
6696 // Uniform ops
6697 Op::R2UR(_) => false,
6698
6699 // Texture ops
6700 Op::Tex(_)
6701 | Op::Tld(_)
6702 | Op::Tld4(_)
6703 | Op::Tmml(_)
6704 | Op::Txd(_)
6705 | Op::Txq(_) => false,
6706
6707 // Surface ops
6708 Op::SuLd(_) | Op::SuSt(_) | Op::SuAtom(_) => false,
6709
6710 // Memory ops
6711 Op::Ld(_)
6712 | Op::Ldc(_)
6713 | Op::St(_)
6714 | Op::Atom(_)
6715 | Op::AL2P(_)
6716 | Op::ALd(_)
6717 | Op::ASt(_)
6718 | Op::Ipa(_)
6719 | Op::CCtl(_)
6720 | Op::LdTram(_)
6721 | Op::MemBar(_) => false,
6722
6723 // Control-flow ops
6724 Op::BClear(_) | Op::Break(_) | Op::BSSy(_) | Op::BSync(_) => true,
6725 Op::SSy(_)
6726 | Op::Sync(_)
6727 | Op::Brk(_)
6728 | Op::PBk(_)
6729 | Op::Cont(_)
6730 | Op::PCnt(_) => true,
6731 Op::Bra(_) | Op::Exit(_) => true,
6732 Op::WarpSync(_) => false,
6733
6734 // The barrier half is HW scoreboarded by the GPR isn't. When
6735 // moving from a GPR to a barrier, we still need a token for WaR
6736 // hazards.
6737 Op::BMov(_) => false,
6738
6739 // Geometry ops
6740 Op::Out(_) | Op::OutFinal(_) => false,
6741
6742 // Miscellaneous ops
6743 Op::Bar(_)
6744 | Op::CS2R(_)
6745 | Op::Isberd(_)
6746 | Op::Kill(_)
6747 | Op::PixLd(_)
6748 | Op::S2R(_) => false,
6749 Op::Nop(_) | Op::Vote(_) => true,
6750
6751 // Virtual ops
6752 Op::Undef(_)
6753 | Op::SrcBar(_)
6754 | Op::PhiSrcs(_)
6755 | Op::PhiDsts(_)
6756 | Op::Copy(_)
6757 | Op::Pin(_)
6758 | Op::Unpin(_)
6759 | Op::Swap(_)
6760 | Op::ParCopy(_)
6761 | Op::RegOut(_)
6762 | Op::Annotate(_) => {
6763 panic!("Not a hardware opcode")
6764 }
6765 }
6766 }
6767
needs_yield(&self) -> bool6768 pub fn needs_yield(&self) -> bool {
6769 matches!(&self.op, Op::Bar(_) | Op::BSync(_))
6770 }
6771
fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6772 fn fmt_pred(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6773 if !self.pred.is_true() {
6774 write!(f, "@{} ", self.pred)?;
6775 }
6776 Ok(())
6777 }
6778 }
6779
impl fmt::Display for Instr {
    /// Prints the predicate prefix, a space, the op and then the dependency
    /// information
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{} {}{}", Fmt(|f| self.fmt_pred(f)), self.op, self.deps)
    }
}
6785
impl<T: Into<Op>> From<T> for Instr {
    /// Builds an instruction from anything convertible to an `Op`, exactly
    /// as `Instr::new` does
    fn from(value: T) -> Self {
        Self::new(value)
    }
}
6791
/// What an instruction-mapping callback returns: no instruction, one
/// instruction, or many instructions to put in place of the original
pub type MappedInstrs = SmallVec<Box<Instr>>;
6793
/// A labeled, ordered list of instructions
pub struct BasicBlock {
    /// The label identifying this block
    pub label: Label,

    /// Whether or not this block is uniform
    ///
    /// If true, then all non-exited lanes in a warp which execute this block
    /// are guaranteed to execute it together
    pub uniform: bool,

    /// The instructions of the block, in order
    pub instrs: Vec<Box<Instr>>,
}
6805
6806 impl BasicBlock {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>) -> MappedInstrs, )6807 pub fn map_instrs(
6808 &mut self,
6809 mut map: impl FnMut(Box<Instr>) -> MappedInstrs,
6810 ) {
6811 let mut instrs = Vec::new();
6812 for i in self.instrs.drain(..) {
6813 match map(i) {
6814 MappedInstrs::None => (),
6815 MappedInstrs::One(i) => {
6816 instrs.push(i);
6817 }
6818 MappedInstrs::Many(mut v) => {
6819 instrs.append(&mut v);
6820 }
6821 }
6822 }
6823 self.instrs = instrs;
6824 }
6825
phi_dsts_ip(&self) -> Option<usize>6826 pub fn phi_dsts_ip(&self) -> Option<usize> {
6827 for (ip, instr) in self.instrs.iter().enumerate() {
6828 match &instr.op {
6829 Op::Annotate(_) => (),
6830 Op::PhiDsts(_) => return Some(ip),
6831 _ => break,
6832 }
6833 }
6834 None
6835 }
6836
phi_dsts(&self) -> Option<&OpPhiDsts>6837 pub fn phi_dsts(&self) -> Option<&OpPhiDsts> {
6838 self.phi_dsts_ip().map(|ip| match &self.instrs[ip].op {
6839 Op::PhiDsts(phi) => phi,
6840 _ => panic!("Expected to find the phi"),
6841 })
6842 }
6843
6844 #[allow(dead_code)]
phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts>6845 pub fn phi_dsts_mut(&mut self) -> Option<&mut OpPhiDsts> {
6846 self.phi_dsts_ip().map(|ip| match &mut self.instrs[ip].op {
6847 Op::PhiDsts(phi) => phi,
6848 _ => panic!("Expected to find the phi"),
6849 })
6850 }
6851
phi_srcs_ip(&self) -> Option<usize>6852 pub fn phi_srcs_ip(&self) -> Option<usize> {
6853 for (ip, instr) in self.instrs.iter().enumerate().rev() {
6854 match &instr.op {
6855 Op::Annotate(_) => (),
6856 Op::PhiSrcs(_) => return Some(ip),
6857 _ if instr.is_branch() => (),
6858 _ => break,
6859 }
6860 }
6861 None
6862 }
phi_srcs(&self) -> Option<&OpPhiSrcs>6863 pub fn phi_srcs(&self) -> Option<&OpPhiSrcs> {
6864 self.phi_srcs_ip().map(|ip| match &self.instrs[ip].op {
6865 Op::PhiSrcs(phi) => phi,
6866 _ => panic!("Expected to find the phi"),
6867 })
6868 }
6869
phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs>6870 pub fn phi_srcs_mut(&mut self) -> Option<&mut OpPhiSrcs> {
6871 self.phi_srcs_ip().map(|ip| match &mut self.instrs[ip].op {
6872 Op::PhiSrcs(phi) => phi,
6873 _ => panic!("Expected to find the phi"),
6874 })
6875 }
6876
branch(&self) -> Option<&Instr>6877 pub fn branch(&self) -> Option<&Instr> {
6878 if let Some(i) = self.instrs.last() {
6879 if i.is_branch() {
6880 Some(i)
6881 } else {
6882 None
6883 }
6884 } else {
6885 None
6886 }
6887 }
6888
branch_ip(&self) -> Option<usize>6889 pub fn branch_ip(&self) -> Option<usize> {
6890 if let Some(i) = self.instrs.last() {
6891 if i.is_branch() {
6892 Some(self.instrs.len() - 1)
6893 } else {
6894 None
6895 }
6896 } else {
6897 None
6898 }
6899 }
6900
6901 #[allow(dead_code)]
branch_mut(&mut self) -> Option<&mut Instr>6902 pub fn branch_mut(&mut self) -> Option<&mut Instr> {
6903 if let Some(i) = self.instrs.last_mut() {
6904 if i.is_branch() {
6905 Some(i)
6906 } else {
6907 None
6908 }
6909 } else {
6910 None
6911 }
6912 }
6913
falls_through(&self) -> bool6914 pub fn falls_through(&self) -> bool {
6915 if let Some(i) = self.branch() {
6916 !i.pred.is_true()
6917 } else {
6918 true
6919 }
6920 }
6921 }
6922
/// A function: a control-flow graph of basic blocks plus the allocators
/// that go with it
pub struct Function {
    /// Allocator for SSA values; handed to `map_instrs` callbacks
    pub ssa_alloc: SSAValueAllocator,
    /// Allocator for phis
    pub phi_alloc: PhiAllocator,
    /// The basic blocks, organized as a CFG
    pub blocks: CFG<BasicBlock>,
}
6928
6929 impl Function {
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )6930 pub fn map_instrs(
6931 &mut self,
6932 mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
6933 ) {
6934 let alloc = &mut self.ssa_alloc;
6935 for b in &mut self.blocks {
6936 b.map_instrs(|i| map(i, alloc));
6937 }
6938 }
6939 }
6940
6941 impl fmt::Display for Function {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result6942 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6943 let mut pred_width = 0;
6944 let mut dsts_width = 0;
6945 let mut op_width = 0;
6946
6947 let mut blocks = Vec::new();
6948 for b in &self.blocks {
6949 let mut instrs = Vec::new();
6950 for i in &b.instrs {
6951 let mut pred = String::new();
6952 write!(pred, "{}", Fmt(|f| i.fmt_pred(f)))?;
6953 let mut dsts = String::new();
6954 write!(dsts, "{}", Fmt(|f| i.op.fmt_dsts(f)))?;
6955 let mut op = String::new();
6956 write!(op, "{}", Fmt(|f| i.op.fmt_op(f)))?;
6957 let mut deps = String::new();
6958 write!(deps, "{}", i.deps)?;
6959
6960 pred_width = max(pred_width, pred.len());
6961 dsts_width = max(dsts_width, dsts.len());
6962 op_width = max(op_width, op.len());
6963 let is_annotation = matches!(i.op, Op::Annotate(_));
6964
6965 instrs.push((pred, dsts, op, deps, is_annotation));
6966 }
6967 blocks.push(instrs);
6968 }
6969
6970 for (i, mut b) in blocks.drain(..).enumerate() {
6971 let u = if self.blocks[i].uniform { ".u" } else { "" };
6972 write!(f, "block{u} {} {} [", i, self.blocks[i].label)?;
6973 for (pi, p) in self.blocks.pred_indices(i).iter().enumerate() {
6974 if pi > 0 {
6975 write!(f, ", ")?;
6976 }
6977 write!(f, "{}", p)?;
6978 }
6979 write!(f, "] -> {{\n")?;
6980
6981 for (pred, dsts, op, deps, is_annotation) in b.drain(..) {
6982 let eq_sym = if dsts.is_empty() { " " } else { "=" };
6983 if is_annotation {
6984 write!(f, "\n{}\n", op)?;
6985 } else if deps.is_empty() {
6986 write!(
6987 f,
6988 "{:<pred_width$} {:<dsts_width$} {} {}\n",
6989 pred, dsts, eq_sym, op,
6990 )?;
6991 } else {
6992 write!(
6993 f,
6994 "{:<pred_width$} {:<dsts_width$} {} \
6995 {:<op_width$} //{}\n",
6996 pred, dsts, eq_sym, op, deps,
6997 )?;
6998 }
6999 }
7000
7001 write!(f, "}} -> [")?;
7002 for (si, s) in self.blocks.succ_indices(i).iter().enumerate() {
7003 if si > 0 {
7004 write!(f, ", ")?;
7005 }
7006 write!(f, "{}", s)?;
7007 }
7008 write!(f, "]\n")?;
7009 }
7010 Ok(())
7011 }
7012 }
7013
/// Stage-specific information for compute shaders
#[derive(Debug)]
pub struct ComputeShaderInfo {
    /// Local size in each of the three dimensions
    pub local_size: [u16; 3],
    /// NOTE(review): presumably the shared memory size; units are not
    /// visible here
    pub smem_size: u16,
}
7019
/// Stage-specific information for fragment shaders
///
/// Each field is a flag recording whether the shader uses the named feature.
#[derive(Debug)]
pub struct FragmentShaderInfo {
    pub uses_kill: bool,
    pub does_interlock: bool,
    pub post_depth_coverage: bool,
    pub early_fragment_tests: bool,
    pub uses_sample_shading: bool,
}
7028
/// Stage-specific information for geometry shaders
#[derive(Debug)]
pub struct GeometryShaderInfo {
    pub passthrough_enable: bool,
    /// Bitmask; NOTE(review): presumably one bit per output stream
    pub stream_out_mask: u8,
    pub threads_per_input_primitive: u8,
    /// Topology of the primitives the shader outputs
    pub output_topology: OutputTopology,
    pub max_output_vertex_count: u16,
}
7037
impl Default for GeometryShaderInfo {
    /// Everything zeroed or disabled, with `OutputTopology::LineStrip` as
    /// the topology
    fn default() -> Self {
        Self {
            passthrough_enable: false,
            stream_out_mask: 0,
            threads_per_input_primitive: 0,
            output_topology: OutputTopology::LineStrip,
            max_output_vertex_count: 0,
        }
    }
}
7049
/// Stage-specific information carried by `ShaderStageInfo::TessellationInit`
#[derive(Debug)]
pub struct TessellationInitShaderInfo {
    pub per_patch_attribute_count: u8,
    pub threads_per_patch: u8,
}
7055
/// Tessellation domain
///
/// The discriminants are the matching `NAK_TS_DOMAIN_*` constants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum TessellationDomain {
    Isoline = NAK_TS_DOMAIN_ISOLINE,
    Triangle = NAK_TS_DOMAIN_TRIANGLE,
    Quad = NAK_TS_DOMAIN_QUAD,
}
7063
/// Tessellation spacing mode
///
/// The discriminants are the matching `NAK_TS_SPACING_*` constants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum TessellationSpacing {
    Integer = NAK_TS_SPACING_INTEGER,
    FractionalOdd = NAK_TS_SPACING_FRACT_ODD,
    FractionalEven = NAK_TS_SPACING_FRACT_EVEN,
}
7071
/// Primitive type produced by tessellation
///
/// The discriminants are the matching `NAK_TS_PRIMS_*` constants.
#[repr(u8)]
#[derive(Clone, Copy, Debug)]
pub enum TessellationPrimitives {
    Points = NAK_TS_PRIMS_POINTS,
    Lines = NAK_TS_PRIMS_LINES,
    TrianglesCW = NAK_TS_PRIMS_TRIANGLES_CW,
    TrianglesCCW = NAK_TS_PRIMS_TRIANGLES_CCW,
}
7080
/// Stage-specific information carried by `ShaderStageInfo::Tessellation`
#[derive(Debug)]
pub struct TessellationShaderInfo {
    pub domain: TessellationDomain,
    pub spacing: TessellationSpacing,
    pub primitives: TessellationPrimitives,
}
7087
/// The shader stage, together with that stage's specific information
///
/// `Vertex` is the only stage that carries no extra data.
#[derive(Debug)]
pub enum ShaderStageInfo {
    Compute(ComputeShaderInfo),
    Vertex,
    Fragment(FragmentShaderInfo),
    Geometry(GeometryShaderInfo),
    TessellationInit(TessellationInitShaderInfo),
    Tessellation(TessellationShaderInfo),
}
7097
/// Bitmasks of system-value I/O slots, one bit per 4-byte slot
#[derive(Debug, Default)]
pub struct SysValInfo {
    /// Slots at addresses below 0x080
    pub ab: u32,
    /// Slots at addresses 0x2c0 up to (but not including) 0x300
    pub c: u16,
}
7103
/// I/O information carried by `ShaderIoInfo::Vtg`
///
/// All masks hold one bit per 4-byte slot.
#[derive(Debug)]
pub struct VtgIoInfo {
    /// System values marked as read
    pub sysvals_in: SysValInfo,
    /// System values read at addresses 0x3a0 up to (but not including)
    /// 0x3c0
    pub sysvals_in_d: u8,
    /// System values marked as written
    pub sysvals_out: SysValInfo,
    /// System values written at addresses 0x3a0 up to (but not including)
    /// 0x3c0
    pub sysvals_out_d: u8,
    /// Attributes read, addresses 0x080 up to (but not including) 0x280
    pub attr_in: [u32; 4],
    /// Attributes written, same address range as `attr_in`
    pub attr_out: [u32; 4],
    /// Lowest slot passed to `mark_store_req` so far
    pub store_req_start: u8,
    /// Highest slot passed to `mark_store_req` so far
    pub store_req_end: u8,
    pub clip_enable: u8,
    pub cull_enable: u8,
    /// NOTE(review): presumably transform feedback info, when present
    pub xfb: Option<Box<nak_xfb_info>>,
}
7118
7119 impl VtgIoInfo {
mark_attrs(&mut self, addrs: Range<u16>, written: bool)7120 fn mark_attrs(&mut self, addrs: Range<u16>, written: bool) {
7121 let sysvals = if written {
7122 &mut self.sysvals_out
7123 } else {
7124 &mut self.sysvals_in
7125 };
7126
7127 let sysvals_d = if written {
7128 &mut self.sysvals_out_d
7129 } else {
7130 &mut self.sysvals_in_d
7131 };
7132
7133 let mut attr = BitMutView::new(if written {
7134 &mut self.attr_out
7135 } else {
7136 &mut self.attr_in
7137 });
7138
7139 let mut addrs = addrs;
7140 addrs.start &= !3;
7141 for addr in addrs.step_by(4) {
7142 if addr < 0x080 {
7143 sysvals.ab |= 1 << (addr / 4);
7144 } else if addr < 0x280 {
7145 let attr_idx = (addr - 0x080) as usize / 4;
7146 attr.set_bit(attr_idx, true);
7147 } else if addr < 0x2c0 {
7148 panic!("FF color I/O not supported");
7149 } else if addr < 0x300 {
7150 sysvals.c |= 1 << ((addr - 0x2c0) / 4);
7151 } else if addr >= 0x3a0 && addr < 0x3c0 {
7152 *sysvals_d |= 1 << ((addr - 0x3a0) / 4);
7153 }
7154 }
7155 }
7156
mark_attrs_read(&mut self, addrs: Range<u16>)7157 pub fn mark_attrs_read(&mut self, addrs: Range<u16>) {
7158 self.mark_attrs(addrs, false);
7159 }
7160
mark_attrs_written(&mut self, addrs: Range<u16>)7161 pub fn mark_attrs_written(&mut self, addrs: Range<u16>) {
7162 self.mark_attrs(addrs, true);
7163 }
7164
attr_written(&self, addr: u16) -> bool7165 pub fn attr_written(&self, addr: u16) -> bool {
7166 if addr < 0x080 {
7167 self.sysvals_out.ab & (1 << (addr / 4)) != 0
7168 } else if addr < 0x280 {
7169 let attr_idx = (addr - 0x080) as usize / 4;
7170 BitView::new(&self.attr_out).get_bit(attr_idx)
7171 } else if addr < 0x2c0 {
7172 panic!("FF color I/O not supported");
7173 } else if addr < 0x300 {
7174 self.sysvals_out.c & (1 << ((addr - 0x2c0) / 4)) != 0
7175 } else if addr >= 0x3a0 && addr < 0x3c0 {
7176 self.sysvals_out_d & (1 << ((addr - 0x3a0) / 4)) != 0
7177 } else {
7178 panic!("Unknown I/O address");
7179 }
7180 }
7181
mark_store_req(&mut self, addrs: Range<u16>)7182 pub fn mark_store_req(&mut self, addrs: Range<u16>) {
7183 let start = (addrs.start / 4).try_into().unwrap();
7184 let end = ((addrs.end - 1) / 4).try_into().unwrap();
7185 self.store_req_start = min(self.store_req_start, start);
7186 self.store_req_end = max(self.store_req_end, end);
7187 }
7188 }
7189
/// I/O information carried by `ShaderIoInfo::Fragment`
#[derive(Debug)]
pub struct FragmentIoInfo {
    /// System values marked as read
    pub sysvals_in: SysValInfo,
    /// Interpolation mode per 4-byte slot for addresses 0x3a0 up to (but
    /// not including) 0x3c0
    pub sysvals_in_d: [PixelImap; 8],
    /// Interpolation mode per 4-byte slot for addresses 0x080 up to (but
    /// not including) 0x280
    pub attr_in: [PixelImap; 128],
    /// One bit per 4-byte attribute slot, set by `mark_barycentric_attr_in`
    pub barycentric_attr_in: [u32; 4],

    pub reads_sample_mask: bool,
    /// Bitmask; NOTE(review): presumably of the color outputs written
    pub writes_color: u32,
    pub writes_sample_mask: bool,
    pub writes_depth: bool,
}
7202
7203 impl FragmentIoInfo {
mark_attr_read(&mut self, addr: u16, interp: PixelImap)7204 pub fn mark_attr_read(&mut self, addr: u16, interp: PixelImap) {
7205 if addr < 0x080 {
7206 self.sysvals_in.ab |= 1 << (addr / 4);
7207 } else if addr < 0x280 {
7208 let attr_idx = (addr - 0x080) as usize / 4;
7209 self.attr_in[attr_idx] = interp;
7210 } else if addr < 0x2c0 {
7211 panic!("FF color I/O not supported");
7212 } else if addr < 0x300 {
7213 self.sysvals_in.c |= 1 << ((addr - 0x2c0) / 4);
7214 } else if addr >= 0x3a0 && addr < 0x3c0 {
7215 let attr_idx = (addr - 0x3a0) as usize / 4;
7216 self.sysvals_in_d[attr_idx] = interp;
7217 }
7218 }
7219
mark_barycentric_attr_in(&mut self, addr: u16)7220 pub fn mark_barycentric_attr_in(&mut self, addr: u16) {
7221 assert!(addr >= 0x80 && addr < 0x280);
7222
7223 let mut attr = BitMutView::new(&mut self.barycentric_attr_in);
7224
7225 let attr_idx = (addr - 0x080) as usize / 4;
7226 attr.set_bit(attr_idx, true);
7227 }
7228 }
7229
/// I/O information for a shader: none, the `VtgIoInfo` form, or the
/// fragment form
#[derive(Debug)]
pub enum ShaderIoInfo {
    None,
    Vtg(VtgIoInfo),
    Fragment(FragmentIoInfo),
}
7236
/// Summary information about a shader
#[derive(Debug)]
pub struct ShaderInfo {
    pub num_gprs: u8,
    pub num_control_barriers: u8,
    /// Total number of instructions, as counted by `Shader::gather_info`
    pub num_instrs: u32,
    pub slm_size: u32,
    pub max_crs_depth: u32,
    /// Whether any instruction reports `Instr::uses_global_mem`
    pub uses_global_mem: bool,
    /// Whether any instruction reports `Instr::writes_global_mem`
    pub writes_global_mem: bool,
    pub uses_fp64: bool,
    /// The shader stage and its stage-specific information
    pub stage: ShaderStageInfo,
    /// The shader's I/O information
    pub io: ShaderIoInfo,
}
7250
/// Interface to the parts of compilation that depend on the target shader
/// model
pub trait ShaderModel {
    /// The shader model number
    fn sm(&self) -> u8;
    /// Number of registers available in `file`
    fn num_regs(&self, file: RegFile) -> u32;
    /// NOTE(review): presumably the number of GPRs reserved by hardware
    fn hw_reserved_gprs(&self) -> u32;
    /// NOTE(review): presumably the call/return stack size needed for the
    /// given maximum depth; units are not visible here
    fn crs_size(&self, max_crs_depth: u32) -> u32;

    /// Whether `op` may be a uniform op on this shader model
    fn op_can_be_uniform(&self, op: &Op) -> bool;

    /// Legalizes `op` in place, with `b` available to the implementation
    fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op);
    /// Encodes the whole shader into 32-bit words
    fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32>;
}
7262
/// For compute shaders, large values of local_size impose an additional limit
/// on the number of GPRs per thread
///
/// The thread count is the product of the three dimensions, rounded up to a
/// multiple of 128 (4 warps of 32 threads).  The result is the share of the
/// 65536 registers each thread gets, rounded down to a multiple of 8 and
/// capped at 255.
///
/// # Panics
///
/// Panics with a division by zero if any dimension of `local_size` is zero.
pub fn gpr_limit_from_local_size(local_size: &[u16; 3]) -> u32 {
    fn prev_multiple_of(x: u64, y: u64) -> u64 {
        (x / y) * y
    }

    // Widen before multiplying: the product of three u16 dimensions can
    // overflow a u16 (and even a u32), but always fits in a u64.
    let local_size: u64 =
        local_size.iter().map(|&dim| u64::from(dim)).product();
    // Warps are allocated in multiples of 4
    // Multiply that by 32 threads/warp
    let local_size = local_size.next_multiple_of(4 * 32);
    let total_regs: u64 = 65536;

    let out = total_regs / local_size;
    // GPRs are allocated in multiples of 8
    let out = prev_multiple_of(out, 8);
    // The cap keeps the value below 256, so narrowing to u32 is lossless
    min(out, 255) as u32
}
7281
/// A shader: a list of functions plus summary information, tied to the
/// shader model it targets.
///
/// The lifetime `'a` is that of the borrowed [`ShaderModel`].
pub struct Shader<'a> {
    /// The shader model this shader targets.
    pub sm: &'a dyn ShaderModel,
    /// Summary information; part of it is refreshed by
    /// [`Shader::gather_info`].
    pub info: ShaderInfo,
    /// The functions making up the shader, in the order the instruction
    /// walkers visit them.
    pub functions: Vec<Function>,
}
7287
7288 impl Shader<'_> {
for_each_instr(&self, f: &mut impl FnMut(&Instr))7289 pub fn for_each_instr(&self, f: &mut impl FnMut(&Instr)) {
7290 for func in &self.functions {
7291 for b in &func.blocks {
7292 for i in &b.instrs {
7293 f(i);
7294 }
7295 }
7296 }
7297 }
7298
map_instrs( &mut self, mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs, )7299 pub fn map_instrs(
7300 &mut self,
7301 mut map: impl FnMut(Box<Instr>, &mut SSAValueAllocator) -> MappedInstrs,
7302 ) {
7303 for f in &mut self.functions {
7304 f.map_instrs(&mut map);
7305 }
7306 }
7307
7308 /// Remove all annotations, presumably before encoding the shader.
remove_annotations(&mut self)7309 pub fn remove_annotations(&mut self) {
7310 self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
7311 if matches!(instr.op, Op::Annotate(_)) {
7312 MappedInstrs::None
7313 } else {
7314 MappedInstrs::One(instr)
7315 }
7316 })
7317 }
7318
gather_info(&mut self)7319 pub fn gather_info(&mut self) {
7320 let mut num_instrs = 0;
7321 let mut uses_global_mem = false;
7322 let mut writes_global_mem = false;
7323
7324 self.for_each_instr(&mut |instr| {
7325 num_instrs += 1;
7326
7327 if !uses_global_mem {
7328 uses_global_mem = instr.uses_global_mem();
7329 }
7330
7331 if !writes_global_mem {
7332 writes_global_mem = instr.writes_global_mem();
7333 }
7334 });
7335
7336 self.info.num_instrs = num_instrs;
7337 self.info.uses_global_mem = uses_global_mem;
7338 self.info.writes_global_mem = writes_global_mem;
7339 }
7340 }
7341
7342 impl fmt::Display for Shader<'_> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result7343 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
7344 for func in &self.functions {
7345 write!(f, "{}", func)?;
7346 }
7347 Ok(())
7348 }
7349 }
7350