• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::ir::*;
5 use crate::legalize::{
6     src_is_reg, swap_srcs_if_not_reg, LegalizeBuildHelpers, LegalizeBuilder,
7 };
8 use bitview::*;
9 
10 use std::collections::HashMap;
11 use std::ops::Range;
12 
/// Shader model covering the SM versions handled by this file.
///
/// The wrapped version number is validated once in [`ShaderModel50::new`],
/// so every instance holds a value in `50..70`.
pub struct ShaderModel50 {
    /// Numeric SM version, always within `50..70`.
    sm: u8,
}

impl ShaderModel50 {
    /// Creates a shader model for SM version `sm`.
    ///
    /// # Panics
    ///
    /// Panics if `sm` is not in `50..70`.
    pub fn new(sm: u8) -> Self {
        assert!(
            (50..70).contains(&sm),
            "ShaderModel50 only supports SM 50..70, got SM{sm}"
        );
        Self { sm }
    }
}
23 
24 impl ShaderModel for ShaderModel50 {
sm(&self) -> u825     fn sm(&self) -> u8 {
26         self.sm
27     }
28 
num_regs(&self, file: RegFile) -> u3229     fn num_regs(&self, file: RegFile) -> u32 {
30         match file {
31             RegFile::GPR => 255,
32             RegFile::UGPR => 0,
33             RegFile::Pred => 7,
34             RegFile::UPred => 0,
35             RegFile::Carry => 1,
36             RegFile::Bar => 0,
37             RegFile::Mem => RegRef::MAX_IDX + 1,
38         }
39     }
40 
hw_reserved_gprs(&self) -> u3241     fn hw_reserved_gprs(&self) -> u32 {
42         0
43     }
44 
crs_size(&self, max_crs_depth: u32) -> u3245     fn crs_size(&self, max_crs_depth: u32) -> u32 {
46         if max_crs_depth <= 16 {
47             0
48         } else if max_crs_depth <= 32 {
49             1024
50         } else {
51             ((max_crs_depth + 32) * 16).next_multiple_of(512)
52         }
53     }
54 
op_can_be_uniform(&self, _op: &Op) -> bool55     fn op_can_be_uniform(&self, _op: &Op) -> bool {
56         false
57     }
58 
legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op)59     fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op) {
60         as_sm50_op_mut(op).legalize(b);
61     }
62 
encode_shader(&self, s: &Shader<'_>) -> Vec<u32>63     fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32> {
64         encode_sm50_shader(self, s)
65     }
66 }
67 
68 trait SM50Op {
legalize(&mut self, b: &mut LegalizeBuilder)69     fn legalize(&mut self, b: &mut LegalizeBuilder);
encode(&self, e: &mut SM50Encoder<'_>)70     fn encode(&self, e: &mut SM50Encoder<'_>);
71 }
72 
73 struct SM50Encoder<'a> {
74     sm: &'a ShaderModel50,
75     ip: usize,
76     labels: &'a HashMap<Label, usize>,
77     inst: [u32; 2],
78     sched: u32,
79 }
80 
81 impl BitViewable for SM50Encoder<'_> {
bits(&self) -> usize82     fn bits(&self) -> usize {
83         BitView::new(&self.inst).bits()
84     }
85 
get_bit_range_u64(&self, range: Range<usize>) -> u6486     fn get_bit_range_u64(&self, range: Range<usize>) -> u64 {
87         BitView::new(&self.inst).get_bit_range_u64(range)
88     }
89 }
90 
91 impl BitMutViewable for SM50Encoder<'_> {
set_bit_range_u64(&mut self, range: Range<usize>, val: u64)92     fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) {
93         BitMutView::new(&mut self.inst).set_bit_range_u64(range, val);
94     }
95 }
96 
97 impl SetFieldU64 for SM50Encoder<'_> {
set_field_u64(&mut self, range: Range<usize>, val: u64)98     fn set_field_u64(&mut self, range: Range<usize>, val: u64) {
99         BitMutView::new(&mut self.inst).set_field_u64(range, val);
100     }
101 }
102 
103 impl SM50Encoder<'_> {
set_opcode(&mut self, opcode: u16)104     fn set_opcode(&mut self, opcode: u16) {
105         self.set_field(48..64, opcode);
106     }
107 
set_pred_reg(&mut self, range: Range<usize>, reg: RegRef)108     fn set_pred_reg(&mut self, range: Range<usize>, reg: RegRef) {
109         assert!(range.len() == 3);
110         assert!(reg.file() == RegFile::Pred);
111         assert!(reg.base_idx() <= 7);
112         assert!(reg.comps() == 1);
113         self.set_field(range, reg.base_idx());
114     }
115 
set_pred(&mut self, pred: &Pred)116     fn set_pred(&mut self, pred: &Pred) {
117         assert!(!pred.is_false());
118         self.set_pred_reg(
119             16..19,
120             match pred.pred_ref {
121                 PredRef::None => RegRef::zero(RegFile::Pred, 1),
122                 PredRef::Reg(reg) => reg,
123                 PredRef::SSA(_) => panic!("SSA values must be lowered"),
124             },
125         );
126         self.set_bit(19, pred.pred_inv);
127     }
128 
set_instr_deps(&mut self, deps: &InstrDeps)129     fn set_instr_deps(&mut self, deps: &InstrDeps) {
130         let mut sched = BitMutView::new(&mut self.sched);
131 
132         sched.set_field(0..4, deps.delay);
133         sched.set_bit(4, deps.yld);
134         sched.set_field(5..8, deps.wr_bar().unwrap_or(7));
135         sched.set_field(8..11, deps.rd_bar().unwrap_or(7));
136         sched.set_field(11..17, deps.wt_bar_mask);
137         sched.set_field(17..21, deps.reuse_mask);
138     }
139 
set_reg(&mut self, range: Range<usize>, reg: RegRef)140     fn set_reg(&mut self, range: Range<usize>, reg: RegRef) {
141         assert!(range.len() == 8);
142         assert!(reg.file() == RegFile::GPR);
143         self.set_field(range, reg.base_idx());
144     }
145 
set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef)146     fn set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef) {
147         match src_ref {
148             SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)),
149             SrcRef::Reg(reg) => self.set_reg(range, reg),
150             _ => panic!("Not a register"),
151         }
152     }
153 
set_reg_src(&mut self, range: Range<usize>, src: Src)154     fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
155         assert!(src.src_mod.is_none());
156         self.set_reg_src_ref(range, src.src_ref);
157     }
158 
set_reg_fmod_src( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, src: Src, )159     fn set_reg_fmod_src(
160         &mut self,
161         range: Range<usize>,
162         abs_bit: usize,
163         neg_bit: usize,
164         src: Src,
165     ) {
166         self.set_reg_src_ref(range, src.src_ref);
167         self.set_bit(abs_bit, src.src_mod.has_fabs());
168         self.set_bit(neg_bit, src.src_mod.has_fneg());
169     }
170 
set_reg_ineg_src( &mut self, range: Range<usize>, neg_bit: usize, src: Src, )171     fn set_reg_ineg_src(
172         &mut self,
173         range: Range<usize>,
174         neg_bit: usize,
175         src: Src,
176     ) {
177         self.set_reg_src_ref(range, src.src_ref);
178         self.set_bit(neg_bit, src.src_mod.is_ineg());
179     }
180 
set_reg_bnot_src( &mut self, range: Range<usize>, not_bit: usize, src: Src, )181     fn set_reg_bnot_src(
182         &mut self,
183         range: Range<usize>,
184         not_bit: usize,
185         src: Src,
186     ) {
187         self.set_reg_src_ref(range, src.src_ref);
188         self.set_bit(not_bit, src.src_mod.is_bnot());
189     }
190 
set_pred_dst(&mut self, range: Range<usize>, dst: Dst)191     fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
192         match dst {
193             Dst::None => {
194                 self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1));
195             }
196             Dst::Reg(reg) => self.set_pred_reg(range, reg),
197             _ => panic!("Not a register"),
198         }
199     }
200 
set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src)201     fn set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src) {
202         // The default for predicates is true
203         let true_reg = RegRef::new(RegFile::Pred, 7, 1);
204 
205         let (not, reg) = match src.src_ref {
206             SrcRef::True => (false, true_reg),
207             SrcRef::False => (true, true_reg),
208             SrcRef::Reg(reg) => (false, reg),
209             _ => panic!("Not a register"),
210         };
211         self.set_pred_reg(range, reg);
212         self.set_bit(not_bit, not ^ src.src_mod.is_bnot());
213     }
214 
set_dst(&mut self, dst: Dst)215     fn set_dst(&mut self, dst: Dst) {
216         let reg = match dst {
217             Dst::None => RegRef::zero(RegFile::GPR, 1),
218             Dst::Reg(reg) => reg,
219             _ => panic!("invalid dst {dst}"),
220         };
221         self.set_reg(0..8, reg);
222     }
223 
set_src_imm32(&mut self, range: Range<usize>, u: u32)224     fn set_src_imm32(&mut self, range: Range<usize>, u: u32) {
225         assert!(range.len() == 32);
226         self.set_field(range, u);
227     }
228 
set_src_imm_i20( &mut self, range: Range<usize>, sign_bit: usize, i: u32, )229     fn set_src_imm_i20(
230         &mut self,
231         range: Range<usize>,
232         sign_bit: usize,
233         i: u32,
234     ) {
235         assert!(range.len() == 19);
236         assert!((i & 0xfff80000) == 0 || (i & 0xfff80000) == 0xfff80000);
237 
238         self.set_field(range, i & 0x7ffff);
239         self.set_field(sign_bit..sign_bit + 1, (i & 0x80000) >> 19);
240     }
241 
set_src_imm_f20( &mut self, range: Range<usize>, sign_bit: usize, f: u32, )242     fn set_src_imm_f20(
243         &mut self,
244         range: Range<usize>,
245         sign_bit: usize,
246         f: u32,
247     ) {
248         assert!(range.len() == 19);
249         assert!((f & 0x00000fff) == 0);
250 
251         self.set_field(range, (f >> 12) & 0x7ffff);
252         self.set_field(sign_bit..sign_bit + 1, f >> 31);
253     }
254 
set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef)255     fn set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef) {
256         let mut v = BitMutView::new_subset(self, range);
257 
258         assert!(cb.offset % 4 == 0);
259 
260         v.set_field(0..14, cb.offset >> 2);
261         if let CBuf::Binding(idx) = cb.buf {
262             v.set_field(14..19, idx);
263         } else {
264             panic!("Must be a bound constant buffer");
265         }
266     }
267 
set_cb_fmod_src( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, src: Src, )268     fn set_cb_fmod_src(
269         &mut self,
270         range: Range<usize>,
271         abs_bit: usize,
272         neg_bit: usize,
273         src: Src,
274     ) {
275         if let SrcRef::CBuf(cb) = &src.src_ref {
276             self.set_src_cb(range, cb);
277         } else {
278             panic!("Not a CBuf source");
279         }
280 
281         self.set_bit(abs_bit, src.src_mod.has_fabs());
282         self.set_bit(neg_bit, src.src_mod.has_fneg());
283     }
284 
set_cb_ineg_src( &mut self, range: Range<usize>, neg_bit: usize, src: Src, )285     fn set_cb_ineg_src(
286         &mut self,
287         range: Range<usize>,
288         neg_bit: usize,
289         src: Src,
290     ) {
291         if let SrcRef::CBuf(cb) = &src.src_ref {
292             self.set_src_cb(range, cb);
293         } else {
294             panic!("Not a CBuf source");
295         }
296 
297         self.set_bit(neg_bit, src.src_mod.is_ineg());
298     }
299 
set_cb_bnot_src( &mut self, range: Range<usize>, not_bit: usize, src: Src, )300     fn set_cb_bnot_src(
301         &mut self,
302         range: Range<usize>,
303         not_bit: usize,
304         src: Src,
305     ) {
306         if let SrcRef::CBuf(cb) = &src.src_ref {
307             self.set_src_cb(range, cb);
308         } else {
309             panic!("Not a CBuf source");
310         }
311 
312         self.set_bit(not_bit, src.src_mod.is_bnot());
313     }
314 }
315 
316 //
317 // Legalization helpers
318 //
319 
320 pub trait SM50LegalizeBuildHelpers: LegalizeBuildHelpers {
copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType)321     fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) {
322         if src.src_mod.has_fabs() {
323             self.copy_alu_src_and_lower_fmod(src, src_type);
324         }
325     }
326 
copy_alu_src_if_i20_overflow( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )327     fn copy_alu_src_if_i20_overflow(
328         &mut self,
329         src: &mut Src,
330         reg_file: RegFile,
331         src_type: SrcType,
332     ) {
333         if src.as_imm_not_i20().is_some() {
334             self.copy_alu_src(src, reg_file, src_type);
335         }
336     }
337 
copy_alu_src_if_f20_overflow( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )338     fn copy_alu_src_if_f20_overflow(
339         &mut self,
340         src: &mut Src,
341         reg_file: RegFile,
342         src_type: SrcType,
343     ) {
344         if src.as_imm_not_f20().is_some() {
345             self.copy_alu_src(src, reg_file, src_type);
346         }
347     }
348 }
349 
350 impl SM50LegalizeBuildHelpers for LegalizeBuilder<'_> {}
351 
352 /// Helper to legalize extended or external instructions
353 ///
354 /// These are instructions which reach out external units such as load/store
355 /// and texture ops.  They typically can't take anything but GPRs and are the
356 /// only types of instructions that support vectors.
357 ///
legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder)358 fn legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder) {
359     let src_types = op.src_types();
360     for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() {
361         match src_types[i] {
362             SrcType::SSA => {
363                 assert!(src.as_ssa().is_some());
364             }
365             SrcType::GPR => {
366                 assert!(src_is_reg(src, RegFile::GPR));
367             }
368             SrcType::ALU
369             | SrcType::F16
370             | SrcType::F16v2
371             | SrcType::F32
372             | SrcType::F64
373             | SrcType::I32
374             | SrcType::B32 => {
375                 panic!("ALU srcs must be legalized explicitly");
376             }
377             SrcType::Pred => {
378                 panic!("Predicates must be legalized explicitly");
379             }
380             SrcType::Carry => {
381                 panic!("Carry values must be legalized explicitly");
382             }
383             SrcType::Bar => panic!("Barrier regs are Volta+"),
384         }
385     }
386 }
387 
388 //
389 // Implementations of SM50Op for each op we support on Maxwell/Pascal
390 //
391 
392 impl SM50Encoder<'_> {
set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode)393     fn set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode) {
394         assert!(range.len() == 2);
395         self.set_field(
396             range,
397             match rnd_mode {
398                 FRndMode::NearestEven => 0_u8,
399                 FRndMode::NegInf => 1_u8,
400                 FRndMode::PosInf => 2_u8,
401                 FRndMode::Zero => 3_u8,
402             },
403         );
404     }
405 }
406 
407 impl SM50Op for OpFAdd {
legalize(&mut self, b: &mut LegalizeBuilder)408     fn legalize(&mut self, b: &mut LegalizeBuilder) {
409         use RegFile::GPR;
410         let [src0, src1] = &mut self.srcs;
411         swap_srcs_if_not_reg(src0, src1, GPR);
412         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
413     }
414 
encode(&self, e: &mut SM50Encoder<'_>)415     fn encode(&self, e: &mut SM50Encoder<'_>) {
416         if let Some(imm32) = self.srcs[1].as_imm_not_f20() {
417             e.set_opcode(0x0800);
418             e.set_dst(self.dst);
419             e.set_reg_fmod_src(8..16, 54, 56, self.srcs[0]);
420             e.set_src_imm32(20..52, imm32);
421             e.set_bit(55, self.ftz);
422         } else {
423             match &self.srcs[1].src_ref {
424                 SrcRef::Zero | SrcRef::Reg(_) => {
425                     e.set_opcode(0x5c58);
426                     e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
427                 }
428                 SrcRef::Imm32(imm32) => {
429                     e.set_opcode(0x3858);
430                     e.set_src_imm_f20(20..39, 56, *imm32);
431                     assert!(self.srcs[1].src_mod.is_none());
432                 }
433                 SrcRef::CBuf(_) => {
434                     e.set_opcode(0x4c58);
435                     e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
436                 }
437                 src => panic!("Invalid fadd src1: {src}"),
438             }
439 
440             e.set_dst(self.dst);
441             e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
442 
443             e.set_rnd_mode(39..41, self.rnd_mode);
444             e.set_bit(44, self.ftz);
445             e.set_bit(50, self.saturate);
446         }
447     }
448 }
449 
450 impl SM50Op for OpFFma {
legalize(&mut self, b: &mut LegalizeBuilder)451     fn legalize(&mut self, b: &mut LegalizeBuilder) {
452         use RegFile::GPR;
453         let [src0, src1, src2] = &mut self.srcs;
454         b.copy_alu_src_if_fabs(src0, SrcType::F32);
455         b.copy_alu_src_if_fabs(src1, SrcType::F32);
456         b.copy_alu_src_if_fabs(src2, SrcType::F32);
457         swap_srcs_if_not_reg(src0, src1, GPR);
458         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
459         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
460         if src_is_reg(src1, GPR) {
461             b.copy_alu_src_if_imm(src2, GPR, SrcType::F32);
462         } else {
463             b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F32);
464         }
465     }
466 
encode(&self, e: &mut SM50Encoder<'_>)467     fn encode(&self, e: &mut SM50Encoder<'_>) {
468         // ffma doesn't have any abs flags.
469         assert!(!self.srcs[0].src_mod.has_fabs());
470         assert!(!self.srcs[1].src_mod.has_fabs());
471         assert!(!self.srcs[2].src_mod.has_fabs());
472 
473         // There is one fneg bit shared by the two fmul sources
474         let fneg_fmul =
475             self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
476         let fneg_src2 = self.srcs[2].src_mod.has_fneg();
477 
478         match &self.srcs[2].src_ref {
479             SrcRef::Zero | SrcRef::Reg(_) => {
480                 match &self.srcs[1].src_ref {
481                     SrcRef::Zero | SrcRef::Reg(_) => {
482                         e.set_opcode(0x5980);
483                         e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
484                     }
485                     SrcRef::Imm32(imm32) => {
486                         e.set_opcode(0x3280);
487 
488                         // Technically, ffma also supports a 32-bit immediate,
489                         // but only in the case where the destination is the
490                         // same as src2.  We don't support that right now.
491                         e.set_src_imm_f20(20..39, 56, *imm32);
492                     }
493                     SrcRef::CBuf(cb) => {
494                         e.set_opcode(0x4980);
495                         e.set_src_cb(20..39, cb);
496                     }
497                     src => panic!("Invalid ffma src1: {src}"),
498                 }
499 
500                 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
501             }
502             SrcRef::CBuf(cb) => {
503                 e.set_opcode(0x5180);
504                 e.set_src_cb(20..39, cb);
505                 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
506             }
507             src => panic!("Invalid ffma src2: {src}"),
508         }
509 
510         e.set_dst(self.dst);
511         e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
512 
513         e.set_bit(48, fneg_fmul);
514         e.set_bit(49, fneg_src2);
515         e.set_bit(50, self.saturate);
516         e.set_rnd_mode(51..53, self.rnd_mode);
517 
518         e.set_bit(53, self.ftz);
519         e.set_bit(54, self.dnz);
520     }
521 }
522 
523 impl SM50Op for OpFMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)524     fn legalize(&mut self, b: &mut LegalizeBuilder) {
525         use RegFile::GPR;
526         let [src0, src1] = &mut self.srcs;
527         swap_srcs_if_not_reg(src0, src1, GPR);
528         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
529         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
530     }
531 
encode(&self, e: &mut SM50Encoder<'_>)532     fn encode(&self, e: &mut SM50Encoder<'_>) {
533         match &self.srcs[1].src_ref {
534             SrcRef::Zero | SrcRef::Reg(_) => {
535                 e.set_opcode(0x5c60);
536                 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
537             }
538             SrcRef::Imm32(imm32) => {
539                 e.set_opcode(0x3860);
540                 e.set_src_imm_f20(20..39, 56, *imm32);
541                 assert!(self.srcs[1].src_mod.is_none());
542             }
543             SrcRef::CBuf(_) => {
544                 e.set_opcode(0x4c60);
545                 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
546             }
547             src => panic!("Invalid fmnmx src2: {src}"),
548         }
549 
550         e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
551         e.set_dst(self.dst);
552         e.set_pred_src(39..42, 42, self.min);
553         e.set_bit(44, self.ftz);
554     }
555 }
556 
557 impl SM50Op for OpFMul {
legalize(&mut self, b: &mut LegalizeBuilder)558     fn legalize(&mut self, b: &mut LegalizeBuilder) {
559         use RegFile::GPR;
560         let [src0, src1] = &mut self.srcs;
561         b.copy_alu_src_if_fabs(src0, SrcType::F32);
562         b.copy_alu_src_if_fabs(src1, SrcType::F32);
563         swap_srcs_if_not_reg(src0, src1, GPR);
564         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
565     }
566 
encode(&self, e: &mut SM50Encoder<'_>)567     fn encode(&self, e: &mut SM50Encoder<'_>) {
568         // fmul doesn't have any abs flags.
569         assert!(!self.srcs[0].src_mod.has_fabs());
570         assert!(!self.srcs[1].src_mod.has_fabs());
571 
572         // There is one fneg bit shared by both sources
573         let fneg =
574             self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
575 
576         if let Some(mut imm32) = self.srcs[1].as_imm_not_f20() {
577             e.set_opcode(0x1e00);
578 
579             e.set_bit(53, self.ftz);
580             e.set_bit(54, self.dnz);
581             e.set_bit(55, self.saturate);
582 
583             if fneg {
584                 // Flip the immediate sign bit
585                 imm32 ^= 0x80000000;
586             }
587             e.set_src_imm32(20..52, imm32);
588         } else {
589             match &self.srcs[1].src_ref {
590                 SrcRef::Zero | SrcRef::Reg(_) => {
591                     e.set_opcode(0x5c68);
592                     e.set_reg_src(20..28, self.srcs[1]);
593                 }
594                 SrcRef::Imm32(imm32) => {
595                     e.set_opcode(0x3868);
596                     e.set_src_imm_f20(20..39, 56, *imm32);
597                 }
598                 SrcRef::CBuf(cbuf) => {
599                     e.set_opcode(0x4c68);
600                     e.set_src_cb(20..39, cbuf);
601                 }
602                 src => panic!("Invalid fmul src1: {src}"),
603             }
604 
605             e.set_rnd_mode(39..41, self.rnd_mode);
606             e.set_field(41..44, 0x0_u8); // TODO: PDIV
607             e.set_bit(44, self.ftz);
608             e.set_bit(45, self.dnz);
609             e.set_bit(48, fneg);
610             e.set_bit(50, self.saturate);
611         }
612 
613         e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
614         e.set_dst(self.dst);
615     }
616 }
617 
618 impl SM50Op for OpRro {
legalize(&mut self, b: &mut LegalizeBuilder)619     fn legalize(&mut self, b: &mut LegalizeBuilder) {
620         use RegFile::GPR;
621         b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::F32);
622     }
623 
encode(&self, e: &mut SM50Encoder<'_>)624     fn encode(&self, e: &mut SM50Encoder<'_>) {
625         match &self.src.src_ref {
626             SrcRef::Zero | SrcRef::Reg(_) => {
627                 e.set_opcode(0x5c90);
628                 e.set_reg_fmod_src(20..28, 49, 45, self.src);
629             }
630             SrcRef::Imm32(imm32) => {
631                 e.set_opcode(0x3890);
632                 e.set_src_imm_f20(20..39, 56, *imm32);
633                 assert!(self.src.src_mod.is_none());
634             }
635             SrcRef::CBuf(_) => {
636                 e.set_opcode(0x4c90);
637                 e.set_cb_fmod_src(20..39, 49, 45, self.src);
638             }
639             src => panic!("Invalid rro src: {src}"),
640         }
641 
642         e.set_dst(self.dst);
643         e.set_field(
644             39..40,
645             match self.op {
646                 RroOp::SinCos => 0u8,
647                 RroOp::Exp2 => 1u8,
648             },
649         );
650     }
651 }
652 
653 impl SM50Op for OpMuFu {
legalize(&mut self, b: &mut LegalizeBuilder)654     fn legalize(&mut self, b: &mut LegalizeBuilder) {
655         b.copy_alu_src_if_not_reg(&mut self.src, RegFile::GPR, SrcType::GPR);
656     }
657 
encode(&self, e: &mut SM50Encoder<'_>)658     fn encode(&self, e: &mut SM50Encoder<'_>) {
659         e.set_opcode(0x5080);
660 
661         e.set_dst(self.dst);
662         e.set_reg_fmod_src(8..16, 46, 48, self.src);
663 
664         e.set_field(
665             20..24,
666             match self.op {
667                 MuFuOp::Cos => 0_u8,
668                 MuFuOp::Sin => 1_u8,
669                 MuFuOp::Exp2 => 2_u8,
670                 MuFuOp::Log2 => 3_u8,
671                 MuFuOp::Rcp => 4_u8,
672                 MuFuOp::Rsq => 5_u8,
673                 MuFuOp::Rcp64H => 6_u8,
674                 MuFuOp::Rsq64H => 7_u8,
675                 // SQRT is only on SM52 and later
676                 MuFuOp::Sqrt if e.sm.sm >= 52 => 8_u8,
677                 MuFuOp::Sqrt => panic!("MUFU.SQRT not supported on SM50"),
678                 MuFuOp::Tanh => panic!("MUFU.TANH not supported on SM50"),
679             },
680         );
681     }
682 }
683 
684 impl SM50Encoder<'_> {
set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp)685     fn set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp) {
686         assert!(range.len() == 4);
687         self.set_field(
688             range,
689             match op {
690                 FloatCmpOp::OrdLt => 0x01_u8,
691                 FloatCmpOp::OrdEq => 0x02_u8,
692                 FloatCmpOp::OrdLe => 0x03_u8,
693                 FloatCmpOp::OrdGt => 0x04_u8,
694                 FloatCmpOp::OrdNe => 0x05_u8,
695                 FloatCmpOp::OrdGe => 0x06_u8,
696                 FloatCmpOp::UnordLt => 0x09_u8,
697                 FloatCmpOp::UnordEq => 0x0a_u8,
698                 FloatCmpOp::UnordLe => 0x0b_u8,
699                 FloatCmpOp::UnordGt => 0x0c_u8,
700                 FloatCmpOp::UnordNe => 0x0d_u8,
701                 FloatCmpOp::UnordGe => 0x0e_u8,
702                 FloatCmpOp::IsNum => 0x07_u8,
703                 FloatCmpOp::IsNan => 0x08_u8,
704             },
705         );
706     }
707 
set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp)708     fn set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp) {
709         assert!(range.len() == 2);
710         self.set_field(
711             range,
712             match op {
713                 PredSetOp::And => 0_u8,
714                 PredSetOp::Or => 1_u8,
715                 PredSetOp::Xor => 2_u8,
716             },
717         );
718     }
719 
set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp)720     fn set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp) {
721         assert!(range.len() == 3);
722         self.set_field(
723             range,
724             match op {
725                 IntCmpOp::Eq => 2_u8,
726                 IntCmpOp::Ne => 5_u8,
727                 IntCmpOp::Lt => 1_u8,
728                 IntCmpOp::Le => 3_u8,
729                 IntCmpOp::Gt => 4_u8,
730                 IntCmpOp::Ge => 6_u8,
731             },
732         );
733     }
734 }
735 
736 impl SM50Op for OpFSet {
legalize(&mut self, b: &mut LegalizeBuilder)737     fn legalize(&mut self, b: &mut LegalizeBuilder) {
738         use RegFile::GPR;
739         let [src0, src1] = &mut self.srcs;
740         if swap_srcs_if_not_reg(src0, src1, GPR) {
741             self.cmp_op = self.cmp_op.flip();
742         }
743         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
744         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
745     }
746 
encode(&self, e: &mut SM50Encoder<'_>)747     fn encode(&self, e: &mut SM50Encoder<'_>) {
748         match &self.srcs[1].src_ref {
749             SrcRef::Zero | SrcRef::Reg(_) => {
750                 e.set_opcode(0x5800);
751                 e.set_reg_fmod_src(20..28, 44, 53, self.srcs[1]);
752             }
753             SrcRef::Imm32(imm32) => {
754                 e.set_opcode(0x3000);
755                 e.set_src_imm_f20(20..39, 56, *imm32);
756                 assert!(self.srcs[1].src_mod.is_none());
757             }
758             SrcRef::CBuf(_) => {
759                 e.set_opcode(0x4800);
760                 e.set_cb_fmod_src(20..39, 44, 6, self.srcs[1]);
761             }
762             src => panic!("Invalid fset src1: {src}"),
763         }
764 
765         e.set_reg_fmod_src(8..16, 54, 43, self.srcs[0]);
766         e.set_pred_src(39..42, 42, SrcRef::True.into());
767         e.set_float_cmp_op(48..52, self.cmp_op);
768         e.set_bit(52, true); // bool float
769         e.set_bit(55, self.ftz);
770         e.set_dst(self.dst);
771     }
772 }
773 
774 impl SM50Op for OpFSetP {
legalize(&mut self, b: &mut LegalizeBuilder)775     fn legalize(&mut self, b: &mut LegalizeBuilder) {
776         use RegFile::GPR;
777         let [src0, src1] = &mut self.srcs;
778         if swap_srcs_if_not_reg(src0, src1, GPR) {
779             self.cmp_op = self.cmp_op.flip();
780         }
781         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
782         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
783     }
784 
encode(&self, e: &mut SM50Encoder<'_>)785     fn encode(&self, e: &mut SM50Encoder<'_>) {
786         match &self.srcs[1].src_ref {
787             SrcRef::Zero | SrcRef::Reg(_) => {
788                 e.set_opcode(0x5bb0);
789                 e.set_reg_fmod_src(20..28, 44, 6, self.srcs[1]);
790             }
791             SrcRef::Imm32(imm32) => {
792                 e.set_opcode(0x36b0);
793                 e.set_src_imm_f20(20..39, 56, *imm32);
794                 assert!(self.srcs[1].src_mod.is_none());
795             }
796             SrcRef::CBuf(_) => {
797                 e.set_opcode(0x4bb0);
798                 e.set_cb_fmod_src(20..39, 44, 6, self.srcs[1]);
799             }
800             src => panic!("Invalid fsetp src1: {src}"),
801         }
802 
803         e.set_pred_dst(3..6, self.dst);
804         e.set_pred_dst(0..3, Dst::None); // dst1
805         e.set_reg_fmod_src(8..16, 7, 43, self.srcs[0]);
806         e.set_pred_src(39..42, 42, self.accum);
807         e.set_pred_set_op(45..47, self.set_op);
808         e.set_bit(47, self.ftz);
809         e.set_float_cmp_op(48..52, self.cmp_op);
810     }
811 }
812 
813 impl SM50Op for OpFSwzAdd {
legalize(&mut self, b: &mut LegalizeBuilder)814     fn legalize(&mut self, b: &mut LegalizeBuilder) {
815         use RegFile::GPR;
816         b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR);
817         b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR);
818     }
819 
encode(&self, e: &mut SM50Encoder<'_>)820     fn encode(&self, e: &mut SM50Encoder<'_>) {
821         e.set_opcode(0x50f8);
822 
823         e.set_dst(self.dst);
824         e.set_reg_src(8..16, self.srcs[0]);
825         e.set_reg_src(20..28, self.srcs[1]);
826 
827         e.set_field(
828             39..41,
829             match self.rnd_mode {
830                 FRndMode::NearestEven => 0u8,
831                 FRndMode::NegInf => 1u8,
832                 FRndMode::PosInf => 2u8,
833                 FRndMode::Zero => 3u8,
834             },
835         );
836 
837         for (i, op) in self.ops.iter().enumerate() {
838             e.set_field(
839                 28 + i * 2..28 + (i + 1) * 2,
840                 match op {
841                     FSwzAddOp::Add => 0u8,
842                     FSwzAddOp::SubLeft => 1u8,
843                     FSwzAddOp::SubRight => 2u8,
844                     FSwzAddOp::MoveLeft => 3u8,
845                 },
846             );
847         }
848 
849         e.set_bit(38, false); /* .NDV */
850         e.set_bit(44, self.ftz);
851         e.set_bit(47, false); /* dst.CC */
852     }
853 }
854 
855 impl SM50Op for OpDAdd {
legalize(&mut self, b: &mut LegalizeBuilder)856     fn legalize(&mut self, b: &mut LegalizeBuilder) {
857         use RegFile::GPR;
858         let [src0, src1] = &mut self.srcs;
859         swap_srcs_if_not_reg(src0, src1, GPR);
860         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
861         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
862     }
863 
encode(&self, e: &mut SM50Encoder<'_>)864     fn encode(&self, e: &mut SM50Encoder<'_>) {
865         match &self.srcs[1].src_ref {
866             SrcRef::Zero | SrcRef::Reg(_) => {
867                 e.set_opcode(0x5c70);
868                 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
869             }
870             SrcRef::Imm32(imm32) => {
871                 e.set_opcode(0x3870);
872                 e.set_src_imm_f20(20..39, 56, *imm32);
873                 assert!(self.srcs[1].src_mod.is_none());
874             }
875             SrcRef::CBuf(_) => {
876                 e.set_opcode(0x4c70);
877                 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
878             }
879             src => panic!("Invalid dadd src1: {src}"),
880         }
881 
882         e.set_dst(self.dst);
883         e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
884         e.set_rnd_mode(39..41, self.rnd_mode);
885     }
886 }
887 
888 impl SM50Op for OpDFma {
legalize(&mut self, b: &mut LegalizeBuilder)889     fn legalize(&mut self, b: &mut LegalizeBuilder) {
890         use RegFile::GPR;
891         let [src0, src1, src2] = &mut self.srcs;
892         b.copy_alu_src_if_fabs(src0, SrcType::F64);
893         b.copy_alu_src_if_fabs(src1, SrcType::F64);
894         b.copy_alu_src_if_fabs(src2, SrcType::F64);
895         swap_srcs_if_not_reg(src0, src1, GPR);
896         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
897         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
898         if src_is_reg(src1, GPR) {
899             b.copy_alu_src_if_imm(src2, GPR, SrcType::F64);
900         } else {
901             b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64);
902         }
903     }
904 
encode(&self, e: &mut SM50Encoder<'_>)905     fn encode(&self, e: &mut SM50Encoder<'_>) {
906         // dfma doesn't have any abs flags.
907         assert!(!self.srcs[0].src_mod.has_fabs());
908         assert!(!self.srcs[1].src_mod.has_fabs());
909         assert!(!self.srcs[2].src_mod.has_fabs());
910 
911         // There is one fneg bit shared by the two fmul sources
912         let fneg_fmul =
913             self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
914         let fneg_src2 = self.srcs[2].src_mod.has_fneg();
915 
916         match &self.srcs[2].src_ref {
917             SrcRef::Zero | SrcRef::Reg(_) => {
918                 match &self.srcs[1].src_ref {
919                     SrcRef::Zero | SrcRef::Reg(_) => {
920                         e.set_opcode(0x5b70);
921                         e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
922                     }
923                     SrcRef::Imm32(imm32) => {
924                         e.set_opcode(0x3670);
925                         e.set_src_imm_f20(20..39, 56, *imm32);
926                     }
927                     SrcRef::CBuf(cb) => {
928                         e.set_opcode(0x4b70);
929                         e.set_src_cb(20..39, cb);
930                     }
931                     src => panic!("Invalid dfma src1: {src}"),
932                 }
933 
934                 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
935             }
936             SrcRef::CBuf(cb) => {
937                 e.set_opcode(0x5370);
938                 e.set_src_cb(20..39, cb);
939                 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
940             }
941             src => panic!("Invalid dfma src2: {src}"),
942         }
943 
944         e.set_dst(self.dst);
945         e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
946 
947         e.set_bit(48, fneg_fmul);
948         e.set_bit(49, fneg_src2);
949 
950         e.set_rnd_mode(50..52, self.rnd_mode);
951     }
952 }
953 
954 impl SM50Op for OpDMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)955     fn legalize(&mut self, b: &mut LegalizeBuilder) {
956         use RegFile::GPR;
957         let [src0, src1] = &mut self.srcs;
958         swap_srcs_if_not_reg(src0, src1, GPR);
959         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
960         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
961     }
962 
encode(&self, e: &mut SM50Encoder<'_>)963     fn encode(&self, e: &mut SM50Encoder<'_>) {
964         match &self.srcs[1].src_ref {
965             SrcRef::Zero | SrcRef::Reg(_) => {
966                 e.set_opcode(0x5c50);
967                 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
968             }
969             SrcRef::Imm32(imm32) => {
970                 e.set_opcode(0x3850);
971                 e.set_src_imm_f20(20..39, 56, *imm32);
972                 assert!(self.srcs[1].src_mod.is_none());
973             }
974             SrcRef::CBuf(_) => {
975                 e.set_opcode(0x4c50);
976                 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
977             }
978             src => panic!("Invalid dmnmx src1: {src}"),
979         }
980 
981         e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
982         e.set_dst(self.dst);
983         e.set_pred_src(39..42, 42, self.min);
984     }
985 }
986 
987 impl SM50Op for OpDMul {
legalize(&mut self, b: &mut LegalizeBuilder)988     fn legalize(&mut self, b: &mut LegalizeBuilder) {
989         use RegFile::GPR;
990         let [src0, src1] = &mut self.srcs;
991         b.copy_alu_src_if_fabs(src0, SrcType::F64);
992         b.copy_alu_src_if_fabs(src1, SrcType::F64);
993         swap_srcs_if_not_reg(src0, src1, GPR);
994         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
995         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
996     }
997 
encode(&self, e: &mut SM50Encoder<'_>)998     fn encode(&self, e: &mut SM50Encoder<'_>) {
999         assert!(!self.srcs[0].src_mod.has_fabs());
1000         assert!(!self.srcs[1].src_mod.has_fabs());
1001 
1002         // There is one fneg bit shared by both sources
1003         let fneg =
1004             self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
1005 
1006         match &self.srcs[1].src_ref {
1007             SrcRef::Zero | SrcRef::Reg(_) => {
1008                 e.set_opcode(0x5c80);
1009                 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1010             }
1011             SrcRef::Imm32(imm32) => {
1012                 e.set_opcode(0x3880);
1013                 e.set_src_imm_f20(20..39, 56, *imm32);
1014             }
1015             SrcRef::CBuf(cb) => {
1016                 e.set_opcode(0x4c80);
1017                 e.set_src_cb(20..39, cb);
1018             }
1019             src => panic!("Invalid dmul src1: {src}"),
1020         }
1021 
1022         e.set_dst(self.dst);
1023         e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
1024 
1025         e.set_rnd_mode(39..41, self.rnd_mode);
1026         e.set_bit(48, fneg);
1027     }
1028 }
1029 
1030 impl SM50Op for OpDSetP {
legalize(&mut self, b: &mut LegalizeBuilder)1031     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1032         use RegFile::GPR;
1033         let [src0, src1] = &mut self.srcs;
1034         if swap_srcs_if_not_reg(src0, src1, GPR) {
1035             self.cmp_op = self.cmp_op.flip();
1036         }
1037         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
1038         b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
1039     }
1040 
encode(&self, e: &mut SM50Encoder<'_>)1041     fn encode(&self, e: &mut SM50Encoder<'_>) {
1042         match &self.srcs[1].src_ref {
1043             SrcRef::Zero | SrcRef::Reg(_) => {
1044                 e.set_opcode(0x5b80);
1045                 e.set_reg_fmod_src(20..28, 44, 6, self.srcs[1]);
1046             }
1047             SrcRef::Imm32(imm32) => {
1048                 e.set_opcode(0x3680);
1049                 e.set_src_imm_f20(20..39, 56, *imm32);
1050                 assert!(self.srcs[1].src_mod.is_none());
1051             }
1052             SrcRef::CBuf(_) => {
1053                 e.set_opcode(0x4b80);
1054                 e.set_reg_fmod_src(20..39, 44, 6, self.srcs[1]);
1055             }
1056             src => panic!("Invalid dsetp src1: {src}"),
1057         }
1058 
1059         e.set_pred_dst(3..6, self.dst);
1060         e.set_pred_dst(0..3, Dst::None); // dst1
1061         e.set_pred_src(39..42, 42, self.accum);
1062         e.set_pred_set_op(45..47, self.set_op);
1063         e.set_float_cmp_op(48..52, self.cmp_op);
1064         e.set_reg_fmod_src(8..16, 7, 43, self.srcs[0]);
1065     }
1066 }
1067 
1068 impl SM50Op for OpBfe {
legalize(&mut self, b: &mut LegalizeBuilder)1069     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1070         use RegFile::GPR;
1071         b.copy_alu_src_if_not_reg(&mut self.base, GPR, SrcType::ALU);
1072     }
1073 
encode(&self, e: &mut SM50Encoder<'_>)1074     fn encode(&self, e: &mut SM50Encoder<'_>) {
1075         match &self.range.src_ref {
1076             SrcRef::Zero | SrcRef::Reg(_) => {
1077                 e.set_opcode(0x5c00);
1078                 e.set_reg_src(20..28, self.range);
1079             }
1080             SrcRef::Imm32(imm32) => {
1081                 e.set_opcode(0x3800);
1082                 // Only the bottom 16 bits of the immediate matter
1083                 e.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
1084             }
1085             SrcRef::CBuf(cbuf) => {
1086                 e.set_opcode(0x4c00);
1087                 e.set_src_cb(20..39, cbuf);
1088             }
1089             src => panic!("Invalid bfe range: {src}"),
1090         }
1091 
1092         if self.signed {
1093             e.set_bit(48, true);
1094         }
1095 
1096         if self.reverse {
1097             e.set_bit(40, true);
1098         }
1099 
1100         e.set_reg_src(8..16, self.base);
1101         e.set_dst(self.dst);
1102     }
1103 }
1104 
1105 impl SM50Op for OpFlo {
legalize(&mut self, b: &mut LegalizeBuilder)1106     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1107         use RegFile::GPR;
1108         b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1109     }
1110 
encode(&self, e: &mut SM50Encoder<'_>)1111     fn encode(&self, e: &mut SM50Encoder<'_>) {
1112         match &self.src.src_ref {
1113             SrcRef::Zero | SrcRef::Reg(_) => {
1114                 e.set_opcode(0x5c30);
1115                 e.set_reg_src_ref(20..28, self.src.src_ref);
1116             }
1117             SrcRef::Imm32(imm32) => {
1118                 e.set_opcode(0x3830);
1119                 e.set_src_imm_i20(20..39, 56, *imm32);
1120                 assert!(self.src.src_mod.is_none());
1121             }
1122             SrcRef::CBuf(cb) => {
1123                 e.set_opcode(0x4c30);
1124                 e.set_src_cb(20..39, cb);
1125             }
1126             src => panic!("Invalid flo src: {src}"),
1127         }
1128 
1129         e.set_dst(self.dst);
1130         e.set_bit(40, self.src.src_mod.is_bnot());
1131         e.set_bit(48, self.signed);
1132         e.set_bit(41, self.return_shift_amount);
1133         e.set_bit(47, false); /* dst.CC */
1134     }
1135 }
1136 
1137 impl SM50Op for OpIAdd2 {
legalize(&mut self, b: &mut LegalizeBuilder)1138     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1139         use RegFile::GPR;
1140         let [src0, src1] = &mut self.srcs;
1141         swap_srcs_if_not_reg(src0, src1, GPR);
1142         if src0.src_mod.is_ineg() && src1.src_mod.is_ineg() {
1143             assert!(self.carry_out.is_none());
1144             let val = b.alloc_ssa(GPR, 1);
1145             b.push_op(OpIAdd2 {
1146                 dst: val.into(),
1147                 carry_out: Dst::None,
1148                 srcs: [Src::new_zero(), *src0],
1149             });
1150             *src0 = val.into();
1151         }
1152         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32);
1153         if !self.carry_out.is_none() {
1154             b.copy_alu_src_if_ineg_imm(src1, GPR, SrcType::I32);
1155         }
1156     }
1157 
encode(&self, e: &mut SM50Encoder<'_>)1158     fn encode(&self, e: &mut SM50Encoder<'_>) {
1159         // Hardware requires at least one of these be unmodified.  Otherwise, it
1160         // encodes as iadd.po which isn't what we want.
1161         assert!(
1162             self.srcs[0].src_mod.is_none() || self.srcs[1].src_mod.is_none()
1163         );
1164 
1165         let carry_out = match self.carry_out {
1166             Dst::Reg(reg) if reg.file() == RegFile::Carry => true,
1167             Dst::None => false,
1168             dst => panic!("Invalid iadd carry_out: {dst}"),
1169         };
1170 
1171         if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1172             e.set_opcode(0x1c00);
1173 
1174             e.set_dst(self.dst);
1175             e.set_reg_ineg_src(8..16, 56, self.srcs[0]);
1176             e.set_src_imm32(20..52, imm32);
1177 
1178             e.set_bit(52, carry_out);
1179             e.set_bit(53, false); // .X
1180         } else {
1181             match &self.srcs[1].src_ref {
1182                 SrcRef::Zero | SrcRef::Reg(_) => {
1183                     e.set_opcode(0x5c10);
1184                     e.set_reg_ineg_src(20..28, 48, self.srcs[1]);
1185                 }
1186                 SrcRef::Imm32(imm32) => {
1187                     e.set_opcode(0x3810);
1188                     e.set_src_imm_i20(20..39, 56, *imm32);
1189                     assert!(self.srcs[1].src_mod.is_none());
1190                 }
1191                 SrcRef::CBuf(_) => {
1192                     e.set_opcode(0x4c10);
1193                     e.set_cb_ineg_src(20..39, 48, self.srcs[1]);
1194                 }
1195                 src => panic!("Invalid iadd src1: {src}"),
1196             }
1197 
1198             e.set_dst(self.dst);
1199             e.set_reg_ineg_src(8..16, 49, self.srcs[0]);
1200 
1201             e.set_bit(43, false); // .X
1202             e.set_bit(47, carry_out);
1203         }
1204     }
1205 }
1206 
1207 impl SM50Op for OpIAdd2X {
legalize(&mut self, b: &mut LegalizeBuilder)1208     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1209         use RegFile::GPR;
1210         let [src0, src1] = &mut self.srcs;
1211         swap_srcs_if_not_reg(src0, src1, GPR);
1212         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32);
1213     }
1214 
encode(&self, e: &mut SM50Encoder<'_>)1215     fn encode(&self, e: &mut SM50Encoder<'_>) {
1216         match self.carry_in.src_ref {
1217             SrcRef::Reg(reg) if reg.file() == RegFile::Carry => (),
1218             src => panic!("Invalid iadd.x carry_in: {src}"),
1219         }
1220 
1221         let carry_out = match self.carry_out {
1222             Dst::Reg(reg) if reg.file() == RegFile::Carry => true,
1223             Dst::None => false,
1224             dst => panic!("Invalid iadd.x carry_out: {dst}"),
1225         };
1226 
1227         if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1228             e.set_opcode(0x1c00);
1229 
1230             e.set_dst(self.dst);
1231             e.set_reg_bnot_src(8..16, 56, self.srcs[0]);
1232             e.set_src_imm32(20..52, imm32);
1233 
1234             e.set_bit(52, carry_out);
1235             e.set_bit(53, true); // .X
1236         } else {
1237             match &self.srcs[1].src_ref {
1238                 SrcRef::Zero | SrcRef::Reg(_) => {
1239                     e.set_opcode(0x5c10);
1240                     e.set_reg_bnot_src(20..28, 48, self.srcs[1]);
1241                 }
1242                 SrcRef::Imm32(imm32) => {
1243                     e.set_opcode(0x3810);
1244                     e.set_src_imm_i20(20..39, 56, *imm32);
1245                     assert!(self.srcs[1].src_mod.is_none());
1246                 }
1247                 SrcRef::CBuf(_) => {
1248                     e.set_opcode(0x4c10);
1249                     e.set_cb_bnot_src(20..39, 48, self.srcs[1]);
1250                 }
1251                 src => panic!("Invalid iadd.x src1: {src}"),
1252             }
1253 
1254             e.set_dst(self.dst);
1255             e.set_reg_bnot_src(8..16, 49, self.srcs[0]);
1256 
1257             e.set_bit(43, true); // .X
1258             e.set_bit(47, carry_out);
1259         }
1260     }
1261 }
1262 
1263 impl SM50Op for OpIMad {
legalize(&mut self, b: &mut LegalizeBuilder)1264     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1265         use RegFile::GPR;
1266         let [src0, src1, src2] = &mut self.srcs;
1267         swap_srcs_if_not_reg(src0, src1, GPR);
1268         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1269         b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1270         if src_is_reg(src1, GPR) {
1271             b.copy_alu_src_if_imm(src2, GPR, SrcType::ALU);
1272         } else {
1273             b.copy_alu_src_if_not_reg(src2, GPR, SrcType::ALU);
1274         }
1275     }
1276 
encode(&self, e: &mut SM50Encoder<'_>)1277     fn encode(&self, e: &mut SM50Encoder<'_>) {
1278         // There is one ineg bit shared by the two imul sources
1279         let ineg_imul =
1280             self.srcs[0].src_mod.is_ineg() ^ self.srcs[1].src_mod.is_ineg();
1281         let ineg_src2 = self.srcs[2].src_mod.is_ineg();
1282 
1283         match &self.srcs[2].src_ref {
1284             SrcRef::Zero | SrcRef::Reg(_) => {
1285                 match &self.srcs[1].src_ref {
1286                     SrcRef::Zero | SrcRef::Reg(_) => {
1287                         e.set_opcode(0x5a00);
1288                         e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1289                     }
1290                     SrcRef::Imm32(imm32) => {
1291                         e.set_opcode(0x3400);
1292                         e.set_src_imm_i20(20..39, 56, *imm32);
1293                     }
1294                     SrcRef::CBuf(cb) => {
1295                         e.set_opcode(0x4a00);
1296                         e.set_src_cb(20..39, cb);
1297                     }
1298                     src => panic!("Invalid imad src1: {src}"),
1299                 }
1300 
1301                 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
1302             }
1303             SrcRef::CBuf(cb) => {
1304                 e.set_opcode(0x5200);
1305                 e.set_src_cb(20..39, cb);
1306                 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
1307             }
1308             src => panic!("Invalid imad src2: {src}"),
1309         }
1310 
1311         e.set_dst(self.dst);
1312         e.set_reg_src(8..16, self.srcs[0]);
1313 
1314         e.set_bit(48, self.signed); // src0 signed
1315         e.set_bit(51, ineg_imul);
1316         e.set_bit(52, ineg_src2);
1317         e.set_bit(53, self.signed); // src1 signed
1318     }
1319 }
1320 
1321 impl SM50Op for OpIMul {
legalize(&mut self, b: &mut LegalizeBuilder)1322     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1323         use RegFile::GPR;
1324         let [src0, src1] = &mut self.srcs;
1325         if swap_srcs_if_not_reg(src0, src1, GPR) {
1326             self.signed.swap(0, 1);
1327         }
1328         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1329     }
1330 
encode(&self, e: &mut SM50Encoder<'_>)1331     fn encode(&self, e: &mut SM50Encoder<'_>) {
1332         assert!(self.srcs[0].src_mod.is_none());
1333         assert!(self.srcs[1].src_mod.is_none());
1334 
1335         if let Some(i) = self.srcs[1].as_imm_not_i20() {
1336             e.set_opcode(0x1fc0);
1337             e.set_src_imm32(20..52, i);
1338 
1339             e.set_bit(53, self.high);
1340             e.set_bit(54, self.signed[0]);
1341             e.set_bit(55, self.signed[1]);
1342         } else {
1343             match &self.srcs[1].src_ref {
1344                 SrcRef::Zero | SrcRef::Reg(_) => {
1345                     e.set_opcode(0x5c38);
1346                     e.set_reg_src(20..28, self.srcs[1]);
1347                 }
1348                 SrcRef::Imm32(imm32) => {
1349                     e.set_opcode(0x3838);
1350                     e.set_src_imm_i20(20..39, 56, *imm32);
1351                 }
1352                 SrcRef::CBuf(cb) => {
1353                     e.set_opcode(0x4c38);
1354                     e.set_src_cb(20..39, cb);
1355                 }
1356                 src => panic!("Invalid imul src1: {src}"),
1357             };
1358 
1359             e.set_bit(39, self.high);
1360             e.set_bit(40, self.signed[0]);
1361             e.set_bit(41, self.signed[1]);
1362         }
1363 
1364         e.set_dst(self.dst);
1365         e.set_reg_src(8..16, self.srcs[0]);
1366     }
1367 }
1368 
1369 impl SM50Op for OpIMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)1370     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1371         use RegFile::GPR;
1372         let [src0, src1] = &mut self.srcs;
1373         swap_srcs_if_not_reg(src0, src1, GPR);
1374         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1375         b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1376     }
1377 
encode(&self, e: &mut SM50Encoder<'_>)1378     fn encode(&self, e: &mut SM50Encoder<'_>) {
1379         match &self.srcs[1].src_ref {
1380             SrcRef::Zero | SrcRef::Reg(_) => {
1381                 e.set_opcode(0x5c20);
1382                 e.set_reg_src(20..28, self.srcs[1]);
1383             }
1384             SrcRef::Imm32(imm32) => {
1385                 e.set_opcode(0x3820);
1386                 e.set_src_imm_i20(20..39, 56, *imm32);
1387                 assert!(self.srcs[1].src_mod.is_none());
1388             }
1389             SrcRef::CBuf(cb) => {
1390                 e.set_opcode(0x4c20);
1391                 e.set_src_cb(20..39, cb);
1392             }
1393             src => panic!("Invalid imnmx src1: {src}"),
1394         }
1395 
1396         e.set_dst(self.dst);
1397         e.set_reg_src(8..16, self.srcs[0]);
1398         e.set_pred_src(39..42, 42, self.min);
1399         e.set_bit(47, false); // .CC
1400         e.set_bit(
1401             48,
1402             match self.cmp_type {
1403                 IntCmpType::U32 => false,
1404                 IntCmpType::I32 => true,
1405             },
1406         );
1407     }
1408 }
1409 
1410 impl SM50Op for OpISetP {
legalize(&mut self, b: &mut LegalizeBuilder)1411     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1412         use RegFile::GPR;
1413         let [src0, src1] = &mut self.srcs;
1414         if swap_srcs_if_not_reg(src0, src1, GPR) {
1415             self.cmp_op = self.cmp_op.flip();
1416         }
1417         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1418         b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1419     }
1420 
encode(&self, e: &mut SM50Encoder<'_>)1421     fn encode(&self, e: &mut SM50Encoder<'_>) {
1422         match &self.srcs[1].src_ref {
1423             SrcRef::Zero | SrcRef::Reg(_) => {
1424                 e.set_opcode(0x5b60);
1425                 e.set_reg_src(20..28, self.srcs[1]);
1426             }
1427             SrcRef::Imm32(imm32) => {
1428                 e.set_opcode(0x3660);
1429                 e.set_src_imm_i20(20..39, 56, *imm32);
1430                 assert!(self.srcs[1].src_mod.is_none());
1431             }
1432             SrcRef::CBuf(cb) => {
1433                 e.set_opcode(0x4b60);
1434                 e.set_src_cb(20..39, cb);
1435             }
1436             src => panic!("Invalid isetp src1: {src}"),
1437         }
1438 
1439         e.set_pred_dst(0..3, Dst::None); // dst1
1440         e.set_pred_dst(3..6, self.dst);
1441         e.set_reg_src(8..16, self.srcs[0]);
1442         e.set_pred_src(39..42, 42, self.accum);
1443 
1444         // isetp.x seems to take the accumulator into account and we don't fully
1445         // understand how.  Until we do, disallow it.
1446         assert!(!self.ex);
1447         e.set_bit(43, self.ex);
1448         e.set_pred_set_op(45..47, self.set_op);
1449 
1450         e.set_field(
1451             48..49,
1452             match self.cmp_type {
1453                 IntCmpType::U32 => 0_u32,
1454                 IntCmpType::I32 => 1_u32,
1455             },
1456         );
1457         e.set_int_cmp_op(49..52, self.cmp_op);
1458     }
1459 }
1460 
1461 impl SM50Op for OpLop2 {
legalize(&mut self, b: &mut LegalizeBuilder)1462     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1463         use RegFile::GPR;
1464         let [src0, src1] = &mut self.srcs;
1465         match self.op {
1466             LogicOp2::PassB => {
1467                 *src0 = 0.into();
1468                 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1469             }
1470             LogicOp2::And | LogicOp2::Or | LogicOp2::Xor => {
1471                 swap_srcs_if_not_reg(src0, src1, GPR);
1472                 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1473             }
1474         }
1475     }
1476 
encode(&self, e: &mut SM50Encoder<'_>)1477     fn encode(&self, e: &mut SM50Encoder<'_>) {
1478         if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1479             e.set_opcode(0x0400);
1480 
1481             e.set_dst(self.dst);
1482             e.set_reg_bnot_src(8..16, 55, self.srcs[0]);
1483             e.set_src_imm32(20..52, imm32);
1484             e.set_field(
1485                 53..55,
1486                 match self.op {
1487                     LogicOp2::And => 0_u8,
1488                     LogicOp2::Or => 1_u8,
1489                     LogicOp2::Xor => 2_u8,
1490                     LogicOp2::PassB => {
1491                         panic!("PASS_B is not supported for LOP32I");
1492                     }
1493                 },
1494             );
1495             e.set_bit(56, self.srcs[1].src_mod.is_bnot());
1496         } else {
1497             match &self.srcs[1].src_ref {
1498                 SrcRef::Zero | SrcRef::Reg(_) => {
1499                     e.set_opcode(0x5c40);
1500                     e.set_reg_bnot_src(20..28, 40, self.srcs[1]);
1501                 }
1502                 SrcRef::Imm32(imm32) => {
1503                     e.set_opcode(0x3840);
1504                     e.set_src_imm_i20(20..39, 56, *imm32);
1505                     assert!(self.srcs[1].src_mod.is_none());
1506                 }
1507                 SrcRef::CBuf(_) => {
1508                     e.set_opcode(0x4c40);
1509                     e.set_cb_bnot_src(20..39, 40, self.srcs[1]);
1510                 }
1511                 src => panic!("Invalid lop2 src1: {src}"),
1512             }
1513 
1514             e.set_dst(self.dst);
1515             e.set_reg_bnot_src(8..16, 39, self.srcs[0]);
1516 
1517             e.set_field(
1518                 41..43,
1519                 match self.op {
1520                     LogicOp2::And => 0_u8,
1521                     LogicOp2::Or => 1_u8,
1522                     LogicOp2::Xor => 2_u8,
1523                     LogicOp2::PassB => 3_u8,
1524                 },
1525             );
1526 
1527             e.set_pred_dst(48..51, Dst::None);
1528         }
1529     }
1530 }
1531 
1532 impl SM50Op for OpPopC {
legalize(&mut self, b: &mut LegalizeBuilder)1533     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1534         use RegFile::GPR;
1535         b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1536     }
1537 
encode(&self, e: &mut SM50Encoder<'_>)1538     fn encode(&self, e: &mut SM50Encoder<'_>) {
1539         match &self.src.src_ref {
1540             SrcRef::Zero | SrcRef::Reg(_) => {
1541                 e.set_opcode(0x5c08);
1542                 e.set_reg_bnot_src(20..28, 40, self.src);
1543             }
1544             SrcRef::Imm32(imm32) => {
1545                 e.set_opcode(0x3808);
1546                 e.set_src_imm_i20(20..39, 56, *imm32);
1547                 e.set_bit(40, self.src.src_mod.is_bnot());
1548             }
1549             SrcRef::CBuf(_) => {
1550                 e.set_opcode(0x4c08);
1551                 e.set_cb_bnot_src(20..39, 40, self.src);
1552             }
1553             src => panic!("Invalid popc src1: {src}"),
1554         }
1555 
1556         e.set_dst(self.dst);
1557     }
1558 }
1559 
1560 impl SM50Op for OpShf {
legalize(&mut self, b: &mut LegalizeBuilder)1561     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1562         use RegFile::GPR;
1563         b.copy_alu_src_if_not_reg(&mut self.high, GPR, SrcType::ALU);
1564         b.copy_alu_src_if_not_reg(&mut self.low, GPR, SrcType::GPR);
1565         b.copy_alu_src_if_not_reg_or_imm(&mut self.shift, GPR, SrcType::GPR);
1566         b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::GPR);
1567     }
1568 
encode(&self, e: &mut SM50Encoder<'_>)1569     fn encode(&self, e: &mut SM50Encoder<'_>) {
1570         match &self.shift.src_ref {
1571             SrcRef::Zero | SrcRef::Reg(_) => {
1572                 e.set_opcode(if self.right { 0x5cf8 } else { 0x5bf8 });
1573                 e.set_reg_src(20..28, self.shift);
1574             }
1575             SrcRef::Imm32(imm32) => {
1576                 e.set_opcode(if self.right { 0x38f8 } else { 0x36f8 });
1577                 e.set_src_imm_i20(20..39, 56, *imm32);
1578                 assert!(self.shift.src_mod.is_none());
1579             }
1580             src => panic!("Invalid shf shift: {src}"),
1581         }
1582 
1583         e.set_field(
1584             37..39,
1585             match self.data_type {
1586                 IntType::I32 => 0_u8,
1587                 IntType::U32 => 0_u8,
1588                 IntType::U64 => 2_u8,
1589                 IntType::I64 => 3_u8,
1590                 _ => panic!("Invalid shift data type"),
1591             },
1592         );
1593 
1594         e.set_dst(self.dst);
1595         e.set_reg_src(8..16, self.low);
1596         e.set_reg_src(39..47, self.high);
1597 
1598         e.set_bit(47, false); // .CC
1599 
1600         // If we're shifting left, the HW will throw an illegal instrucction
1601         // encoding error if we set .high and will give us the high part anyway
1602         // if we don't.  This makes everything a bit more consistent.
1603         assert!(self.right || self.dst_high);
1604         e.set_bit(48, self.dst_high && self.right); // .high
1605 
1606         e.set_bit(49, false); // .X
1607         e.set_bit(50, self.wrap);
1608     }
1609 }
1610 
1611 impl SM50Op for OpShl {
legalize(&mut self, b: &mut LegalizeBuilder)1612     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1613         use RegFile::GPR;
1614         b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1615         b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU);
1616     }
1617 
encode(&self, e: &mut SM50Encoder<'_>)1618     fn encode(&self, e: &mut SM50Encoder<'_>) {
1619         e.set_dst(self.dst);
1620         e.set_reg_src(8..16, self.src);
1621         match &self.shift.src_ref {
1622             SrcRef::Zero | SrcRef::Reg(_) => {
1623                 e.set_opcode(0x5c48);
1624                 e.set_reg_src(20..28, self.shift);
1625             }
1626             SrcRef::Imm32(imm32) => {
1627                 e.set_opcode(0x3848);
1628                 e.set_src_imm_i20(20..39, 56, *imm32);
1629             }
1630             SrcRef::CBuf(cb) => {
1631                 e.set_opcode(0x4c48);
1632                 e.set_src_cb(20..39, cb);
1633             }
1634             src => panic!("Invalid shl shift: {src}"),
1635         }
1636 
1637         e.set_bit(39, self.wrap);
1638     }
1639 }
1640 
1641 impl SM50Op for OpShr {
legalize(&mut self, b: &mut LegalizeBuilder)1642     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1643         use RegFile::GPR;
1644         b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1645         b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU);
1646     }
1647 
encode(&self, e: &mut SM50Encoder<'_>)1648     fn encode(&self, e: &mut SM50Encoder<'_>) {
1649         e.set_dst(self.dst);
1650         e.set_reg_src(8..16, self.src);
1651         match &self.shift.src_ref {
1652             SrcRef::Zero | SrcRef::Reg(_) => {
1653                 e.set_opcode(0x5c28);
1654                 e.set_reg_src(20..28, self.shift);
1655             }
1656             SrcRef::Imm32(imm32) => {
1657                 e.set_opcode(0x3828);
1658                 e.set_src_imm_i20(20..39, 56, *imm32);
1659             }
1660             SrcRef::CBuf(cb) => {
1661                 e.set_opcode(0x4c28);
1662                 e.set_src_cb(20..39, cb);
1663             }
1664             src => panic!("Invalid shr shift: {src}"),
1665         }
1666 
1667         e.set_bit(39, self.wrap);
1668         e.set_bit(48, self.signed);
1669     }
1670 }
1671 
1672 impl SM50Op for OpF2F {
legalize(&mut self, b: &mut LegalizeBuilder)1673     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1674         use RegFile::GPR;
1675         b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU);
1676     }
1677 
encode(&self, e: &mut SM50Encoder<'_>)1678     fn encode(&self, e: &mut SM50Encoder<'_>) {
1679         match &self.src.src_ref {
1680             SrcRef::Zero | SrcRef::Reg(_) => {
1681                 e.set_opcode(0x5ca8);
1682                 e.set_reg_fmod_src(20..28, 49, 45, self.src);
1683             }
1684             SrcRef::Imm32(imm32) => {
1685                 e.set_opcode(0x38a8);
1686                 e.set_src_imm_i20(20..39, 56, *imm32);
1687                 assert!(self.src.src_mod.is_none());
1688             }
1689             SrcRef::CBuf(_) => {
1690                 e.set_opcode(0x4ca8);
1691                 e.set_cb_fmod_src(20..39, 49, 45, self.src);
1692             }
1693             src => panic!("Invalid f2f src: {src}"),
1694         }
1695 
1696         // We can't span 32 bits
1697         assert!(
1698             (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1699                 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1700         );
1701         e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1702         e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1703 
1704         e.set_rnd_mode(39..41, self.rnd_mode);
1705         e.set_bit(41, self.high);
1706         e.set_bit(42, self.integer_rnd);
1707         e.set_bit(44, self.ftz);
1708         e.set_bit(50, false); // saturate
1709 
1710         e.set_dst(self.dst);
1711     }
1712 }
1713 
1714 impl SM50Op for OpF2I {
legalize(&mut self, b: &mut LegalizeBuilder)1715     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1716         use RegFile::GPR;
1717         b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU);
1718     }
1719 
encode(&self, e: &mut SM50Encoder<'_>)1720     fn encode(&self, e: &mut SM50Encoder<'_>) {
1721         match &self.src.src_ref {
1722             SrcRef::Zero | SrcRef::Reg(_) => {
1723                 e.set_opcode(0x5cb0);
1724                 e.set_reg_fmod_src(20..28, 49, 45, self.src);
1725             }
1726             SrcRef::Imm32(imm32) => {
1727                 e.set_opcode(0x38b0);
1728                 e.set_src_imm_f20(20..39, 56, *imm32);
1729                 assert!(self.src.src_mod.is_none());
1730             }
1731             SrcRef::CBuf(_) => {
1732                 e.set_opcode(0x4cb0);
1733                 e.set_cb_fmod_src(20..39, 49, 45, self.src);
1734             }
1735             src => panic!("Invalid f2i src: {src}"),
1736         }
1737 
1738         e.set_dst(self.dst);
1739 
1740         // We can't span 32 bits
1741         assert!(
1742             (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1743                 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1744         );
1745         e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1746         e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1747         e.set_bit(12, self.dst_type.is_signed());
1748 
1749         e.set_rnd_mode(39..41, self.rnd_mode);
1750         e.set_bit(44, self.ftz);
1751         e.set_bit(47, false); // .CC
1752     }
1753 }
1754 
1755 impl SM50Op for OpI2F {
legalize(&mut self, b: &mut LegalizeBuilder)1756     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1757         use RegFile::GPR;
1758         b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1759     }
1760 
encode(&self, e: &mut SM50Encoder<'_>)1761     fn encode(&self, e: &mut SM50Encoder<'_>) {
1762         match &self.src.src_ref {
1763             SrcRef::Zero | SrcRef::Reg(_) => {
1764                 e.set_opcode(0x5cb8);
1765                 e.set_reg_ineg_src(20..28, 45, self.src);
1766             }
1767             SrcRef::Imm32(imm32) => {
1768                 e.set_opcode(0x38b8);
1769                 e.set_src_imm_i20(20..39, 56, *imm32);
1770                 assert!(self.src.src_mod.is_none());
1771             }
1772             SrcRef::CBuf(_) => {
1773                 e.set_opcode(0x4cb8);
1774                 e.set_cb_ineg_src(20..39, 45, self.src);
1775             }
1776             src => panic!("Invalid i2f src: {src}"),
1777         }
1778 
1779         e.set_dst(self.dst);
1780 
1781         // We can't span 32 bits
1782         assert!(
1783             (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1784                 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1785         );
1786         e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1787         e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1788         e.set_bit(13, self.src_type.is_signed());
1789 
1790         e.set_rnd_mode(39..41, self.rnd_mode);
1791         e.set_field(41..43, 0_u8); // TODO: subop
1792         e.set_bit(49, false); // iabs
1793     }
1794 }
1795 
1796 impl SM50Op for OpI2I {
legalize(&mut self, b: &mut LegalizeBuilder)1797     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1798         use RegFile::GPR;
1799         b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1800     }
1801 
encode(&self, e: &mut SM50Encoder<'_>)1802     fn encode(&self, e: &mut SM50Encoder<'_>) {
1803         match &self.src.src_ref {
1804             SrcRef::Zero | SrcRef::Reg(_) => {
1805                 e.set_opcode(0x5ce0);
1806                 e.set_reg_src(20..28, self.src);
1807             }
1808             SrcRef::Imm32(imm32) => {
1809                 e.set_opcode(0x38e0);
1810                 e.set_src_imm_i20(20..39, 56, *imm32);
1811             }
1812             SrcRef::CBuf(cbuf) => {
1813                 e.set_opcode(0x4ce0);
1814                 e.set_src_cb(20..39, cbuf);
1815             }
1816             src => panic!("Invalid i2i src: {src}"),
1817         }
1818 
1819         e.set_dst(self.dst);
1820 
1821         // We can't span 32 bits
1822         assert!(
1823             (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1824                 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1825         );
1826         e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1827         e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1828         e.set_bit(12, self.dst_type.is_signed());
1829         e.set_bit(13, self.src_type.is_signed());
1830 
1831         e.set_field(41..43, 0u8); // src.B1-3
1832         e.set_bit(45, self.neg);
1833         e.set_bit(47, false); // dst.CC
1834         e.set_bit(49, self.abs);
1835         e.set_bit(50, self.saturate);
1836     }
1837 }
1838 
1839 impl SM50Op for OpMov {
legalize(&mut self, _b: &mut LegalizeBuilder)1840     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
1841         // Nothing to do
1842     }
1843 
encode(&self, e: &mut SM50Encoder<'_>)1844     fn encode(&self, e: &mut SM50Encoder<'_>) {
1845         match &self.src.src_ref {
1846             SrcRef::Zero | SrcRef::Reg(_) => {
1847                 e.set_opcode(0x5c98);
1848                 e.set_reg_src(20..28, self.src);
1849                 e.set_field(39..43, self.quad_lanes);
1850             }
1851             SrcRef::Imm32(imm32) => {
1852                 e.set_opcode(0x0100);
1853                 e.set_src_imm32(20..52, *imm32);
1854                 e.set_field(12..16, self.quad_lanes);
1855             }
1856             SrcRef::CBuf(cb) => {
1857                 e.set_opcode(0x4c98);
1858                 e.set_src_cb(20..39, cb);
1859                 e.set_field(39..43, self.quad_lanes);
1860             }
1861             src => panic!("Invalid mov src: {src}"),
1862         }
1863 
1864         e.set_dst(self.dst);
1865     }
1866 }
1867 
1868 impl SM50Op for OpPrmt {
legalize(&mut self, b: &mut LegalizeBuilder)1869     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1870         use RegFile::GPR;
1871         b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR);
1872         b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR);
1873     }
1874 
encode(&self, e: &mut SM50Encoder<'_>)1875     fn encode(&self, e: &mut SM50Encoder<'_>) {
1876         match &self.sel.src_ref {
1877             SrcRef::Zero | SrcRef::Reg(_) => {
1878                 e.set_opcode(0x5bc0);
1879                 e.set_reg_src(20..28, self.sel);
1880             }
1881             SrcRef::Imm32(imm32) => {
1882                 e.set_opcode(0x36c0);
1883                 // Only the bottom 16 bits matter
1884                 e.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
1885             }
1886             SrcRef::CBuf(cb) => {
1887                 e.set_opcode(0x4bc0);
1888                 e.set_src_cb(20..39, cb);
1889             }
1890             src => panic!("Invalid prmt selector: {src}"),
1891         }
1892 
1893         e.set_dst(self.dst);
1894         e.set_reg_src(8..16, self.srcs[0]);
1895         e.set_reg_src(39..47, self.srcs[1]);
1896         e.set_field(
1897             48..51,
1898             match self.mode {
1899                 PrmtMode::Index => 0_u8,
1900                 PrmtMode::Forward4Extract => 1_u8,
1901                 PrmtMode::Backward4Extract => 2_u8,
1902                 PrmtMode::Replicate8 => 3_u8,
1903                 PrmtMode::EdgeClampLeft => 4_u8,
1904                 PrmtMode::EdgeClampRight => 5_u8,
1905                 PrmtMode::Replicate16 => 6_u8,
1906             },
1907         );
1908     }
1909 }
1910 
1911 impl SM50Op for OpSel {
legalize(&mut self, b: &mut LegalizeBuilder)1912     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1913         use RegFile::GPR;
1914         let [src0, src1] = &mut self.srcs;
1915         if swap_srcs_if_not_reg(src0, src1, GPR) {
1916             self.cond = self.cond.bnot();
1917         }
1918         b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1919         b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1920     }
1921 
encode(&self, e: &mut SM50Encoder<'_>)1922     fn encode(&self, e: &mut SM50Encoder<'_>) {
1923         match &self.srcs[1].src_ref {
1924             SrcRef::Zero | SrcRef::Reg(_) => {
1925                 e.set_opcode(0x5ca0);
1926                 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1927             }
1928             SrcRef::Imm32(imm32) => {
1929                 e.set_opcode(0x38a0);
1930                 e.set_src_imm_i20(20..39, 56, *imm32);
1931             }
1932             SrcRef::CBuf(cbuf) => {
1933                 e.set_opcode(0x4ca0);
1934                 e.set_src_cb(20..39, cbuf);
1935             }
1936             src => panic!("Invalid sel src1: {src}"),
1937         }
1938 
1939         e.set_dst(self.dst);
1940         e.set_reg_src(8..16, self.srcs[0]);
1941         e.set_pred_src(39..42, 42, self.cond);
1942     }
1943 }
1944 
1945 impl SM50Op for OpShfl {
legalize(&mut self, b: &mut LegalizeBuilder)1946     fn legalize(&mut self, b: &mut LegalizeBuilder) {
1947         use RegFile::GPR;
1948         b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1949         b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU);
1950         b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU);
1951     }
1952 
encode(&self, e: &mut SM50Encoder<'_>)1953     fn encode(&self, e: &mut SM50Encoder<'_>) {
1954         e.set_opcode(0xef10);
1955 
1956         e.set_dst(self.dst);
1957         e.set_pred_dst(48..51, self.in_bounds);
1958         e.set_reg_src(8..16, self.src);
1959 
1960         match &self.lane.src_ref {
1961             SrcRef::Zero | SrcRef::Reg(_) => {
1962                 e.set_bit(28, false);
1963                 e.set_reg_src(20..28, self.lane);
1964             }
1965             SrcRef::Imm32(imm32) => {
1966                 e.set_bit(28, true);
1967                 e.set_field(20..25, *imm32 & 0x1f);
1968             }
1969             src => panic!("Invalid shfl lane: {src}"),
1970         }
1971         match &self.c.src_ref {
1972             SrcRef::Zero | SrcRef::Reg(_) => {
1973                 e.set_bit(29, false);
1974                 e.set_reg_src(39..47, self.c);
1975             }
1976             SrcRef::Imm32(imm32) => {
1977                 e.set_bit(29, true);
1978                 e.set_field(34..47, *imm32 & 0x1f1f);
1979             }
1980             src => panic!("Invalid shfl c: {src}"),
1981         }
1982 
1983         e.set_field(
1984             30..32,
1985             match self.op {
1986                 ShflOp::Idx => 0u8,
1987                 ShflOp::Up => 1u8,
1988                 ShflOp::Down => 2u8,
1989                 ShflOp::Bfly => 3u8,
1990             },
1991         );
1992     }
1993 }
1994 
1995 impl SM50Op for OpPSetP {
legalize(&mut self, _b: &mut LegalizeBuilder)1996     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
1997         // Nothing to do
1998     }
1999 
encode(&self, e: &mut SM50Encoder<'_>)2000     fn encode(&self, e: &mut SM50Encoder<'_>) {
2001         e.set_opcode(0x5090);
2002 
2003         e.set_pred_dst(3..6, self.dsts[0]);
2004         e.set_pred_dst(0..3, self.dsts[1]);
2005 
2006         e.set_pred_src(12..15, 15, self.srcs[0]);
2007         e.set_pred_src(29..32, 32, self.srcs[1]);
2008         e.set_pred_src(39..42, 42, self.srcs[2]);
2009 
2010         e.set_pred_set_op(24..26, self.ops[0]);
2011         e.set_pred_set_op(45..47, self.ops[1]);
2012     }
2013 }
2014 
2015 impl SM50Encoder<'_> {
set_tex_dim(&mut self, range: Range<usize>, dim: TexDim)2016     fn set_tex_dim(&mut self, range: Range<usize>, dim: TexDim) {
2017         assert!(range.len() == 3);
2018         self.set_field(
2019             range,
2020             match dim {
2021                 TexDim::_1D => 0_u8,
2022                 TexDim::Array1D => 1_u8,
2023                 TexDim::_2D => 2_u8,
2024                 TexDim::Array2D => 3_u8,
2025                 TexDim::_3D => 4_u8,
2026                 TexDim::Cube => 6_u8,
2027                 TexDim::ArrayCube => 7_u8,
2028             },
2029         );
2030     }
2031 
set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode)2032     fn set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode) {
2033         assert!(range.len() == 2);
2034         self.set_field(
2035             range,
2036             match lod_mode {
2037                 TexLodMode::Auto => 0_u8,
2038                 TexLodMode::Zero => 1_u8,
2039                 TexLodMode::Bias => 2_u8,
2040                 TexLodMode::Lod => 3_u8,
2041                 _ => panic!("Unknown LOD mode"),
2042             },
2043         );
2044     }
2045 }
2046 
2047 impl SM50Op for OpTex {
legalize(&mut self, b: &mut LegalizeBuilder)2048     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2049         legalize_ext_instr(self, b);
2050     }
2051 
encode(&self, e: &mut SM50Encoder<'_>)2052     fn encode(&self, e: &mut SM50Encoder<'_>) {
2053         e.set_opcode(0xdeb8);
2054 
2055         e.set_dst(self.dsts[0]);
2056         assert!(self.dsts[1].is_none());
2057         assert!(self.fault.is_none());
2058         e.set_reg_src(8..16, self.srcs[0]);
2059         e.set_reg_src(20..28, self.srcs[1]);
2060 
2061         e.set_tex_dim(28..31, self.dim);
2062         e.set_field(31..35, self.mask);
2063         e.set_bit(35, false); // ToDo: NDV
2064         e.set_bit(36, self.offset);
2065         e.set_tex_lod_mode(37..39, self.lod_mode);
2066         e.set_bit(49, false); // TODO: .NODEP
2067         e.set_bit(50, self.z_cmpr);
2068     }
2069 }
2070 
2071 impl SM50Op for OpTld {
legalize(&mut self, b: &mut LegalizeBuilder)2072     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2073         legalize_ext_instr(self, b);
2074     }
2075 
encode(&self, e: &mut SM50Encoder<'_>)2076     fn encode(&self, e: &mut SM50Encoder<'_>) {
2077         e.set_opcode(0xdd38);
2078 
2079         e.set_dst(self.dsts[0]);
2080         assert!(self.dsts[1].is_none());
2081         assert!(self.fault.is_none());
2082         e.set_reg_src(8..16, self.srcs[0]);
2083         e.set_reg_src(20..28, self.srcs[1]);
2084 
2085         e.set_tex_dim(28..31, self.dim);
2086         e.set_field(31..35, self.mask);
2087         e.set_bit(35, self.offset);
2088         e.set_bit(49, false); // TODO: .NODEP
2089         e.set_bit(50, self.is_ms);
2090 
2091         assert!(
2092             self.lod_mode == TexLodMode::Zero
2093                 || self.lod_mode == TexLodMode::Lod
2094         );
2095         e.set_bit(55, self.lod_mode == TexLodMode::Lod);
2096     }
2097 }
2098 
2099 impl SM50Op for OpTld4 {
legalize(&mut self, b: &mut LegalizeBuilder)2100     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2101         legalize_ext_instr(self, b);
2102     }
2103 
encode(&self, e: &mut SM50Encoder<'_>)2104     fn encode(&self, e: &mut SM50Encoder<'_>) {
2105         e.set_opcode(0xdef8);
2106 
2107         e.set_dst(self.dsts[0]);
2108         assert!(self.dsts[1].is_none());
2109         assert!(self.fault.is_none());
2110         e.set_reg_src(8..16, self.srcs[0]);
2111         e.set_reg_src(20..28, self.srcs[1]);
2112 
2113         e.set_tex_dim(28..31, self.dim);
2114         e.set_field(31..35, self.mask);
2115         e.set_bit(35, false); // ToDo: NDV
2116         e.set_field(
2117             36..38,
2118             match self.offset_mode {
2119                 Tld4OffsetMode::None => 0_u8,
2120                 Tld4OffsetMode::AddOffI => 1_u8,
2121                 Tld4OffsetMode::PerPx => 2_u8,
2122             },
2123         );
2124         e.set_field(38..40, self.comp);
2125         e.set_bit(49, false); // TODO: .NODEP
2126         e.set_bit(50, self.z_cmpr);
2127     }
2128 }
2129 
2130 impl SM50Op for OpTmml {
legalize(&mut self, b: &mut LegalizeBuilder)2131     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2132         legalize_ext_instr(self, b);
2133     }
2134 
encode(&self, e: &mut SM50Encoder<'_>)2135     fn encode(&self, e: &mut SM50Encoder<'_>) {
2136         e.set_opcode(0xdf60);
2137 
2138         e.set_dst(self.dsts[0]);
2139         assert!(self.dsts[1].is_none());
2140         e.set_reg_src(8..16, self.srcs[0]);
2141         e.set_reg_src(20..28, self.srcs[1]);
2142 
2143         e.set_tex_dim(28..31, self.dim);
2144         e.set_field(31..35, self.mask);
2145         e.set_bit(35, false); // ToDo: NDV
2146         e.set_bit(49, false); // TODO: .NODEP
2147     }
2148 }
2149 
2150 impl SM50Op for OpTxd {
legalize(&mut self, b: &mut LegalizeBuilder)2151     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2152         legalize_ext_instr(self, b);
2153     }
2154 
encode(&self, e: &mut SM50Encoder<'_>)2155     fn encode(&self, e: &mut SM50Encoder<'_>) {
2156         e.set_opcode(0xde78);
2157 
2158         e.set_dst(self.dsts[0]);
2159         assert!(self.dsts[1].is_none());
2160         assert!(self.fault.is_none());
2161         e.set_reg_src(8..16, self.srcs[0]);
2162         e.set_reg_src(20..28, self.srcs[1]);
2163 
2164         e.set_tex_dim(28..31, self.dim);
2165         e.set_field(31..35, self.mask);
2166         e.set_bit(35, self.offset);
2167         e.set_bit(49, false); // TODO: .NODEP
2168     }
2169 }
2170 
2171 impl SM50Op for OpTxq {
legalize(&mut self, b: &mut LegalizeBuilder)2172     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2173         legalize_ext_instr(self, b);
2174     }
2175 
encode(&self, e: &mut SM50Encoder<'_>)2176     fn encode(&self, e: &mut SM50Encoder<'_>) {
2177         e.set_opcode(0xdf50);
2178 
2179         e.set_dst(self.dsts[0]);
2180         assert!(self.dsts[1].is_none());
2181         e.set_reg_src(8..16, self.src);
2182 
2183         e.set_field(
2184             22..28,
2185             match self.query {
2186                 TexQuery::Dimension => 1_u8,
2187                 TexQuery::TextureType => 2_u8,
2188                 TexQuery::SamplerPos => 5_u8,
2189                 // TexQuery::Filter => 0x10_u8,
2190                 // TexQuery::Lod => 0x12_u8,
2191                 // TexQuery::Wrap => 0x14_u8,
2192                 // TexQuery::BorderColour => 0x16,
2193             },
2194         );
2195         e.set_field(31..35, self.mask);
2196         e.set_bit(49, false); // TODO: .NODEP
2197     }
2198 }
2199 
2200 impl SM50Encoder<'_> {
set_mem_type(&mut self, range: Range<usize>, mem_type: MemType)2201     fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
2202         assert!(range.len() == 3);
2203         self.set_field(
2204             range,
2205             match mem_type {
2206                 MemType::U8 => 0_u8,
2207                 MemType::I8 => 1_u8,
2208                 MemType::U16 => 2_u8,
2209                 MemType::I16 => 3_u8,
2210                 MemType::B32 => 4_u8,
2211                 MemType::B64 => 5_u8,
2212                 MemType::B128 => 6_u8,
2213             },
2214         );
2215     }
2216 
set_mem_order(&mut self, _order: &MemOrder)2217     fn set_mem_order(&mut self, _order: &MemOrder) {
2218         // TODO: order and scope aren't present before SM70, what should we do?
2219     }
2220 
set_mem_access(&mut self, access: &MemAccess)2221     fn set_mem_access(&mut self, access: &MemAccess) {
2222         self.set_field(
2223             45..46,
2224             match access.space.addr_type() {
2225                 MemAddrType::A32 => 0_u8,
2226                 MemAddrType::A64 => 1_u8,
2227             },
2228         );
2229         self.set_mem_type(48..51, access.mem_type);
2230         self.set_mem_order(&access.order);
2231     }
2232 
set_image_dim(&mut self, range: Range<usize>, dim: ImageDim)2233     fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) {
2234         assert!(range.len() == 3);
2235         self.set_field(
2236             range,
2237             match dim {
2238                 ImageDim::_1D => 0_u8,
2239                 ImageDim::_1DBuffer => 1_u8,
2240                 ImageDim::_1DArray => 2_u8,
2241                 ImageDim::_2D => 3_u8,
2242                 ImageDim::_2DArray => 4_u8,
2243                 ImageDim::_3D => 5_u8,
2244             },
2245         );
2246     }
2247 }
2248 
2249 impl SM50Op for OpSuLd {
legalize(&mut self, b: &mut LegalizeBuilder)2250     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2251         legalize_ext_instr(self, b);
2252     }
2253 
encode(&self, e: &mut SM50Encoder<'_>)2254     fn encode(&self, e: &mut SM50Encoder<'_>) {
2255         e.set_opcode(0xeb00);
2256 
2257         assert!(self.mask == 0x1 || self.mask == 0x3 || self.mask == 0xf);
2258         e.set_field(20..24, self.mask);
2259         e.set_image_dim(33..36, self.image_dim);
2260 
2261         // mem_eviction_policy not a thing for sm < 70
2262 
2263         let scope = match self.mem_order {
2264             MemOrder::Constant => MemScope::System,
2265             MemOrder::Weak => MemScope::CTA,
2266             MemOrder::Strong(s) => s,
2267         };
2268 
2269         e.set_field(
2270             24..26,
2271             match scope {
2272                 MemScope::CTA => 0_u8,
2273                 /* SM => 1_u8, */
2274                 MemScope::GPU => 2_u8,
2275                 MemScope::System => 3_u8,
2276             },
2277         );
2278 
2279         e.set_dst(self.dst);
2280 
2281         e.set_reg_src(8..16, self.coord);
2282         e.set_reg_src(39..47, self.handle);
2283     }
2284 }
2285 
2286 impl SM50Op for OpSuSt {
legalize(&mut self, b: &mut LegalizeBuilder)2287     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2288         legalize_ext_instr(self, b);
2289     }
2290 
encode(&self, e: &mut SM50Encoder<'_>)2291     fn encode(&self, e: &mut SM50Encoder<'_>) {
2292         e.set_opcode(0xeb20);
2293 
2294         e.set_reg_src(8..16, self.coord);
2295         e.set_reg_src(0..8, self.data);
2296         e.set_reg_src(39..47, self.handle);
2297 
2298         e.set_image_dim(33..36, self.image_dim);
2299         e.set_mem_order(&self.mem_order);
2300 
2301         assert!(self.mask == 0x1 || self.mask == 0x3 || self.mask == 0xf);
2302         e.set_field(20..24, self.mask);
2303     }
2304 }
2305 
2306 impl SM50Encoder<'_> {
set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp)2307     fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) {
2308         self.set_field(
2309             range,
2310             match atom_op {
2311                 AtomOp::Add => 0_u8,
2312                 AtomOp::Min => 1_u8,
2313                 AtomOp::Max => 2_u8,
2314                 AtomOp::Inc => 3_u8,
2315                 AtomOp::Dec => 4_u8,
2316                 AtomOp::And => 5_u8,
2317                 AtomOp::Or => 6_u8,
2318                 AtomOp::Xor => 7_u8,
2319                 AtomOp::Exch => 8_u8,
2320                 AtomOp::CmpExch(_) => panic!("CmpExch is a separate opcode"),
2321             },
2322         );
2323     }
2324 }
2325 
2326 impl SM50Op for OpSuAtom {
legalize(&mut self, b: &mut LegalizeBuilder)2327     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2328         legalize_ext_instr(self, b);
2329     }
2330 
encode(&self, e: &mut SM50Encoder<'_>)2331     fn encode(&self, e: &mut SM50Encoder<'_>) {
2332         if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2333             e.set_opcode(0xeac0);
2334             assert!(cmp_src == AtomCmpSrc::Packed);
2335         } else {
2336             e.set_opcode(0xea60);
2337             e.set_atom_op(29..33, self.atom_op);
2338         }
2339 
2340         let atom_type: u8 = match self.atom_type {
2341             AtomType::U32 => 0,
2342             AtomType::I32 => 1,
2343             AtomType::F32 => 3,
2344             AtomType::U64 => 2,
2345             AtomType::I64 => 5,
2346             _ => panic!("Unsupported atom type {}", self.atom_type),
2347         };
2348 
2349         e.set_image_dim(33..36, self.image_dim);
2350         e.set_field(36..39, atom_type);
2351 
2352         // The hardware requires that we set .D on atomics.  This is safe to do
2353         // in in the emit code because it only affects format conversion, not
2354         // surface coordinates and atomics are required to be performed with
2355         // image formats that that exactly match the shader data type.  So, for
2356         // instance, a uint32_t atomic has to happen on an R32_UINT or R32_SINT
2357         // image.
2358         e.set_bit(52, true); // .D
2359 
2360         e.set_dst(self.dst);
2361 
2362         e.set_reg_src(20..28, self.data);
2363         e.set_reg_src(8..16, self.coord);
2364         e.set_reg_src(39..47, self.handle);
2365     }
2366 }
2367 
2368 impl SM50Op for OpLd {
legalize(&mut self, b: &mut LegalizeBuilder)2369     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2370         legalize_ext_instr(self, b);
2371     }
2372 
encode(&self, e: &mut SM50Encoder<'_>)2373     fn encode(&self, e: &mut SM50Encoder<'_>) {
2374         e.set_opcode(match self.access.space {
2375             MemSpace::Global(_) => 0xeed0,
2376             MemSpace::Local => 0xef40,
2377             MemSpace::Shared => 0xef48,
2378         });
2379 
2380         e.set_dst(self.dst);
2381         e.set_reg_src(8..16, self.addr);
2382         e.set_field(20..44, self.offset);
2383 
2384         e.set_mem_access(&self.access);
2385     }
2386 }
2387 
2388 impl SM50Op for OpLdc {
legalize(&mut self, b: &mut LegalizeBuilder)2389     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2390         use RegFile::GPR;
2391         b.copy_alu_src_if_not_reg(&mut self.offset, GPR, SrcType::GPR);
2392     }
2393 
encode(&self, e: &mut SM50Encoder<'_>)2394     fn encode(&self, e: &mut SM50Encoder<'_>) {
2395         assert!(self.cb.src_mod.is_none());
2396         let SrcRef::CBuf(cb) = &self.cb.src_ref else {
2397             panic!("Not a CBuf source");
2398         };
2399         let CBuf::Binding(cb_idx) = cb.buf else {
2400             panic!("Must be a bound constant buffer");
2401         };
2402 
2403         e.set_opcode(0xef90);
2404 
2405         e.set_dst(self.dst);
2406         e.set_reg_src(8..16, self.offset);
2407         e.set_field(20..36, cb.offset);
2408         e.set_field(36..41, cb_idx);
2409         e.set_field(
2410             44..46,
2411             match self.mode {
2412                 LdcMode::Indexed => 0_u8,
2413                 LdcMode::IndexedLinear => 1_u8,
2414                 LdcMode::IndexedSegmented => 2_u8,
2415                 LdcMode::IndexedSegmentedLinear => 3_u8,
2416             },
2417         );
2418         e.set_mem_type(48..51, self.mem_type);
2419     }
2420 }
2421 
2422 impl SM50Op for OpSt {
legalize(&mut self, b: &mut LegalizeBuilder)2423     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2424         legalize_ext_instr(self, b);
2425     }
2426 
encode(&self, e: &mut SM50Encoder<'_>)2427     fn encode(&self, e: &mut SM50Encoder<'_>) {
2428         e.set_opcode(match self.access.space {
2429             MemSpace::Global(_) => 0xeed8,
2430             MemSpace::Local => 0xef50,
2431             MemSpace::Shared => 0xef58,
2432         });
2433 
2434         e.set_reg_src(0..8, self.data);
2435         e.set_reg_src(8..16, self.addr);
2436         e.set_field(20..44, self.offset);
2437         e.set_mem_access(&self.access);
2438     }
2439 }
2440 
atom_src_as_ssa( b: &mut LegalizeBuilder, src: Src, atom_type: AtomType, ) -> SSARef2441 fn atom_src_as_ssa(
2442     b: &mut LegalizeBuilder,
2443     src: Src,
2444     atom_type: AtomType,
2445 ) -> SSARef {
2446     if let Some(ssa) = src.as_ssa() {
2447         return *ssa;
2448     }
2449 
2450     let tmp;
2451     if atom_type.bits() == 32 {
2452         tmp = b.alloc_ssa(RegFile::GPR, 1);
2453         b.copy_to(tmp.into(), 0.into());
2454     } else {
2455         debug_assert!(atom_type.bits() == 64);
2456         tmp = b.alloc_ssa(RegFile::GPR, 2);
2457         b.copy_to(tmp[0].into(), 0.into());
2458         b.copy_to(tmp[1].into(), 0.into());
2459     }
2460     tmp
2461 }
2462 
2463 impl SM50Op for OpAtom {
legalize(&mut self, b: &mut LegalizeBuilder)2464     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2465         if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
2466             let cmpr = atom_src_as_ssa(b, self.cmpr, self.atom_type);
2467             let data = atom_src_as_ssa(b, self.data, self.atom_type);
2468 
2469             let mut cmpr_data = Vec::new();
2470             cmpr_data.extend_from_slice(&cmpr);
2471             cmpr_data.extend_from_slice(&data);
2472             let cmpr_data = SSARef::try_from(cmpr_data).unwrap();
2473 
2474             self.cmpr = 0.into();
2475             self.data = cmpr_data.into();
2476             self.atom_op = AtomOp::CmpExch(AtomCmpSrc::Packed);
2477         }
2478         legalize_ext_instr(self, b);
2479     }
2480 
encode(&self, e: &mut SM50Encoder<'_>)2481     fn encode(&self, e: &mut SM50Encoder<'_>) {
2482         match self.mem_space {
2483             MemSpace::Global(addr_type) => {
2484                 if self.dst.is_none() {
2485                     e.set_opcode(0xebf8);
2486 
2487                     e.set_reg_src(0..8, self.data);
2488 
2489                     let data_type = match self.atom_type {
2490                         AtomType::U32 => 0_u8,
2491                         AtomType::I32 => 1_u8,
2492                         AtomType::U64 => 2_u8,
2493                         AtomType::F32 => 3_u8,
2494                         // NOTE: U128 => 4_u8,
2495                         AtomType::I64 => 5_u8,
2496                         _ => panic!("Unsupported data type"),
2497                     };
2498                     e.set_field(20..23, data_type);
2499                     e.set_atom_op(23..26, self.atom_op);
2500                 } else if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2501                     e.set_opcode(0xee00);
2502 
2503                     e.set_dst(self.dst);
2504 
2505                     // TODO: These are all supported by the disassembler but
2506                     // only the packed layout appears to be supported by real
2507                     // hardware
2508                     let (data_src, data_layout) = match cmp_src {
2509                         AtomCmpSrc::Separate => {
2510                             if self.data.is_zero() {
2511                                 (self.cmpr, 1_u8)
2512                             } else {
2513                                 assert!(self.cmpr.is_zero());
2514                                 (self.data, 2_u8)
2515                             }
2516                         }
2517                         AtomCmpSrc::Packed => (self.data, 0_u8),
2518                     };
2519                     e.set_reg_src(20..28, data_src);
2520 
2521                     let data_type = match self.atom_type {
2522                         AtomType::U32 => 0_u8,
2523                         AtomType::U64 => 1_u8,
2524                         _ => panic!("Unsupported data type"),
2525                     };
2526                     e.set_field(49..50, data_type);
2527                     e.set_field(50..52, data_layout);
2528                     e.set_field(52..56, 15_u8); // subOp
2529                 } else {
2530                     e.set_opcode(0xed00);
2531 
2532                     e.set_dst(self.dst);
2533                     e.set_reg_src(20..28, self.data);
2534 
2535                     let data_type = match self.atom_type {
2536                         AtomType::U32 => 0_u8,
2537                         AtomType::I32 => 1_u8,
2538                         AtomType::U64 => 2_u8,
2539                         AtomType::F32 => 3_u8,
2540                         // NOTE: U128 => 4_u8,
2541                         AtomType::I64 => 5_u8,
2542                         _ => panic!("Unsupported data type"),
2543                     };
2544                     e.set_field(49..52, data_type);
2545                     e.set_atom_op(52..56, self.atom_op);
2546                 }
2547 
2548                 e.set_mem_order(&self.mem_order);
2549 
2550                 e.set_reg_src(8..16, self.addr);
2551                 e.set_field(28..48, self.addr_offset);
2552                 e.set_field(
2553                     48..49,
2554                     match addr_type {
2555                         MemAddrType::A32 => 0_u8,
2556                         MemAddrType::A64 => 1_u8,
2557                     },
2558                 );
2559             }
2560             MemSpace::Local => panic!("Atomics do not support local"),
2561             MemSpace::Shared => {
2562                 if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2563                     e.set_opcode(0xee00);
2564 
2565                     assert!(cmp_src == AtomCmpSrc::Packed);
2566                     assert!(self.cmpr.is_zero());
2567                     e.set_reg_src(20..28, self.data);
2568 
2569                     let subop = match self.atom_type {
2570                         AtomType::U32 => 4_u8,
2571                         AtomType::U64 => 5_u8,
2572                         _ => panic!("Unsupported data type"),
2573                     };
2574                     e.set_field(52..56, subop);
2575                 } else {
2576                     e.set_opcode(0xec00);
2577 
2578                     e.set_reg_src(20..28, self.data);
2579 
2580                     let data_type = match self.atom_type {
2581                         AtomType::U32 => 0_u8,
2582                         AtomType::I32 => 1_u8,
2583                         AtomType::U64 => 2_u8,
2584                         AtomType::I64 => 3_u8,
2585                         _ => panic!("Unsupported data type"),
2586                     };
2587                     e.set_field(28..30, data_type);
2588                     e.set_atom_op(52..56, self.atom_op);
2589                 }
2590 
2591                 e.set_dst(self.dst);
2592                 e.set_reg_src(8..16, self.addr);
2593                 assert_eq!(self.addr_offset % 4, 0);
2594                 e.set_field(30..52, self.addr_offset / 4);
2595             }
2596         }
2597     }
2598 }
2599 
2600 impl SM50Op for OpAL2P {
legalize(&mut self, b: &mut LegalizeBuilder)2601     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2602         legalize_ext_instr(self, b);
2603     }
2604 
encode(&self, e: &mut SM50Encoder<'_>)2605     fn encode(&self, e: &mut SM50Encoder<'_>) {
2606         e.set_opcode(0xefa0);
2607 
2608         e.set_dst(self.dst);
2609         e.set_reg_src(8..16, self.offset);
2610 
2611         e.set_field(20..31, self.access.addr);
2612         assert!(!self.access.patch);
2613         e.set_bit(32, self.access.output);
2614 
2615         e.set_field(47..49, 0_u8); // comps
2616         e.set_pred_dst(44..47, Dst::None);
2617     }
2618 }
2619 
2620 impl SM50Op for OpALd {
legalize(&mut self, b: &mut LegalizeBuilder)2621     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2622         legalize_ext_instr(self, b);
2623     }
2624 
encode(&self, e: &mut SM50Encoder<'_>)2625     fn encode(&self, e: &mut SM50Encoder<'_>) {
2626         e.set_opcode(0xefd8);
2627 
2628         e.set_dst(self.dst);
2629         if self.access.phys {
2630             assert!(!self.access.patch);
2631             assert!(self.offset.src_ref.as_reg().is_some());
2632         } else if !self.access.patch {
2633             assert!(self.offset.is_zero());
2634         }
2635         e.set_reg_src(8..16, self.offset);
2636         e.set_reg_src(39..47, self.vtx);
2637 
2638         e.set_field(20..30, self.access.addr);
2639         e.set_bit(31, self.access.patch);
2640         e.set_bit(32, self.access.output);
2641         e.set_field(47..49, self.access.comps - 1);
2642     }
2643 }
2644 
2645 impl SM50Op for OpASt {
legalize(&mut self, b: &mut LegalizeBuilder)2646     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2647         legalize_ext_instr(self, b);
2648     }
2649 
encode(&self, e: &mut SM50Encoder<'_>)2650     fn encode(&self, e: &mut SM50Encoder<'_>) {
2651         e.set_opcode(0xeff0);
2652 
2653         e.set_reg_src(0..8, self.data);
2654         e.set_reg_src(8..16, self.offset);
2655         e.set_reg_src(39..47, self.vtx);
2656 
2657         assert!(!self.access.phys);
2658         assert!(self.access.output);
2659         e.set_field(20..30, self.access.addr);
2660         e.set_bit(31, self.access.patch);
2661         e.set_bit(32, self.access.output);
2662         e.set_field(47..49, self.access.comps - 1);
2663     }
2664 }
2665 
2666 impl SM50Op for OpIpa {
legalize(&mut self, b: &mut LegalizeBuilder)2667     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2668         legalize_ext_instr(self, b);
2669     }
2670 
encode(&self, e: &mut SM50Encoder<'_>)2671     fn encode(&self, e: &mut SM50Encoder<'_>) {
2672         e.set_opcode(0xe000);
2673 
2674         e.set_dst(self.dst);
2675         e.set_reg_src(8..16, 0.into()); // addr
2676         e.set_reg_src(20..28, self.inv_w);
2677         e.set_reg_src(39..47, self.offset);
2678 
2679         assert!(self.addr % 4 == 0);
2680         e.set_field(28..38, self.addr);
2681         e.set_bit(38, false); // .IDX
2682         e.set_pred_dst(47..50, Dst::None); // TODO: What is this for?
2683         e.set_bit(51, false); // .SAT
2684         e.set_field(
2685             52..54,
2686             match self.loc {
2687                 InterpLoc::Default => 0_u8,
2688                 InterpLoc::Centroid => 1_u8,
2689                 InterpLoc::Offset => 2_u8,
2690             },
2691         );
2692         e.set_field(
2693             54..56,
2694             match self.freq {
2695                 InterpFreq::Pass => 0_u8,
2696                 InterpFreq::PassMulW => 1_u8,
2697                 InterpFreq::Constant => 2_u8,
2698                 InterpFreq::State => 3_u8,
2699             },
2700         );
2701     }
2702 }
2703 
2704 impl SM50Op for OpCCtl {
legalize(&mut self, b: &mut LegalizeBuilder)2705     fn legalize(&mut self, b: &mut LegalizeBuilder) {
2706         legalize_ext_instr(self, b);
2707     }
2708 
encode(&self, e: &mut SM50Encoder<'_>)2709     fn encode(&self, e: &mut SM50Encoder<'_>) {
2710         match self.mem_space {
2711             MemSpace::Global(addr_type) => {
2712                 e.set_opcode(0xef60);
2713 
2714                 assert!(self.addr_offset % 4 == 0);
2715                 e.set_field(22..52, self.addr_offset / 4);
2716                 e.set_field(
2717                     52..53,
2718                     match addr_type {
2719                         MemAddrType::A32 => 0_u8,
2720                         MemAddrType::A64 => 1_u8,
2721                     },
2722                 );
2723             }
2724             MemSpace::Local => panic!("cctl does not support local"),
2725             MemSpace::Shared => {
2726                 e.set_opcode(0xef80);
2727 
2728                 assert!(self.addr_offset % 4 == 0);
2729                 e.set_field(22..44, self.addr_offset / 4);
2730             }
2731         }
2732 
2733         e.set_field(
2734             0..4,
2735             match self.op {
2736                 CCtlOp::Qry1 => 0_u8,
2737                 CCtlOp::PF1 => 1_u8,
2738                 CCtlOp::PF1_5 => 2_u8,
2739                 CCtlOp::PF2 => 3_u8,
2740                 CCtlOp::WB => 4_u8,
2741                 CCtlOp::IV => 5_u8,
2742                 CCtlOp::IVAll => 6_u8,
2743                 CCtlOp::RS => 7_u8,
2744                 CCtlOp::RSLB => 7_u8,
2745                 op => panic!("Unsupported cache control {op:?}"),
2746             },
2747         );
2748         e.set_reg_src(8..16, self.addr);
2749     }
2750 }
2751 
2752 impl SM50Op for OpMemBar {
legalize(&mut self, _b: &mut LegalizeBuilder)2753     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2754         // Nothing to do
2755     }
2756 
encode(&self, e: &mut SM50Encoder<'_>)2757     fn encode(&self, e: &mut SM50Encoder<'_>) {
2758         e.set_opcode(0xef98);
2759 
2760         e.set_field(
2761             8..10,
2762             match self.scope {
2763                 MemScope::CTA => 0_u8,
2764                 MemScope::GPU => 1_u8,
2765                 MemScope::System => 2_u8,
2766             },
2767         );
2768     }
2769 }
2770 
2771 impl SM50Encoder<'_> {
set_rel_offset(&mut self, range: Range<usize>, label: &Label)2772     fn set_rel_offset(&mut self, range: Range<usize>, label: &Label) {
2773         let ip = u32::try_from(self.ip).unwrap();
2774         let ip = i32::try_from(ip).unwrap();
2775 
2776         let target_ip = *self.labels.get(label).unwrap();
2777         let target_ip = u32::try_from(target_ip).unwrap();
2778         let target_ip = i32::try_from(target_ip).unwrap();
2779 
2780         let rel_offset = target_ip - ip - 8;
2781 
2782         self.set_field(range, rel_offset);
2783     }
2784 }
2785 
2786 impl SM50Op for OpBra {
legalize(&mut self, _b: &mut LegalizeBuilder)2787     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2788         // Nothing to do
2789     }
2790 
encode(&self, e: &mut SM50Encoder<'_>)2791     fn encode(&self, e: &mut SM50Encoder<'_>) {
2792         e.set_opcode(0xe240);
2793         e.set_rel_offset(20..44, &self.target);
2794         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2795     }
2796 }
2797 
2798 impl SM50Op for OpSSy {
legalize(&mut self, _b: &mut LegalizeBuilder)2799     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2800         // Nothing to do
2801     }
2802 
encode(&self, e: &mut SM50Encoder<'_>)2803     fn encode(&self, e: &mut SM50Encoder<'_>) {
2804         e.set_opcode(0xe290);
2805         e.set_rel_offset(20..44, &self.target);
2806         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2807     }
2808 }
2809 
2810 impl SM50Op for OpSync {
legalize(&mut self, _b: &mut LegalizeBuilder)2811     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2812         // Nothing to do
2813     }
2814 
encode(&self, e: &mut SM50Encoder<'_>)2815     fn encode(&self, e: &mut SM50Encoder<'_>) {
2816         e.set_opcode(0xf0f8);
2817         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2818     }
2819 }
2820 
2821 impl SM50Op for OpBrk {
legalize(&mut self, _b: &mut LegalizeBuilder)2822     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2823         // Nothing to do
2824     }
2825 
encode(&self, e: &mut SM50Encoder<'_>)2826     fn encode(&self, e: &mut SM50Encoder<'_>) {
2827         e.set_opcode(0xe340);
2828         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2829     }
2830 }
2831 
2832 impl SM50Op for OpPBk {
legalize(&mut self, _b: &mut LegalizeBuilder)2833     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2834         // Nothing to do
2835     }
2836 
encode(&self, e: &mut SM50Encoder<'_>)2837     fn encode(&self, e: &mut SM50Encoder<'_>) {
2838         e.set_opcode(0xe2a0);
2839         e.set_rel_offset(20..44, &self.target);
2840         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2841     }
2842 }
2843 
2844 impl SM50Op for OpCont {
legalize(&mut self, _b: &mut LegalizeBuilder)2845     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2846         // Nothing to do
2847     }
2848 
encode(&self, e: &mut SM50Encoder<'_>)2849     fn encode(&self, e: &mut SM50Encoder<'_>) {
2850         e.set_opcode(0xe350);
2851         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2852     }
2853 }
2854 
2855 impl SM50Op for OpPCnt {
legalize(&mut self, _b: &mut LegalizeBuilder)2856     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2857         // Nothing to do
2858     }
2859 
encode(&self, e: &mut SM50Encoder<'_>)2860     fn encode(&self, e: &mut SM50Encoder<'_>) {
2861         e.set_opcode(0xe2b0);
2862         e.set_rel_offset(20..44, &self.target);
2863         e.set_field(0..5, 0xF_u8); // TODO: Pred?
2864     }
2865 }
2866 
2867 impl SM50Op for OpExit {
legalize(&mut self, _b: &mut LegalizeBuilder)2868     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2869         // Nothing to do
2870     }
2871 
encode(&self, e: &mut SM50Encoder<'_>)2872     fn encode(&self, e: &mut SM50Encoder<'_>) {
2873         e.set_opcode(0xe300);
2874 
2875         // TODO: CC flags
2876         e.set_field(0..4, 0xf_u8); // CC.T
2877     }
2878 }
2879 
2880 impl SM50Op for OpBar {
legalize(&mut self, _b: &mut LegalizeBuilder)2881     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2882         // Nothing to do
2883     }
2884 
encode(&self, e: &mut SM50Encoder<'_>)2885     fn encode(&self, e: &mut SM50Encoder<'_>) {
2886         e.set_opcode(0xf0a8);
2887 
2888         e.set_reg_src(8..16, SrcRef::Zero.into());
2889 
2890         // 00: RED.POPC
2891         // 01: RED.AND
2892         // 02: RED.OR
2893         e.set_field(35..37, 0_u8);
2894 
2895         // 00: SYNC
2896         // 01: ARV
2897         // 02: RED
2898         // 03: SCAN
2899         e.set_field(32..35, 0_u8);
2900 
2901         e.set_pred_src(39..42, 42, SrcRef::True.into());
2902     }
2903 }
2904 
2905 impl SM50Op for OpCS2R {
legalize(&mut self, _b: &mut LegalizeBuilder)2906     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2907         // Nothing to do
2908     }
2909 
encode(&self, e: &mut SM50Encoder<'_>)2910     fn encode(&self, e: &mut SM50Encoder<'_>) {
2911         e.set_opcode(0x50c8);
2912         e.set_dst(self.dst);
2913         e.set_field(20..28, self.idx);
2914     }
2915 }
2916 
2917 impl SM50Op for OpIsberd {
legalize(&mut self, _b: &mut LegalizeBuilder)2918     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2919         // Nothing to do
2920     }
2921 
encode(&self, e: &mut SM50Encoder<'_>)2922     fn encode(&self, e: &mut SM50Encoder<'_>) {
2923         e.set_opcode(0xefd0);
2924         e.set_dst(self.dst);
2925         e.set_reg_src(8..16, self.idx);
2926     }
2927 }
2928 
2929 impl SM50Op for OpKill {
legalize(&mut self, _b: &mut LegalizeBuilder)2930     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2931         // Nothing to do
2932     }
2933 
encode(&self, e: &mut SM50Encoder<'_>)2934     fn encode(&self, e: &mut SM50Encoder<'_>) {
2935         e.set_opcode(0xe330);
2936         e.set_field(0..5, 0x0f_u8);
2937     }
2938 }
2939 
2940 impl SM50Op for OpNop {
legalize(&mut self, _b: &mut LegalizeBuilder)2941     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2942         // Nothing to do
2943     }
2944 
encode(&self, e: &mut SM50Encoder<'_>)2945     fn encode(&self, e: &mut SM50Encoder<'_>) {
2946         e.set_opcode(0x50b0);
2947 
2948         // TODO: CC flags
2949         e.set_field(8..12, 0xf_u8); // CC.T
2950     }
2951 }
2952 
2953 impl SM50Op for OpPixLd {
legalize(&mut self, _b: &mut LegalizeBuilder)2954     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2955         // Nothing to do
2956     }
2957 
encode(&self, e: &mut SM50Encoder<'_>)2958     fn encode(&self, e: &mut SM50Encoder<'_>) {
2959         e.set_opcode(0xefe8);
2960         e.set_dst(self.dst);
2961         e.set_reg_src(8..16, 0.into());
2962         e.set_field(
2963             31..34,
2964             match &self.val {
2965                 PixVal::CovMask => 1_u8,
2966                 PixVal::Covered => 2_u8,
2967                 PixVal::Offset => 3_u8,
2968                 PixVal::CentroidOffset => 4_u8,
2969                 PixVal::MyIndex => 5_u8,
2970                 other => panic!("Unsupported PixVal: {other}"),
2971             },
2972         );
2973         e.set_pred_dst(45..48, Dst::None);
2974     }
2975 }
2976 
2977 impl SM50Op for OpS2R {
legalize(&mut self, _b: &mut LegalizeBuilder)2978     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2979         // Nothing to do
2980     }
2981 
encode(&self, e: &mut SM50Encoder<'_>)2982     fn encode(&self, e: &mut SM50Encoder<'_>) {
2983         e.set_opcode(0xf0c8);
2984         e.set_dst(self.dst);
2985         e.set_field(20..28, self.idx);
2986     }
2987 }
2988 
2989 impl SM50Op for OpVote {
legalize(&mut self, _b: &mut LegalizeBuilder)2990     fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2991         // Nothing to do
2992     }
2993 
encode(&self, e: &mut SM50Encoder<'_>)2994     fn encode(&self, e: &mut SM50Encoder<'_>) {
2995         e.set_opcode(0x50d8);
2996 
2997         e.set_dst(self.ballot);
2998         e.set_pred_dst(45..48, self.vote);
2999         e.set_pred_src(39..42, 42, self.pred);
3000 
3001         e.set_field(
3002             48..50,
3003             match self.op {
3004                 VoteOp::All => 0u8,
3005                 VoteOp::Any => 1u8,
3006                 VoteOp::Eq => 2u8,
3007             },
3008         );
3009     }
3010 }
3011 
3012 impl SM50Op for OpOut {
legalize(&mut self, b: &mut LegalizeBuilder)3013     fn legalize(&mut self, b: &mut LegalizeBuilder) {
3014         use RegFile::GPR;
3015         b.copy_alu_src_if_not_reg(&mut self.handle, GPR, SrcType::GPR);
3016         b.copy_alu_src_if_i20_overflow(&mut self.stream, GPR, SrcType::ALU);
3017     }
3018 
encode(&self, e: &mut SM50Encoder<'_>)3019     fn encode(&self, e: &mut SM50Encoder<'_>) {
3020         match &self.stream.src_ref {
3021             SrcRef::Zero | SrcRef::Reg(_) => {
3022                 e.set_opcode(0xfbe0);
3023                 e.set_reg_src(20..28, self.stream);
3024             }
3025             SrcRef::Imm32(imm32) => {
3026                 e.set_opcode(0xf6e0);
3027                 e.set_src_imm_i20(20..39, 56, *imm32);
3028             }
3029             SrcRef::CBuf(cbuf) => {
3030                 e.set_opcode(0xebe0);
3031                 e.set_src_cb(20..39, cbuf);
3032             }
3033             src => panic!("Invalid out stream: {src}"),
3034         }
3035 
3036         e.set_field(
3037             39..41,
3038             match self.out_type {
3039                 OutType::Emit => 1_u8,
3040                 OutType::Cut => 2_u8,
3041                 OutType::EmitThenCut => 3_u8,
3042             },
3043         );
3044 
3045         e.set_reg_src(8..16, self.handle);
3046         e.set_dst(self.dst);
3047     }
3048 }
3049 
// Maps every `Op` variant listed below to its payload, so that the same
// match can be expanded for both shared and mutable references.  Any other
// variant panics with "Unhandled instruction".
macro_rules! as_sm50_op_match {
    ($op:expr) => {
        match $op {
            Op::FAdd(inner) => inner,
            Op::FMnMx(inner) => inner,
            Op::FMul(inner) => inner,
            Op::FFma(inner) => inner,
            Op::FSet(inner) => inner,
            Op::FSetP(inner) => inner,
            Op::FSwzAdd(inner) => inner,
            Op::Rro(inner) => inner,
            Op::MuFu(inner) => inner,
            Op::Flo(inner) => inner,
            Op::DAdd(inner) => inner,
            Op::DFma(inner) => inner,
            Op::DMnMx(inner) => inner,
            Op::DMul(inner) => inner,
            Op::DSetP(inner) => inner,
            Op::IAdd2(inner) => inner,
            Op::IAdd2X(inner) => inner,
            Op::Mov(inner) => inner,
            Op::Sel(inner) => inner,
            Op::Shfl(inner) => inner,
            Op::Vote(inner) => inner,
            Op::PSetP(inner) => inner,
            Op::SuSt(inner) => inner,
            Op::S2R(inner) => inner,
            Op::PopC(inner) => inner,
            Op::Prmt(inner) => inner,
            Op::Ld(inner) => inner,
            Op::Ldc(inner) => inner,
            Op::St(inner) => inner,
            Op::Lop2(inner) => inner,
            Op::Shf(inner) => inner,
            Op::Shl(inner) => inner,
            Op::Shr(inner) => inner,
            Op::F2F(inner) => inner,
            Op::F2I(inner) => inner,
            Op::I2F(inner) => inner,
            Op::I2I(inner) => inner,
            Op::IMad(inner) => inner,
            Op::IMul(inner) => inner,
            Op::IMnMx(inner) => inner,
            Op::ISetP(inner) => inner,
            Op::Tex(inner) => inner,
            Op::Tld(inner) => inner,
            Op::Tld4(inner) => inner,
            Op::Tmml(inner) => inner,
            Op::Txd(inner) => inner,
            Op::Txq(inner) => inner,
            Op::Ipa(inner) => inner,
            Op::AL2P(inner) => inner,
            Op::ALd(inner) => inner,
            Op::ASt(inner) => inner,
            Op::CCtl(inner) => inner,
            Op::MemBar(inner) => inner,
            Op::Atom(inner) => inner,
            Op::Bra(inner) => inner,
            Op::SSy(inner) => inner,
            Op::Sync(inner) => inner,
            Op::Brk(inner) => inner,
            Op::PBk(inner) => inner,
            Op::Cont(inner) => inner,
            Op::PCnt(inner) => inner,
            Op::Exit(inner) => inner,
            Op::Bar(inner) => inner,
            Op::SuLd(inner) => inner,
            Op::SuAtom(inner) => inner,
            Op::Kill(inner) => inner,
            Op::CS2R(inner) => inner,
            Op::Nop(inner) => inner,
            Op::PixLd(inner) => inner,
            Op::Isberd(inner) => inner,
            Op::Out(inner) => inner,
            Op::Bfe(inner) => inner,
            _ => panic!("Unhandled instruction {}", $op),
        }
    };
}
3129 
as_sm50_op(op: &Op) -> &dyn SM50Op3130 fn as_sm50_op(op: &Op) -> &dyn SM50Op {
3131     as_sm50_op_match!(op)
3132 }
3133 
as_sm50_op_mut(op: &mut Op) -> &mut dyn SM50Op3134 fn as_sm50_op_mut(op: &mut Op) -> &mut dyn SM50Op {
3135     as_sm50_op_match!(op)
3136 }
3137 
encode_instr( instr_index: usize, instr: Option<&Box<Instr>>, sm: &ShaderModel50, labels: &HashMap<Label, usize>, ip: &mut usize, sched_instr: &mut [u32; 2], ) -> [u32; 2]3138 fn encode_instr(
3139     instr_index: usize,
3140     instr: Option<&Box<Instr>>,
3141     sm: &ShaderModel50,
3142     labels: &HashMap<Label, usize>,
3143     ip: &mut usize,
3144     sched_instr: &mut [u32; 2],
3145 ) -> [u32; 2] {
3146     let mut e = SM50Encoder {
3147         sm,
3148         ip: *ip,
3149         labels,
3150         inst: [0_u32; 2],
3151         sched: 0,
3152     };
3153 
3154     if let Some(instr) = instr {
3155         as_sm50_op(&instr.op).encode(&mut e);
3156         e.set_pred(&instr.pred);
3157         e.set_instr_deps(&instr.deps);
3158     } else {
3159         let nop = OpNop { label: None };
3160         nop.encode(&mut e);
3161         e.set_pred(&true.into());
3162         e.set_instr_deps(&InstrDeps::new());
3163     }
3164 
3165     *ip += 8;
3166 
3167     BitMutView::new(sched_instr)
3168         .set_field(21 * instr_index..21 * (instr_index + 1), e.sched);
3169 
3170     e.inst
3171 }
3172 
encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec<u32>3173 fn encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec<u32> {
3174     assert!(s.functions.len() == 1);
3175     let func = &s.functions[0];
3176 
3177     let mut num_instrs = 0_usize;
3178     let mut labels = HashMap::new();
3179     for b in &func.blocks {
3180         // We ensure blocks will have groups of 3 instructions with a
3181         // schedule instruction before each groups.  As we should never jump
3182         // to a schedule instruction, we account for that here.
3183         labels.insert(b.label, num_instrs + 8);
3184 
3185         let block_num_instrs = b.instrs.len().next_multiple_of(3);
3186 
3187         // Every 3 instructions, we have a new schedule instruction so we
3188         // need to account for that.
3189         num_instrs += (block_num_instrs + (block_num_instrs / 3)) * 8;
3190     }
3191 
3192     let mut encoded = Vec::new();
3193     for b in &func.blocks {
3194         // A block is composed of groups of 3 instructions.
3195         let block_num_instrs = b.instrs.len().next_multiple_of(3);
3196 
3197         let mut instrs_iter = b.instrs.iter();
3198 
3199         for _ in 0..(block_num_instrs / 3) {
3200             let mut ip = ((encoded.len() / 2) + 1) * 8;
3201 
3202             let mut sched_instr = [0x0; 2];
3203 
3204             let instr0 = encode_instr(
3205                 0,
3206                 instrs_iter.next(),
3207                 sm,
3208                 &labels,
3209                 &mut ip,
3210                 &mut sched_instr,
3211             );
3212             let instr1 = encode_instr(
3213                 1,
3214                 instrs_iter.next(),
3215                 sm,
3216                 &labels,
3217                 &mut ip,
3218                 &mut sched_instr,
3219             );
3220             let instr2 = encode_instr(
3221                 2,
3222                 instrs_iter.next(),
3223                 sm,
3224                 &labels,
3225                 &mut ip,
3226                 &mut sched_instr,
3227             );
3228 
3229             encoded.extend_from_slice(&sched_instr[..]);
3230             encoded.extend_from_slice(&instr0[..]);
3231             encoded.extend_from_slice(&instr1[..]);
3232             encoded.extend_from_slice(&instr2[..]);
3233         }
3234     }
3235 
3236     encoded
3237 }
3238