• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 use crate::ir::*;
5 use bitview::*;
6 
7 use std::collections::HashMap;
8 use std::ops::Range;
9 
10 struct ALURegRef {
11     pub reg: RegRef,
12     pub abs: bool,
13     pub neg: bool,
14 }
15 
16 struct ALUCBufRef {
17     pub cb: CBufRef,
18     pub abs: bool,
19     pub neg: bool,
20 }
21 
22 enum ALUSrc {
23     None,
24     Imm32(u32),
25     Reg(ALURegRef),
26     UReg(ALURegRef),
27     CBuf(ALUCBufRef),
28 }
29 
src_mod_has_abs(src_mod: SrcMod) -> bool30 fn src_mod_has_abs(src_mod: SrcMod) -> bool {
31     match src_mod {
32         SrcMod::None | SrcMod::FNeg | SrcMod::INeg | SrcMod::BNot => false,
33         SrcMod::FAbs | SrcMod::FNegAbs => true,
34     }
35 }
36 
src_mod_has_neg(src_mod: SrcMod) -> bool37 fn src_mod_has_neg(src_mod: SrcMod) -> bool {
38     match src_mod {
39         SrcMod::None | SrcMod::FAbs => false,
40         SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg | SrcMod::BNot => true,
41     }
42 }
43 
src_mod_is_bnot(src_mod: SrcMod) -> bool44 fn src_mod_is_bnot(src_mod: SrcMod) -> bool {
45     match src_mod {
46         SrcMod::None => false,
47         SrcMod::BNot => true,
48         _ => panic!("Not an predicate source modifier"),
49     }
50 }
51 
dst_is_bar(dst: Dst) -> bool52 fn dst_is_bar(dst: Dst) -> bool {
53     match dst {
54         Dst::None => false,
55         Dst::SSA(ssa) => ssa.file() == RegFile::Bar,
56         Dst::Reg(reg) => reg.file() == RegFile::Bar,
57     }
58 }
59 
60 impl ALUSrc {
from_src_file(src: &Src, file: RegFile) -> ALUSrc61     fn from_src_file(src: &Src, file: RegFile) -> ALUSrc {
62         match src.src_ref {
63             SrcRef::Zero | SrcRef::Reg(_) => {
64                 let reg = match src.src_ref {
65                     SrcRef::Zero => RegRef::zero(file, 1),
66                     SrcRef::Reg(reg) => reg,
67                     _ => panic!("Invalid source ref"),
68                 };
69                 assert!(reg.comps() <= 2);
70                 assert!(reg.file() == file);
71                 let alu_ref = ALURegRef {
72                     reg: reg,
73                     abs: src_mod_has_abs(src.src_mod),
74                     neg: src_mod_has_neg(src.src_mod),
75                 };
76                 match reg.file() {
77                     RegFile::GPR => ALUSrc::Reg(alu_ref),
78                     RegFile::UGPR => ALUSrc::UReg(alu_ref),
79                     _ => panic!("Invalid ALU register file"),
80                 }
81             }
82             SrcRef::Imm32(i) => {
83                 assert!(src.src_mod.is_none());
84                 ALUSrc::Imm32(i)
85             }
86             SrcRef::CBuf(cb) => {
87                 let alu_ref = ALUCBufRef {
88                     cb: cb,
89                     abs: src_mod_has_abs(src.src_mod),
90                     neg: src_mod_has_neg(src.src_mod),
91                 };
92                 ALUSrc::CBuf(alu_ref)
93             }
94             _ => panic!("Invalid ALU source"),
95         }
96     }
97 
from_src(src: &Src) -> ALUSrc98     pub fn from_src(src: &Src) -> ALUSrc {
99         ALUSrc::from_src_file(src, RegFile::GPR)
100     }
101 
102     #[allow(dead_code)]
from_usrc(src: &Src) -> ALUSrc103     pub fn from_usrc(src: &Src) -> ALUSrc {
104         assert!(src.is_uniform());
105         ALUSrc::from_src_file(src, RegFile::UGPR)
106     }
107 }
108 
/// One instruction being encoded, stored as raw instruction words.
struct SM70Instr {
    /// The instruction bits: four 32-bit words (128 bits in total).
    inst: [u32; 4],
    /// Target level checked by some setters (e.g. `set_ureg` requires
    /// `sm >= 75`). Presumably the SM architecture number; confirm with the
    /// code that constructs this struct.
    sm: u8,
}
113 
114 impl BitViewable for SM70Instr {
bits(&self) -> usize115     fn bits(&self) -> usize {
116         BitView::new(&self.inst).bits()
117     }
118 
get_bit_range_u64(&self, range: Range<usize>) -> u64119     fn get_bit_range_u64(&self, range: Range<usize>) -> u64 {
120         BitView::new(&self.inst).get_bit_range_u64(range)
121     }
122 }
123 
124 impl BitMutViewable for SM70Instr {
set_bit_range_u64(&mut self, range: Range<usize>, val: u64)125     fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) {
126         BitMutView::new(&mut self.inst).set_bit_range_u64(range, val);
127     }
128 }
129 
130 impl SetFieldU64 for SM70Instr {
set_field_u64(&mut self, range: Range<usize>, val: u64)131     fn set_field_u64(&mut self, range: Range<usize>, val: u64) {
132         BitMutView::new(&mut self.inst).set_field_u64(range, val);
133     }
134 }
135 
136 impl SM70Instr {
set_bit(&mut self, bit: usize, val: bool)137     fn set_bit(&mut self, bit: usize, val: bool) {
138         BitMutView::new(&mut self.inst).set_bit(bit, val);
139     }
140 
set_src_imm(&mut self, range: Range<usize>, u: &u32)141     fn set_src_imm(&mut self, range: Range<usize>, u: &u32) {
142         assert!(range.len() == 32);
143         self.set_field(range, *u);
144     }
145 
set_reg(&mut self, range: Range<usize>, reg: RegRef)146     fn set_reg(&mut self, range: Range<usize>, reg: RegRef) {
147         assert!(range.len() == 8);
148         assert!(reg.file() == RegFile::GPR);
149         self.set_field(range, reg.base_idx());
150     }
151 
set_ureg(&mut self, range: Range<usize>, reg: RegRef)152     fn set_ureg(&mut self, range: Range<usize>, reg: RegRef) {
153         assert!(self.sm >= 75);
154         assert!(range.len() == 8);
155         assert!(reg.file() == RegFile::UGPR);
156         assert!(reg.base_idx() <= 63);
157         self.set_field(range, reg.base_idx());
158     }
159 
set_pred_reg(&mut self, range: Range<usize>, reg: RegRef)160     fn set_pred_reg(&mut self, range: Range<usize>, reg: RegRef) {
161         assert!(range.len() == 3);
162         assert!(reg.file() == RegFile::Pred);
163         assert!(reg.base_idx() <= 7);
164         assert!(reg.comps() == 1);
165         self.set_field(range, reg.base_idx());
166     }
167 
set_reg_src(&mut self, range: Range<usize>, src: Src)168     fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
169         assert!(src.src_mod.is_none());
170         match src.src_ref {
171             SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)),
172             SrcRef::Reg(reg) => self.set_reg(range, reg),
173             _ => panic!("Not a register"),
174         }
175     }
176 
set_pred_dst(&mut self, range: Range<usize>, dst: Dst)177     fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
178         match dst {
179             Dst::None => {
180                 self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1));
181             }
182             Dst::Reg(reg) => self.set_pred_reg(range, reg),
183             _ => panic!("Not a register"),
184         }
185     }
186 
set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src)187     fn set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src) {
188         // The default for predicates is true
189         let true_reg = RegRef::new(RegFile::Pred, 7, 1);
190 
191         let (not, reg) = match src.src_ref {
192             SrcRef::True => (false, true_reg),
193             SrcRef::False => (true, true_reg),
194             SrcRef::Reg(reg) => (false, reg),
195             _ => panic!("Not a register"),
196         };
197         self.set_pred_reg(range, reg);
198         self.set_bit(not_bit, not ^ src_mod_is_bnot(src.src_mod));
199     }
200 
set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef)201     fn set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef) {
202         let mut v = BitMutView::new_subset(self, range);
203         v.set_field(0..16, cb.offset);
204         if let CBuf::Binding(idx) = cb.buf {
205             v.set_field(16..21, idx);
206         } else {
207             panic!("Must be a bound constant buffer");
208         }
209     }
210 
211     #[allow(dead_code)]
set_src_cx(&mut self, range: Range<usize>, cb: &CBufRef)212     fn set_src_cx(&mut self, range: Range<usize>, cb: &CBufRef) {
213         assert!(self.sm >= 75);
214 
215         let mut v = BitMutView::new_subset(self, range);
216         if let CBuf::BindlessGPR(reg) = cb.buf {
217             assert!(reg.base_idx() <= 63);
218             assert!(reg.file() == RegFile::UGPR);
219             v.set_field(0..8, reg.base_idx());
220         } else {
221             panic!("Must be a bound constant buffer");
222         }
223         assert!(cb.offset % 4 == 0);
224         v.set_field(8..22, cb.offset / 4);
225     }
226 
set_opcode(&mut self, opcode: u16)227     fn set_opcode(&mut self, opcode: u16) {
228         self.set_field(0..12, opcode);
229     }
230 
set_pred(&mut self, pred: &Pred)231     fn set_pred(&mut self, pred: &Pred) {
232         assert!(!pred.is_false());
233         self.set_pred_reg(
234             12..15,
235             match pred.pred_ref {
236                 PredRef::None => RegRef::zero(RegFile::Pred, 1),
237                 PredRef::Reg(reg) => reg,
238                 PredRef::SSA(_) => panic!("SSA values must be lowered"),
239             },
240         );
241         self.set_bit(15, pred.pred_inv);
242     }
243 
set_dst(&mut self, dst: Dst)244     fn set_dst(&mut self, dst: Dst) {
245         match dst {
246             Dst::None => self.set_reg(16..24, RegRef::zero(RegFile::GPR, 1)),
247             Dst::Reg(reg) => self.set_reg(16..24, reg),
248             _ => panic!("Not a register"),
249         }
250     }
251 
set_bar_reg(&mut self, range: Range<usize>, reg: RegRef)252     fn set_bar_reg(&mut self, range: Range<usize>, reg: RegRef) {
253         assert!(range.len() == 4);
254         assert!(reg.file() == RegFile::Bar);
255         assert!(reg.comps() == 1);
256         self.set_field(range, reg.base_idx());
257     }
258 
set_bar_dst(&mut self, range: Range<usize>, dst: Dst)259     fn set_bar_dst(&mut self, range: Range<usize>, dst: Dst) {
260         self.set_bar_reg(range, *dst.as_reg().unwrap());
261     }
262 
set_bar_src(&mut self, range: Range<usize>, src: Src)263     fn set_bar_src(&mut self, range: Range<usize>, src: Src) {
264         assert!(src.src_mod.is_none());
265         self.set_bar_reg(range, *src.src_ref.as_reg().unwrap());
266     }
267 
set_alu_reg( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, reg: &ALURegRef, )268     fn set_alu_reg(
269         &mut self,
270         range: Range<usize>,
271         abs_bit: usize,
272         neg_bit: usize,
273         reg: &ALURegRef,
274     ) {
275         self.set_reg(range, reg.reg);
276         self.set_bit(abs_bit, reg.abs);
277         self.set_bit(neg_bit, reg.neg);
278     }
279 
set_alu_ureg( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, reg: &ALURegRef, )280     fn set_alu_ureg(
281         &mut self,
282         range: Range<usize>,
283         abs_bit: usize,
284         neg_bit: usize,
285         reg: &ALURegRef,
286     ) {
287         self.set_ureg(range, reg.reg);
288         self.set_bit(abs_bit, reg.abs);
289         self.set_bit(neg_bit, reg.neg);
290     }
291 
set_alu_cb( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, cb: &ALUCBufRef, )292     fn set_alu_cb(
293         &mut self,
294         range: Range<usize>,
295         abs_bit: usize,
296         neg_bit: usize,
297         cb: &ALUCBufRef,
298     ) {
299         self.set_src_cb(range, &cb.cb);
300         self.set_bit(abs_bit, cb.abs);
301         self.set_bit(neg_bit, cb.neg);
302     }
303 
set_alu_reg_src( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, src: &ALUSrc, )304     fn set_alu_reg_src(
305         &mut self,
306         range: Range<usize>,
307         abs_bit: usize,
308         neg_bit: usize,
309         src: &ALUSrc,
310     ) {
311         match src {
312             ALUSrc::None => (),
313             ALUSrc::Reg(reg) => self.set_alu_reg(range, abs_bit, neg_bit, reg),
314             _ => panic!("Invalid ALU src0"),
315         }
316     }
317 
encode_alu( &mut self, opcode: u16, dst: Option<Dst>, src0: ALUSrc, src1: ALUSrc, src2: ALUSrc, )318     fn encode_alu(
319         &mut self,
320         opcode: u16,
321         dst: Option<Dst>,
322         src0: ALUSrc,
323         src1: ALUSrc,
324         src2: ALUSrc,
325     ) {
326         if let Some(dst) = dst {
327             self.set_dst(dst);
328         }
329 
330         self.set_alu_reg_src(24..32, 73, 72, &src0);
331 
332         let form = match &src2 {
333             ALUSrc::None | ALUSrc::Reg(_) => {
334                 self.set_alu_reg_src(64..72, 74, 75, &src2);
335                 match &src1 {
336                     ALUSrc::None => 1_u8, // form
337                     ALUSrc::Reg(reg1) => {
338                         self.set_alu_reg(32..40, 62, 63, reg1);
339                         1_u8 // form
340                     }
341                     ALUSrc::UReg(reg1) => {
342                         self.set_alu_ureg(32..40, 62, 63, reg1);
343                         6_u8 // form
344                     }
345                     ALUSrc::Imm32(imm) => {
346                         self.set_src_imm(32..64, imm);
347                         4_u8 // form
348                     }
349                     ALUSrc::CBuf(cb) => {
350                         self.set_alu_cb(38..59, 62, 63, cb);
351                         5_u8 // form
352                     }
353                 }
354             }
355             ALUSrc::UReg(reg2) => {
356                 self.set_alu_ureg(32..40, 62, 63, reg2);
357                 self.set_alu_reg_src(64..72, 74, 75, &src1);
358                 7_u8 // form
359             }
360             ALUSrc::Imm32(imm) => {
361                 self.set_src_imm(32..64, imm);
362                 self.set_alu_reg_src(64..72, 74, 75, &src1);
363                 2_u8 // form
364             }
365             ALUSrc::CBuf(cb) => {
366                 // TODO set_src_cx
367                 self.set_alu_cb(38..59, 62, 63, cb);
368                 self.set_alu_reg_src(64..72, 74, 75, &src1);
369                 3_u8 // form
370             }
371         };
372 
373         self.set_field(0..9, opcode);
374         self.set_field(9..12, form);
375     }
376 
set_instr_deps(&mut self, deps: &InstrDeps)377     fn set_instr_deps(&mut self, deps: &InstrDeps) {
378         self.set_field(105..109, deps.delay);
379         self.set_bit(109, deps.yld);
380         self.set_field(110..113, deps.wr_bar().unwrap_or(7));
381         self.set_field(113..116, deps.rd_bar().unwrap_or(7));
382         self.set_field(116..122, deps.wt_bar_mask);
383         self.set_field(122..126, deps.reuse_mask);
384     }
385 
set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode)386     fn set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode) {
387         assert!(range.len() == 2);
388         self.set_field(
389             range,
390             match rnd_mode {
391                 FRndMode::NearestEven => 0_u8,
392                 FRndMode::NegInf => 1_u8,
393                 FRndMode::PosInf => 2_u8,
394                 FRndMode::Zero => 3_u8,
395             },
396         );
397     }
398 
encode_fadd(&mut self, op: &OpFAdd)399     fn encode_fadd(&mut self, op: &OpFAdd) {
400         if op.srcs[1].src_ref.as_reg().is_some() {
401             self.encode_alu(
402                 0x021,
403                 Some(op.dst),
404                 ALUSrc::from_src(&op.srcs[0]),
405                 ALUSrc::from_src(&op.srcs[1]),
406                 ALUSrc::None,
407             );
408         } else {
409             self.encode_alu(
410                 0x021,
411                 Some(op.dst),
412                 ALUSrc::from_src(&op.srcs[0]),
413                 ALUSrc::from_src(&Src::new_zero()),
414                 ALUSrc::from_src(&op.srcs[1]),
415             );
416         }
417         self.set_bit(77, op.saturate);
418         self.set_rnd_mode(78..80, op.rnd_mode);
419         self.set_bit(80, op.ftz);
420     }
421 
encode_ffma(&mut self, op: &OpFFma)422     fn encode_ffma(&mut self, op: &OpFFma) {
423         self.encode_alu(
424             0x023,
425             Some(op.dst),
426             ALUSrc::from_src(&op.srcs[0]),
427             ALUSrc::from_src(&op.srcs[1]),
428             ALUSrc::from_src(&op.srcs[2]),
429         );
430         self.set_bit(76, op.dnz);
431         self.set_bit(77, op.saturate);
432         self.set_rnd_mode(78..80, op.rnd_mode);
433         self.set_bit(80, op.ftz);
434     }
435 
encode_fmnmx(&mut self, op: &OpFMnMx)436     fn encode_fmnmx(&mut self, op: &OpFMnMx) {
437         self.encode_alu(
438             0x009,
439             Some(op.dst),
440             ALUSrc::from_src(&op.srcs[0]),
441             ALUSrc::from_src(&op.srcs[1]),
442             ALUSrc::from_src(&Src::new_zero()),
443         );
444         self.set_pred_src(87..90, 90, op.min);
445         self.set_bit(80, op.ftz);
446     }
447 
encode_fmul(&mut self, op: &OpFMul)448     fn encode_fmul(&mut self, op: &OpFMul) {
449         self.encode_alu(
450             0x020,
451             Some(op.dst),
452             ALUSrc::from_src(&op.srcs[0]),
453             ALUSrc::from_src(&op.srcs[1]),
454             ALUSrc::from_src(&Src::new_zero()),
455         );
456         self.set_bit(76, op.dnz);
457         self.set_bit(77, op.saturate);
458         self.set_rnd_mode(78..80, op.rnd_mode);
459         self.set_bit(80, op.ftz);
460         self.set_field(84..87, 0x4_u8) // TODO: PDIV
461     }
462 
set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp)463     fn set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp) {
464         assert!(range.len() == 4);
465         self.set_field(
466             range,
467             match op {
468                 FloatCmpOp::OrdLt => 0x01_u8,
469                 FloatCmpOp::OrdEq => 0x02_u8,
470                 FloatCmpOp::OrdLe => 0x03_u8,
471                 FloatCmpOp::OrdGt => 0x04_u8,
472                 FloatCmpOp::OrdNe => 0x05_u8,
473                 FloatCmpOp::OrdGe => 0x06_u8,
474                 FloatCmpOp::UnordLt => 0x09_u8,
475                 FloatCmpOp::UnordEq => 0x0a_u8,
476                 FloatCmpOp::UnordLe => 0x0b_u8,
477                 FloatCmpOp::UnordGt => 0x0c_u8,
478                 FloatCmpOp::UnordNe => 0x0d_u8,
479                 FloatCmpOp::UnordGe => 0x0e_u8,
480                 FloatCmpOp::IsNum => 0x07_u8,
481                 FloatCmpOp::IsNan => 0x08_u8,
482             },
483         );
484     }
485 
encode_fset(&mut self, op: &OpFSet)486     fn encode_fset(&mut self, op: &OpFSet) {
487         self.encode_alu(
488             0x00a,
489             Some(op.dst),
490             ALUSrc::from_src(&op.srcs[0]),
491             ALUSrc::from_src(&op.srcs[1]),
492             ALUSrc::None,
493         );
494         self.set_float_cmp_op(76..80, op.cmp_op);
495         self.set_bit(80, op.ftz);
496         self.set_field(87..90, 0x7_u8); // TODO: src predicate
497     }
498 
set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp)499     fn set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp) {
500         assert!(range.len() == 2);
501         self.set_field(
502             range,
503             match op {
504                 PredSetOp::And => 0_u8,
505                 PredSetOp::Or => 1_u8,
506                 PredSetOp::Xor => 2_u8,
507             },
508         );
509     }
510 
encode_fsetp(&mut self, op: &OpFSetP)511     fn encode_fsetp(&mut self, op: &OpFSetP) {
512         self.encode_alu(
513             0x00b,
514             None,
515             ALUSrc::from_src(&op.srcs[0]),
516             ALUSrc::from_src(&op.srcs[1]),
517             ALUSrc::None,
518         );
519 
520         self.set_pred_set_op(74..76, op.set_op);
521         self.set_float_cmp_op(76..80, op.cmp_op);
522         self.set_bit(80, op.ftz);
523 
524         self.set_pred_dst(81..84, op.dst);
525         self.set_pred_dst(84..87, Dst::None); // dst1
526 
527         self.set_pred_src(87..90, 90, op.accum);
528     }
529 
encode_fswzadd(&mut self, op: &OpFSwzAdd)530     fn encode_fswzadd(&mut self, op: &OpFSwzAdd) {
531         self.set_opcode(0x822);
532         self.set_dst(op.dst);
533 
534         self.set_reg_src(24..32, op.srcs[0]);
535         self.set_reg_src(64..72, op.srcs[1]);
536 
537         let mut subop = 0x0_u8;
538 
539         for (i, swz_op) in op.ops.iter().enumerate() {
540             let swz_op = match swz_op {
541                 FSwzAddOp::Add => 0,
542                 FSwzAddOp::SubRight => 2,
543                 FSwzAddOp::SubLeft => 1,
544                 FSwzAddOp::MoveLeft => 3,
545             };
546 
547             subop |= swz_op << ((op.ops.len() - i - 1) * 2);
548         }
549 
550         self.set_field(32..40, subop);
551 
552         self.set_bit(77, false); // NDV
553         self.set_rnd_mode(78..80, op.rnd_mode);
554         self.set_bit(80, op.ftz);
555     }
556 
encode_mufu(&mut self, op: &OpMuFu)557     fn encode_mufu(&mut self, op: &OpMuFu) {
558         self.encode_alu(
559             0x108,
560             Some(op.dst),
561             ALUSrc::None,
562             ALUSrc::from_src(&op.src),
563             ALUSrc::None,
564         );
565         self.set_field(
566             74..80,
567             match op.op {
568                 MuFuOp::Cos => 0_u8,
569                 MuFuOp::Sin => 1_u8,
570                 MuFuOp::Exp2 => 2_u8,
571                 MuFuOp::Log2 => 3_u8,
572                 MuFuOp::Rcp => 4_u8,
573                 MuFuOp::Rsq => 5_u8,
574                 MuFuOp::Rcp64H => 6_u8,
575                 MuFuOp::Rsq64H => 7_u8,
576                 MuFuOp::Sqrt => 8_u8,
577                 MuFuOp::Tanh => 9_u8,
578             },
579         );
580     }
581 
encode_dadd(&mut self, op: &OpDAdd)582     fn encode_dadd(&mut self, op: &OpDAdd) {
583         self.encode_alu(
584             0x029,
585             Some(op.dst),
586             ALUSrc::from_src(&op.srcs[0]),
587             ALUSrc::None,
588             ALUSrc::from_src(&op.srcs[1]),
589         );
590         self.set_rnd_mode(78..80, op.rnd_mode);
591     }
592 
encode_dfma(&mut self, op: &OpDFma)593     fn encode_dfma(&mut self, op: &OpDFma) {
594         self.encode_alu(
595             0x02b,
596             Some(op.dst),
597             ALUSrc::from_src(&op.srcs[0]),
598             ALUSrc::from_src(&op.srcs[1]),
599             ALUSrc::from_src(&op.srcs[2]),
600         );
601         self.set_rnd_mode(78..80, op.rnd_mode);
602     }
603 
encode_dmul(&mut self, op: &OpDMul)604     fn encode_dmul(&mut self, op: &OpDMul) {
605         self.encode_alu(
606             0x028,
607             Some(op.dst),
608             ALUSrc::from_src(&op.srcs[0]),
609             ALUSrc::from_src(&op.srcs[1]),
610             ALUSrc::None,
611         );
612         self.set_rnd_mode(78..80, op.rnd_mode);
613     }
614 
encode_dsetp(&mut self, op: &OpDSetP)615     fn encode_dsetp(&mut self, op: &OpDSetP) {
616         match op.srcs[1].src_ref {
617             SrcRef::Reg(_) | SrcRef::Zero => {
618                 self.encode_alu(
619                     0x02a,
620                     None,
621                     ALUSrc::from_src(&op.srcs[0]),
622                     ALUSrc::from_src(&op.srcs[1]),
623                     ALUSrc::None,
624                 );
625             }
626             _ => {
627                 self.encode_alu(
628                     0x02a,
629                     None,
630                     ALUSrc::from_src(&op.srcs[0]),
631                     ALUSrc::None,
632                     ALUSrc::from_src(&op.srcs[1]),
633                 );
634             }
635         }
636 
637         self.set_pred_set_op(74..76, op.set_op);
638         self.set_float_cmp_op(76..80, op.cmp_op);
639 
640         self.set_pred_dst(81..84, op.dst);
641         self.set_pred_dst(84..87, Dst::None); /* dst1 */
642 
643         self.set_pred_src(87..90, 90, op.accum);
644     }
645 
encode_bmsk(&mut self, op: &OpBMsk)646     fn encode_bmsk(&mut self, op: &OpBMsk) {
647         self.encode_alu(
648             0x01b,
649             Some(op.dst),
650             ALUSrc::from_src(&op.pos),
651             ALUSrc::from_src(&op.width),
652             ALUSrc::None,
653         );
654 
655         self.set_bit(75, op.wrap);
656     }
657 
encode_brev(&mut self, op: &OpBRev)658     fn encode_brev(&mut self, op: &OpBRev) {
659         self.encode_alu(
660             0x101,
661             Some(op.dst),
662             ALUSrc::None,
663             ALUSrc::from_src(&op.src),
664             ALUSrc::None,
665         );
666     }
667 
encode_flo(&mut self, op: &OpFlo)668     fn encode_flo(&mut self, op: &OpFlo) {
669         self.encode_alu(
670             0x100,
671             Some(op.dst),
672             ALUSrc::None,
673             ALUSrc::from_src(&op.src),
674             ALUSrc::None,
675         );
676         self.set_pred_dst(81..84, Dst::None);
677         self.set_field(74..75, op.return_shift_amount as u8);
678         self.set_field(73..74, op.signed as u8);
679         let not_mod = matches!(op.src.src_mod, SrcMod::BNot);
680         self.set_field(63..64, not_mod)
681     }
682 
encode_iabs(&mut self, op: &OpIAbs)683     fn encode_iabs(&mut self, op: &OpIAbs) {
684         self.encode_alu(
685             0x013,
686             Some(op.dst),
687             ALUSrc::None,
688             ALUSrc::from_src(&op.src),
689             ALUSrc::None,
690         );
691     }
692 
encode_iadd3(&mut self, op: &OpIAdd3)693     fn encode_iadd3(&mut self, op: &OpIAdd3) {
694         // Hardware requires at least one of these be unmodified
695         assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
696 
697         self.encode_alu(
698             0x010,
699             Some(op.dst),
700             ALUSrc::from_src(&op.srcs[0]),
701             ALUSrc::from_src(&op.srcs[1]),
702             ALUSrc::from_src(&op.srcs[2]),
703         );
704 
705         self.set_pred_dst(81..84, op.overflow[0]);
706         self.set_pred_dst(84..87, op.overflow[1]);
707     }
708 
encode_iadd3x(&mut self, op: &OpIAdd3X)709     fn encode_iadd3x(&mut self, op: &OpIAdd3X) {
710         // Hardware requires at least one of these be unmodified
711         assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
712 
713         self.encode_alu(
714             0x010,
715             Some(op.dst),
716             ALUSrc::from_src(&op.srcs[0]),
717             ALUSrc::from_src(&op.srcs[1]),
718             ALUSrc::from_src(&op.srcs[2]),
719         );
720 
721         self.set_bit(74, true); // .X
722 
723         self.set_pred_dst(81..84, op.overflow[0]);
724         self.set_pred_dst(84..87, op.overflow[1]);
725 
726         self.set_pred_src(87..90, 90, op.carry[0]);
727         self.set_pred_src(77..80, 80, op.carry[1]);
728     }
729 
encode_idp4(&mut self, op: &OpIDp4)730     fn encode_idp4(&mut self, op: &OpIDp4) {
731         self.encode_alu(
732             0x026,
733             Some(op.dst),
734             ALUSrc::from_src(&op.srcs[0]),
735             ALUSrc::from_src(&op.srcs[1]),
736             ALUSrc::from_src(&op.srcs[2]),
737         );
738 
739         self.set_bit(
740             73,
741             match op.src_types[0] {
742                 IntType::U8 => false,
743                 IntType::I8 => true,
744                 _ => panic!("Invalid DP4 source type"),
745             },
746         );
747         self.set_bit(
748             74,
749             match op.src_types[1] {
750                 IntType::U8 => false,
751                 IntType::I8 => true,
752                 _ => panic!("Invalid DP4 source type"),
753             },
754         );
755     }
756 
encode_imad(&mut self, op: &OpIMad)757     fn encode_imad(&mut self, op: &OpIMad) {
758         self.encode_alu(
759             0x024,
760             Some(op.dst),
761             ALUSrc::from_src(&op.srcs[0]),
762             ALUSrc::from_src(&op.srcs[1]),
763             ALUSrc::from_src(&op.srcs[2]),
764         );
765         self.set_pred_dst(81..84, Dst::None);
766         self.set_bit(73, op.signed);
767     }
768 
encode_imad64(&mut self, op: &OpIMad64)769     fn encode_imad64(&mut self, op: &OpIMad64) {
770         self.encode_alu(
771             0x025,
772             Some(op.dst),
773             ALUSrc::from_src(&op.srcs[0]),
774             ALUSrc::from_src(&op.srcs[1]),
775             ALUSrc::from_src(&op.srcs[2]),
776         );
777         self.set_pred_dst(81..84, Dst::None);
778         self.set_bit(73, op.signed);
779     }
780 
encode_imnmx(&mut self, op: &OpIMnMx)781     fn encode_imnmx(&mut self, op: &OpIMnMx) {
782         self.encode_alu(
783             0x017,
784             Some(op.dst),
785             ALUSrc::from_src(&op.srcs[0]),
786             ALUSrc::from_src(&op.srcs[1]),
787             ALUSrc::None,
788         );
789         self.set_pred_src(87..90, 90, op.min);
790         self.set_bit(
791             73,
792             match op.cmp_type {
793                 IntCmpType::U32 => false,
794                 IntCmpType::I32 => true,
795             },
796         );
797     }
798 
set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp)799     fn set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp) {
800         assert!(range.len() == 3);
801         self.set_field(
802             range,
803             match op {
804                 IntCmpOp::Eq => 2_u8,
805                 IntCmpOp::Ne => 5_u8,
806                 IntCmpOp::Lt => 1_u8,
807                 IntCmpOp::Le => 3_u8,
808                 IntCmpOp::Gt => 4_u8,
809                 IntCmpOp::Ge => 6_u8,
810             },
811         );
812     }
813 
encode_isetp(&mut self, op: &OpISetP)814     fn encode_isetp(&mut self, op: &OpISetP) {
815         self.encode_alu(
816             0x00c,
817             None,
818             ALUSrc::from_src(&op.srcs[0]),
819             ALUSrc::from_src(&op.srcs[1]),
820             ALUSrc::None,
821         );
822 
823         self.set_pred_src(68..71, 71, op.low_cmp);
824         self.set_bit(72, op.ex);
825 
826         self.set_field(
827             73..74,
828             match op.cmp_type {
829                 IntCmpType::U32 => 0_u32,
830                 IntCmpType::I32 => 1_u32,
831             },
832         );
833         self.set_pred_set_op(74..76, op.set_op);
834         self.set_int_cmp_op(76..79, op.cmp_op);
835 
836         self.set_pred_dst(81..84, op.dst);
837         self.set_pred_dst(84..87, Dst::None); // dst1
838 
839         self.set_pred_src(87..90, 90, op.accum);
840     }
841 
encode_lop3(&mut self, op: &OpLop3)842     fn encode_lop3(&mut self, op: &OpLop3) {
843         self.encode_alu(
844             0x012,
845             Some(op.dst),
846             ALUSrc::from_src(&op.srcs[0]),
847             ALUSrc::from_src(&op.srcs[1]),
848             ALUSrc::from_src(&op.srcs[2]),
849         );
850 
851         self.set_field(72..80, op.op.lut);
852         self.set_bit(80, false); // .PAND
853         self.set_field(81..84, 7_u32); // pred
854         self.set_pred_src(87..90, 90, SrcRef::False.into());
855     }
856 
encode_popc(&mut self, op: &OpPopC)857     fn encode_popc(&mut self, op: &OpPopC) {
858         self.encode_alu(
859             0x109,
860             Some(op.dst),
861             ALUSrc::None,
862             ALUSrc::from_src(&op.src),
863             ALUSrc::None,
864         );
865 
866         let not_mod = matches!(op.src.src_mod, SrcMod::BNot);
867         self.set_field(63..64, not_mod)
868     }
869 
encode_shf(&mut self, op: &OpShf)870     fn encode_shf(&mut self, op: &OpShf) {
871         self.encode_alu(
872             0x019,
873             Some(op.dst),
874             ALUSrc::from_src(&op.low),
875             ALUSrc::from_src(&op.shift),
876             ALUSrc::from_src(&op.high),
877         );
878 
879         self.set_field(
880             73..75,
881             match op.data_type {
882                 IntType::I64 => 0_u8,
883                 IntType::U64 => 1_u8,
884                 IntType::I32 => 2_u8,
885                 IntType::U32 => 3_u8,
886                 _ => panic!("Invalid shift data type"),
887             },
888         );
889         self.set_bit(75, op.wrap);
890         self.set_bit(76, op.right);
891         self.set_bit(80, op.dst_high);
892     }
893 
encode_f2f(&mut self, op: &OpF2F)894     fn encode_f2f(&mut self, op: &OpF2F) {
895         assert!(!op.integer_rnd);
896         if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
897             self.encode_alu(
898                 0x104,
899                 Some(op.dst),
900                 ALUSrc::None,
901                 ALUSrc::from_src(&op.src),
902                 ALUSrc::None,
903             );
904         } else {
905             self.encode_alu(
906                 0x110,
907                 Some(op.dst),
908                 ALUSrc::None,
909                 ALUSrc::from_src(&op.src),
910                 ALUSrc::None,
911             );
912         }
913 
914         if op.high {
915             self.set_field(60..62, 1_u8); // .H1
916         }
917 
918         self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
919         self.set_rnd_mode(78..80, op.rnd_mode);
920         self.set_bit(80, op.ftz);
921         self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
922     }
923 
encode_f2i(&mut self, op: &OpF2I)924     fn encode_f2i(&mut self, op: &OpF2I) {
925         if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
926             self.encode_alu(
927                 0x105,
928                 Some(op.dst),
929                 ALUSrc::None,
930                 ALUSrc::from_src(&op.src),
931                 ALUSrc::None,
932             );
933         } else {
934             self.encode_alu(
935                 0x111,
936                 Some(op.dst),
937                 ALUSrc::None,
938                 ALUSrc::from_src(&op.src),
939                 ALUSrc::None,
940             );
941         }
942 
943         self.set_bit(72, op.dst_type.is_signed());
944         self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
945         self.set_bit(77, false); // NTZ
946         self.set_rnd_mode(78..80, op.rnd_mode);
947         self.set_bit(80, op.ftz);
948         self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
949     }
950 
encode_i2f(&mut self, op: &OpI2F)951     fn encode_i2f(&mut self, op: &OpI2F) {
952         if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
953             self.encode_alu(
954                 0x106,
955                 Some(op.dst),
956                 ALUSrc::None,
957                 ALUSrc::from_src(&op.src),
958                 ALUSrc::None,
959             );
960         } else {
961             self.encode_alu(
962                 0x112,
963                 Some(op.dst),
964                 ALUSrc::None,
965                 ALUSrc::from_src(&op.src),
966                 ALUSrc::None,
967             );
968         }
969 
970         self.set_field(60..62, 0_u8); // TODO: subop
971         self.set_bit(74, op.src_type.is_signed());
972         self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
973         self.set_rnd_mode(78..80, op.rnd_mode);
974         self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
975     }
976 
encode_frnd(&mut self, op: &OpFRnd)977     fn encode_frnd(&mut self, op: &OpFRnd) {
978         if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
979             self.encode_alu(
980                 0x107,
981                 Some(op.dst),
982                 ALUSrc::None,
983                 ALUSrc::from_src(&op.src),
984                 ALUSrc::None,
985             );
986         } else {
987             self.encode_alu(
988                 0x113,
989                 Some(op.dst),
990                 ALUSrc::None,
991                 ALUSrc::from_src(&op.src),
992                 ALUSrc::None,
993             );
994         }
995 
996         self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
997         self.set_bit(80, op.ftz);
998         self.set_rnd_mode(78..80, op.rnd_mode);
999         self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
1000     }
1001 
encode_mov(&mut self, op: &OpMov)1002     fn encode_mov(&mut self, op: &OpMov) {
1003         self.encode_alu(
1004             0x002,
1005             Some(op.dst),
1006             ALUSrc::None,
1007             ALUSrc::from_src(&op.src),
1008             ALUSrc::None,
1009         );
1010         self.set_field(72..76, op.quad_lanes);
1011     }
1012 
encode_prmt(&mut self, op: &OpPrmt)1013     fn encode_prmt(&mut self, op: &OpPrmt) {
1014         self.encode_alu(
1015             0x16,
1016             Some(op.dst),
1017             ALUSrc::from_src(&op.srcs[0]),
1018             ALUSrc::from_src(&op.sel),
1019             ALUSrc::from_src(&op.srcs[1]),
1020         );
1021 
1022         self.set_field(
1023             72..75,
1024             match op.mode {
1025                 PrmtMode::Index => 0_u8,
1026                 PrmtMode::Forward4Extract => 1_u8,
1027                 PrmtMode::Backward4Extract => 2_u8,
1028                 PrmtMode::Replicate8 => 3_u8,
1029                 PrmtMode::EdgeClampLeft => 4_u8,
1030                 PrmtMode::EdgeClampRight => 5_u8,
1031                 PrmtMode::Replicate16 => 6_u8,
1032             },
1033         )
1034     }
1035 
encode_sel(&mut self, op: &OpSel)1036     fn encode_sel(&mut self, op: &OpSel) {
1037         self.encode_alu(
1038             0x007,
1039             Some(op.dst),
1040             ALUSrc::from_src(&op.srcs[0]),
1041             ALUSrc::from_src(&op.srcs[1]),
1042             ALUSrc::None,
1043         );
1044 
1045         self.set_pred_src(87..90, 90, op.cond);
1046     }
1047 
encode_shfl(&mut self, op: &OpShfl)1048     fn encode_shfl(&mut self, op: &OpShfl) {
1049         assert!(op.lane.src_mod.is_none());
1050         assert!(op.c.src_mod.is_none());
1051 
1052         match &op.lane.src_ref {
1053             SrcRef::Zero | SrcRef::Reg(_) => match &op.c.src_ref {
1054                 SrcRef::Zero | SrcRef::Reg(_) => {
1055                     self.set_opcode(0x389);
1056                     self.set_reg_src(32..40, op.lane);
1057                     self.set_reg_src(64..72, op.c);
1058                 }
1059                 SrcRef::Imm32(imm_c) => {
1060                     self.set_opcode(0x589);
1061                     self.set_reg_src(32..40, op.lane);
1062                     self.set_field(40..53, *imm_c & 0x1f1f);
1063                 }
1064                 _ => panic!("Invalid instruction form"),
1065             },
1066             SrcRef::Imm32(imm_lane) => match &op.c.src_ref {
1067                 SrcRef::Zero | SrcRef::Reg(_) => {
1068                     self.set_opcode(0x989);
1069                     self.set_field(53..58, *imm_lane & 0x1f);
1070                     self.set_reg_src(64..72, op.c);
1071                 }
1072                 SrcRef::Imm32(imm_c) => {
1073                     self.set_opcode(0xf89);
1074                     self.set_field(40..53, *imm_c & 0x1f1f);
1075                     self.set_field(53..58, *imm_lane & 0x1f);
1076                 }
1077                 _ => panic!("Invalid instruction form"),
1078             },
1079             _ => panic!("Invalid instruction form"),
1080         };
1081 
1082         self.set_dst(op.dst);
1083         self.set_pred_dst(81..84, op.in_bounds);
1084         self.set_reg_src(24..32, op.src);
1085         self.set_field(
1086             58..60,
1087             match op.op {
1088                 ShflOp::Idx => 0_u8,
1089                 ShflOp::Up => 1_u8,
1090                 ShflOp::Down => 2_u8,
1091                 ShflOp::Bfly => 3_u8,
1092             },
1093         );
1094     }
1095 
encode_plop3(&mut self, op: &OpPLop3)1096     fn encode_plop3(&mut self, op: &OpPLop3) {
1097         self.set_opcode(0x81c);
1098         self.set_field(16..24, op.ops[1].lut);
1099         self.set_field(64..67, op.ops[0].lut & 0x7);
1100         self.set_field(72..77, op.ops[0].lut >> 3);
1101 
1102         self.set_pred_src(68..71, 71, op.srcs[2]);
1103 
1104         self.set_pred_src(77..80, 80, op.srcs[1]);
1105         self.set_pred_dst(81..84, op.dsts[0]);
1106         self.set_pred_dst(84..87, op.dsts[1]);
1107 
1108         self.set_pred_src(87..90, 90, op.srcs[0]);
1109     }
1110 
set_tex_dim(&mut self, range: Range<usize>, dim: TexDim)1111     fn set_tex_dim(&mut self, range: Range<usize>, dim: TexDim) {
1112         assert!(range.len() == 3);
1113         self.set_field(
1114             range,
1115             match dim {
1116                 TexDim::_1D => 0_u8,
1117                 TexDim::Array1D => 4_u8,
1118                 TexDim::_2D => 1_u8,
1119                 TexDim::Array2D => 5_u8,
1120                 TexDim::_3D => 2_u8,
1121                 TexDim::Cube => 3_u8,
1122                 TexDim::ArrayCube => 7_u8,
1123             },
1124         );
1125     }
1126 
set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode)1127     fn set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode) {
1128         assert!(range.len() == 3);
1129         self.set_field(
1130             range,
1131             match lod_mode {
1132                 TexLodMode::Auto => 0_u8,
1133                 TexLodMode::Zero => 1_u8,
1134                 TexLodMode::Bias => 2_u8,
1135                 TexLodMode::Lod => 3_u8,
1136                 TexLodMode::Clamp => 4_u8,
1137                 TexLodMode::BiasClamp => 5_u8,
1138             },
1139         );
1140     }
1141 
encode_tex(&mut self, op: &OpTex)1142     fn encode_tex(&mut self, op: &OpTex) {
1143         self.set_opcode(0x361);
1144         self.set_bit(59, true); // .B
1145 
1146         self.set_dst(op.dsts[0]);
1147         if let Dst::Reg(reg) = op.dsts[1] {
1148             self.set_reg(64..72, reg);
1149         } else {
1150             self.set_field(64..72, 255_u8);
1151         }
1152         self.set_pred_dst(81..84, op.resident);
1153 
1154         self.set_reg_src(24..32, op.srcs[0]);
1155         self.set_reg_src(32..40, op.srcs[1]);
1156 
1157         self.set_tex_dim(61..64, op.dim);
1158         self.set_field(72..76, op.mask);
1159         self.set_bit(76, op.offset);
1160         self.set_bit(77, false); // ToDo: NDV
1161         self.set_bit(78, op.z_cmpr);
1162         self.set_field(84..87, 1);
1163         self.set_tex_lod_mode(87..90, op.lod_mode);
1164         self.set_bit(90, false); // TODO: .NODEP
1165     }
1166 
encode_tld(&mut self, op: &OpTld)1167     fn encode_tld(&mut self, op: &OpTld) {
1168         self.set_opcode(0x367);
1169         self.set_bit(59, true); // .B
1170 
1171         self.set_dst(op.dsts[0]);
1172         if let Dst::Reg(reg) = op.dsts[1] {
1173             self.set_reg(64..72, reg);
1174         } else {
1175             self.set_field(64..72, 255_u8);
1176         }
1177         self.set_pred_dst(81..84, op.resident);
1178 
1179         self.set_reg_src(24..32, op.srcs[0]);
1180         self.set_reg_src(32..40, op.srcs[1]);
1181 
1182         self.set_tex_dim(61..64, op.dim);
1183         self.set_field(72..76, op.mask);
1184         self.set_bit(76, op.offset);
1185         // bit 77: .CL
1186         self.set_bit(78, op.is_ms);
1187         // bits 79..81: .F16
1188         assert!(
1189             op.lod_mode == TexLodMode::Zero || op.lod_mode == TexLodMode::Lod
1190         );
1191         self.set_tex_lod_mode(87..90, op.lod_mode);
1192         self.set_bit(90, false); // TODO: .NODEP
1193     }
1194 
encode_tld4(&mut self, op: &OpTld4)1195     fn encode_tld4(&mut self, op: &OpTld4) {
1196         self.set_opcode(0x364);
1197         self.set_bit(59, true); // .B
1198 
1199         self.set_dst(op.dsts[0]);
1200         if let Dst::Reg(reg) = op.dsts[1] {
1201             self.set_reg(64..72, reg);
1202         } else {
1203             self.set_field(64..72, 255_u8);
1204         }
1205         self.set_pred_dst(81..84, op.resident);
1206 
1207         self.set_reg_src(24..32, op.srcs[0]);
1208         self.set_reg_src(32..40, op.srcs[1]);
1209 
1210         self.set_tex_dim(61..64, op.dim);
1211         self.set_field(72..76, op.mask);
1212         self.set_field(
1213             76..78,
1214             match op.offset_mode {
1215                 Tld4OffsetMode::None => 0_u8,
1216                 Tld4OffsetMode::AddOffI => 1_u8,
1217                 Tld4OffsetMode::PerPx => 2_u8,
1218             },
1219         );
1220         // bit 77: .CL
1221         self.set_bit(78, op.z_cmpr);
1222         self.set_bit(84, true); // !.EF
1223         self.set_field(87..89, op.comp);
1224         self.set_bit(90, false); // TODO: .NODEP
1225     }
1226 
encode_tmml(&mut self, op: &OpTmml)1227     fn encode_tmml(&mut self, op: &OpTmml) {
1228         self.set_opcode(0x36a);
1229         self.set_bit(59, true); // .B
1230 
1231         self.set_dst(op.dsts[0]);
1232         if let Dst::Reg(reg) = op.dsts[1] {
1233             self.set_reg(64..72, reg);
1234         } else {
1235             self.set_field(64..72, 255_u8);
1236         }
1237 
1238         self.set_reg_src(24..32, op.srcs[0]);
1239         self.set_reg_src(32..40, op.srcs[1]);
1240 
1241         self.set_tex_dim(61..64, op.dim);
1242         self.set_field(72..76, op.mask);
1243         self.set_bit(77, false); // ToDo: NDV
1244         self.set_bit(90, false); // TODO: .NODEP
1245     }
1246 
encode_txd(&mut self, op: &OpTxd)1247     fn encode_txd(&mut self, op: &OpTxd) {
1248         self.set_opcode(0x36d);
1249         self.set_bit(59, true); // .B
1250 
1251         self.set_dst(op.dsts[0]);
1252         if let Dst::Reg(reg) = op.dsts[1] {
1253             self.set_reg(64..72, reg);
1254         } else {
1255             self.set_field(64..72, 255_u8);
1256         }
1257         self.set_pred_dst(81..84, op.resident);
1258 
1259         self.set_reg_src(24..32, op.srcs[0]);
1260         self.set_reg_src(32..40, op.srcs[1]);
1261 
1262         self.set_tex_dim(61..64, op.dim);
1263         self.set_field(72..76, op.mask);
1264         self.set_bit(76, op.offset);
1265         self.set_bit(77, false); // ToDo: NDV
1266         self.set_bit(90, false); // TODO: .NODEP
1267     }
1268 
encode_txq(&mut self, op: &OpTxq)1269     fn encode_txq(&mut self, op: &OpTxq) {
1270         self.set_opcode(0x370);
1271         self.set_bit(59, true); // .B
1272 
1273         self.set_dst(op.dsts[0]);
1274         if let Dst::Reg(reg) = op.dsts[1] {
1275             self.set_reg(64..72, reg);
1276         } else {
1277             self.set_field(64..72, 255_u8);
1278         }
1279 
1280         self.set_reg_src(24..32, op.src);
1281         self.set_field(
1282             62..64,
1283             match op.query {
1284                 TexQuery::Dimension => 0_u8,
1285                 TexQuery::TextureType => 1_u8,
1286                 TexQuery::SamplerPos => 2_u8,
1287             },
1288         );
1289         self.set_field(72..76, op.mask);
1290     }
1291 
set_image_dim(&mut self, range: Range<usize>, dim: ImageDim)1292     fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) {
1293         assert!(range.len() == 3);
1294         self.set_field(
1295             range,
1296             match dim {
1297                 ImageDim::_1D => 0_u8,
1298                 ImageDim::_1DBuffer => 1_u8,
1299                 ImageDim::_1DArray => 2_u8,
1300                 ImageDim::_2D => 3_u8,
1301                 ImageDim::_2DArray => 4_u8,
1302                 ImageDim::_3D => 5_u8,
1303             },
1304         );
1305     }
1306 
set_mem_order(&mut self, order: &MemOrder)1307     fn set_mem_order(&mut self, order: &MemOrder) {
1308         if self.sm < 80 {
1309             let scope = match order {
1310                 MemOrder::Constant => MemScope::System,
1311                 MemOrder::Weak => MemScope::CTA,
1312                 MemOrder::Strong(s) => *s,
1313             };
1314             self.set_field(
1315                 77..79,
1316                 match scope {
1317                     MemScope::CTA => 0_u8,
1318                     // SM => 1_u8,
1319                     MemScope::GPU => 2_u8,
1320                     MemScope::System => 3_u8,
1321                 },
1322             );
1323             self.set_field(
1324                 79..81,
1325                 match order {
1326                     MemOrder::Constant => 0_u8,
1327                     MemOrder::Weak => 1_u8,
1328                     MemOrder::Strong(_) => 2_u8,
1329                     // MMIO => 3_u8,
1330                 },
1331             );
1332         } else {
1333             self.set_field(
1334                 77..81,
1335                 match order {
1336                     MemOrder::Constant => 0x4_u8,
1337                     MemOrder::Weak => 0x0_u8,
1338                     MemOrder::Strong(MemScope::CTA) => 0x5_u8,
1339                     MemOrder::Strong(MemScope::GPU) => 0x7_u8,
1340                     MemOrder::Strong(MemScope::System) => 0xa_u8,
1341                 },
1342             );
1343         }
1344     }
1345 
set_eviction_priority(&mut self, pri: &MemEvictionPriority)1346     fn set_eviction_priority(&mut self, pri: &MemEvictionPriority) {
1347         self.set_field(
1348             84..86,
1349             match pri {
1350                 MemEvictionPriority::First => 0_u8,
1351                 MemEvictionPriority::Normal => 1_u8,
1352                 MemEvictionPriority::Last => 2_u8,
1353                 MemEvictionPriority::Unchanged => 3_u8,
1354             },
1355         );
1356     }
1357 
encode_suld(&mut self, op: &OpSuLd)1358     fn encode_suld(&mut self, op: &OpSuLd) {
1359         self.set_opcode(0x998);
1360 
1361         self.set_dst(op.dst);
1362         self.set_reg_src(24..32, op.coord);
1363         self.set_reg_src(64..72, op.handle);
1364         self.set_pred_dst(81..84, op.resident);
1365 
1366         self.set_image_dim(61..64, op.image_dim);
1367         self.set_mem_order(&op.mem_order);
1368         self.set_eviction_priority(&op.mem_eviction_priority);
1369 
1370         assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
1371         self.set_field(72..76, op.mask);
1372     }
1373 
encode_sust(&mut self, op: &OpSuSt)1374     fn encode_sust(&mut self, op: &OpSuSt) {
1375         self.set_opcode(0x99c);
1376 
1377         self.set_reg_src(24..32, op.coord);
1378         self.set_reg_src(32..40, op.data);
1379         self.set_reg_src(64..72, op.handle);
1380 
1381         self.set_image_dim(61..64, op.image_dim);
1382         self.set_mem_order(&op.mem_order);
1383         self.set_eviction_priority(&op.mem_eviction_priority);
1384 
1385         assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
1386         self.set_field(72..76, op.mask);
1387     }
1388 
encode_suatom(&mut self, op: &OpSuAtom)1389     fn encode_suatom(&mut self, op: &OpSuAtom) {
1390         if matches!(op.atom_op, AtomOp::CmpExch) {
1391             self.set_opcode(0x396);
1392         } else {
1393             self.set_opcode(0x394);
1394         }
1395 
1396         self.set_dst(op.dst);
1397         self.set_reg_src(24..32, op.coord);
1398         self.set_reg_src(32..40, op.data);
1399         self.set_reg_src(64..72, op.handle);
1400         self.set_pred_dst(81..84, op.resident);
1401 
1402         self.set_image_dim(61..64, op.image_dim);
1403         self.set_mem_order(&op.mem_order);
1404         self.set_eviction_priority(&op.mem_eviction_priority);
1405 
1406         self.set_bit(72, false); // .BA
1407         self.set_atom_type(73..76, op.atom_type);
1408         self.set_atom_op(87..91, op.atom_op);
1409     }
1410 
set_mem_type(&mut self, range: Range<usize>, mem_type: MemType)1411     fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
1412         assert!(range.len() == 3);
1413         self.set_field(
1414             range,
1415             match mem_type {
1416                 MemType::U8 => 0_u8,
1417                 MemType::I8 => 1_u8,
1418                 MemType::U16 => 2_u8,
1419                 MemType::I16 => 3_u8,
1420                 MemType::B32 => 4_u8,
1421                 MemType::B64 => 5_u8,
1422                 MemType::B128 => 6_u8,
1423             },
1424         );
1425     }
1426 
set_mem_access(&mut self, access: &MemAccess)1427     fn set_mem_access(&mut self, access: &MemAccess) {
1428         self.set_field(
1429             72..73,
1430             match access.space.addr_type() {
1431                 MemAddrType::A32 => 0_u8,
1432                 MemAddrType::A64 => 1_u8,
1433             },
1434         );
1435         self.set_mem_type(73..76, access.mem_type);
1436         self.set_mem_order(&access.order);
1437         self.set_eviction_priority(&access.eviction_priority);
1438     }
1439 
encode_ldg(&mut self, op: &OpLd)1440     fn encode_ldg(&mut self, op: &OpLd) {
1441         self.set_opcode(0x980);
1442 
1443         self.set_dst(op.dst);
1444         self.set_reg_src(24..32, op.addr);
1445         self.set_field(32..64, op.offset);
1446 
1447         self.set_mem_access(&op.access);
1448     }
1449 
encode_ldl(&mut self, op: &OpLd)1450     fn encode_ldl(&mut self, op: &OpLd) {
1451         self.set_opcode(0x983);
1452         self.set_field(84..87, 1_u8);
1453 
1454         self.set_dst(op.dst);
1455         self.set_reg_src(24..32, op.addr);
1456         self.set_field(40..64, op.offset);
1457 
1458         self.set_mem_type(73..76, op.access.mem_type);
1459         assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1460         assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1461     }
1462 
encode_lds(&mut self, op: &OpLd)1463     fn encode_lds(&mut self, op: &OpLd) {
1464         self.set_opcode(0x984);
1465 
1466         self.set_dst(op.dst);
1467         self.set_reg_src(24..32, op.addr);
1468         self.set_field(40..64, op.offset);
1469 
1470         self.set_mem_type(73..76, op.access.mem_type);
1471         assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1472         assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1473 
1474         self.set_bit(87, false); // !.ZD - Returns a predicate?
1475     }
1476 
encode_ld(&mut self, op: &OpLd)1477     fn encode_ld(&mut self, op: &OpLd) {
1478         match op.access.space {
1479             MemSpace::Global(_) => self.encode_ldg(op),
1480             MemSpace::Local => self.encode_ldl(op),
1481             MemSpace::Shared => self.encode_lds(op),
1482         }
1483     }
1484 
encode_ldc(&mut self, op: &OpLdc)1485     fn encode_ldc(&mut self, op: &OpLdc) {
1486         self.encode_alu(
1487             0x182,
1488             Some(op.dst),
1489             ALUSrc::from_src(&op.offset),
1490             ALUSrc::from_src(&op.cb),
1491             ALUSrc::None,
1492         );
1493 
1494         self.set_mem_type(73..76, op.mem_type);
1495         self.set_field(78..80, 0_u8); // subop
1496     }
1497 
encode_stg(&mut self, op: &OpSt)1498     fn encode_stg(&mut self, op: &OpSt) {
1499         self.set_opcode(0x385);
1500 
1501         self.set_reg_src(24..32, op.addr);
1502         self.set_field(32..64, op.offset);
1503         self.set_reg_src(64..72, op.data);
1504 
1505         self.set_mem_access(&op.access);
1506     }
1507 
encode_stl(&mut self, op: &OpSt)1508     fn encode_stl(&mut self, op: &OpSt) {
1509         self.set_opcode(0x387);
1510         self.set_field(84..87, 1_u8);
1511 
1512         self.set_reg_src(24..32, op.addr);
1513         self.set_reg_src(32..40, op.data);
1514         self.set_field(40..64, op.offset);
1515 
1516         self.set_mem_type(73..76, op.access.mem_type);
1517         assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1518         assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1519     }
1520 
encode_sts(&mut self, op: &OpSt)1521     fn encode_sts(&mut self, op: &OpSt) {
1522         self.set_opcode(0x388);
1523 
1524         self.set_reg_src(24..32, op.addr);
1525         self.set_reg_src(32..40, op.data);
1526         self.set_field(40..64, op.offset);
1527 
1528         self.set_mem_type(73..76, op.access.mem_type);
1529         assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1530         assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1531     }
1532 
encode_st(&mut self, op: &OpSt)1533     fn encode_st(&mut self, op: &OpSt) {
1534         match op.access.space {
1535             MemSpace::Global(_) => self.encode_stg(op),
1536             MemSpace::Local => self.encode_stl(op),
1537             MemSpace::Shared => self.encode_sts(op),
1538         }
1539     }
1540 
set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp)1541     fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) {
1542         assert!(range.len() == 4);
1543         self.set_field(
1544             range,
1545             match atom_op {
1546                 AtomOp::Add | AtomOp::CmpExch => 0_u8,
1547                 AtomOp::Min => 1_u8,
1548                 AtomOp::Max => 2_u8,
1549                 AtomOp::Inc => 3_u8,
1550                 AtomOp::Dec => 4_u8,
1551                 AtomOp::And => 5_u8,
1552                 AtomOp::Or => 6_u8,
1553                 AtomOp::Xor => 7_u8,
1554                 AtomOp::Exch => 8_u8,
1555             },
1556         );
1557     }
1558 
set_atom_type(&mut self, range: Range<usize>, atom_type: AtomType)1559     fn set_atom_type(&mut self, range: Range<usize>, atom_type: AtomType) {
1560         assert!(range.len() == 3);
1561         self.set_field(
1562             range,
1563             match atom_type {
1564                 AtomType::U32 => 0_u8,
1565                 AtomType::I32 => 1_u8,
1566                 AtomType::U64 => 2_u8,
1567                 AtomType::F32 => 3_u8,
1568                 AtomType::F16x2 => 4_u8,
1569                 AtomType::I64 => 5_u8,
1570                 AtomType::F64 => 6_u8,
1571             },
1572         );
1573     }
1574 
encode_atomg(&mut self, op: &OpAtom)1575     fn encode_atomg(&mut self, op: &OpAtom) {
1576         if op.atom_op == AtomOp::CmpExch {
1577             self.set_opcode(0x38b);
1578 
1579             self.set_reg_src(32..40, op.cmpr);
1580             self.set_reg_src(64..72, op.data);
1581         } else {
1582             self.set_opcode(0x38a);
1583 
1584             self.set_reg_src(32..40, op.data);
1585 
1586             self.set_atom_op(87..91, op.atom_op);
1587         }
1588 
1589         self.set_dst(op.dst);
1590         self.set_pred_dst(81..84, Dst::None);
1591 
1592         self.set_reg_src(24..32, op.addr);
1593         self.set_field(40..64, op.addr_offset);
1594 
1595         self.set_field(
1596             72..73,
1597             match op.mem_space.addr_type() {
1598                 MemAddrType::A32 => 0_u8,
1599                 MemAddrType::A64 => 1_u8,
1600             },
1601         );
1602 
1603         self.set_atom_type(73..76, op.atom_type);
1604         self.set_mem_order(&op.mem_order);
1605         self.set_eviction_priority(&op.mem_eviction_priority);
1606     }
1607 
encode_atoms(&mut self, op: &OpAtom)1608     fn encode_atoms(&mut self, op: &OpAtom) {
1609         if op.atom_op == AtomOp::CmpExch {
1610             self.set_opcode(0x38d);
1611 
1612             self.set_reg_src(32..40, op.cmpr);
1613             self.set_reg_src(64..72, op.data);
1614         } else {
1615             self.set_opcode(0x38c);
1616 
1617             self.set_reg_src(32..40, op.data);
1618 
1619             self.set_atom_op(87..91, op.atom_op);
1620         }
1621 
1622         self.set_dst(op.dst);
1623         self.set_reg_src(24..32, op.addr);
1624         self.set_field(40..64, op.addr_offset);
1625 
1626         assert!(op.mem_order == MemOrder::Strong(MemScope::CTA));
1627         assert!(op.mem_eviction_priority == MemEvictionPriority::Normal);
1628 
1629         self.set_atom_type(73..76, op.atom_type);
1630     }
1631 
encode_atom(&mut self, op: &OpAtom)1632     fn encode_atom(&mut self, op: &OpAtom) {
1633         match op.mem_space {
1634             MemSpace::Global(_) => self.encode_atomg(op),
1635             MemSpace::Local => panic!("Atomics do not support local"),
1636             MemSpace::Shared => self.encode_atoms(op),
1637         }
1638     }
1639 
encode_al2p(&mut self, op: &OpAL2P)1640     fn encode_al2p(&mut self, op: &OpAL2P) {
1641         self.set_opcode(0x920);
1642 
1643         self.set_dst(op.dst);
1644         self.set_reg_src(24..32, op.offset);
1645 
1646         self.set_field(40..50, op.access.addr);
1647         self.set_field(74..76, 0_u8); // comps
1648         assert!(!op.access.patch);
1649         self.set_bit(79, op.access.output);
1650     }
1651 
encode_ald(&mut self, op: &OpALd)1652     fn encode_ald(&mut self, op: &OpALd) {
1653         self.set_opcode(0x321);
1654 
1655         self.set_dst(op.dst);
1656         self.set_reg_src(32..40, op.vtx);
1657         self.set_reg_src(24..32, op.offset);
1658 
1659         self.set_field(40..50, op.access.addr);
1660         self.set_field(74..76, op.access.comps - 1);
1661         self.set_field(76..77, op.access.patch);
1662         self.set_field(77..78, op.access.phys);
1663         self.set_field(79..80, op.access.output);
1664     }
1665 
encode_ast(&mut self, op: &OpASt)1666     fn encode_ast(&mut self, op: &OpASt) {
1667         self.set_opcode(0x322);
1668 
1669         self.set_reg_src(32..40, op.data);
1670         self.set_reg_src(64..72, op.vtx);
1671         self.set_reg_src(24..32, op.offset);
1672 
1673         self.set_field(40..50, op.access.addr);
1674         self.set_field(74..76, op.access.comps - 1);
1675         self.set_field(76..77, op.access.patch);
1676         self.set_field(77..78, op.access.phys);
1677         assert!(op.access.output);
1678     }
1679 
encode_ipa(&mut self, op: &OpIpa)1680     fn encode_ipa(&mut self, op: &OpIpa) {
1681         self.set_opcode(0x326);
1682 
1683         self.set_dst(op.dst);
1684 
1685         assert!(op.addr % 4 == 0);
1686         self.set_field(64..72, op.addr >> 2);
1687 
1688         self.set_field(
1689             76..78,
1690             match op.loc {
1691                 InterpLoc::Default => 0_u8,
1692                 InterpLoc::Centroid => 1_u8,
1693                 InterpLoc::Offset => 2_u8,
1694             },
1695         );
1696         self.set_field(
1697             78..80,
1698             match op.freq {
1699                 InterpFreq::Pass => 0_u8,
1700                 InterpFreq::Constant => 1_u8,
1701                 InterpFreq::State => 2_u8,
1702                 InterpFreq::PassMulW => {
1703                     panic!("InterpFreq::PassMulW is invalid on SM70+");
1704                 }
1705             },
1706         );
1707 
1708         assert!(op.inv_w.is_zero());
1709         self.set_reg_src(32..40, op.offset);
1710 
1711         // TODO: What is this for?
1712         self.set_pred_dst(81..84, Dst::None);
1713     }
1714 
encode_ldtram(&mut self, op: &OpLdTram)1715     fn encode_ldtram(&mut self, op: &OpLdTram) {
1716         self.set_opcode(0x3ad);
1717         self.set_dst(op.dst);
1718         self.set_ureg(24..32, RegRef::zero(RegFile::UGPR, 1));
1719 
1720         assert!(op.addr % 4 == 0);
1721         self.set_field(64..72, op.addr >> 2);
1722 
1723         self.set_bit(72, op.use_c);
1724 
1725         // Unknown but required
1726         self.set_bit(91, true);
1727     }
1728 
encode_cctl(&mut self, op: &OpCCtl)1729     fn encode_cctl(&mut self, op: &OpCCtl) {
1730         assert!(matches!(op.mem_space, MemSpace::Global(_)));
1731         self.set_opcode(0x98f);
1732 
1733         self.set_reg_src(24..32, op.addr);
1734         self.set_field(32..64, op.addr_offset);
1735 
1736         self.set_field(
1737             87..91,
1738             match op.op {
1739                 CCtlOp::PF1 => 0_u8,
1740                 CCtlOp::PF2 => 1_u8,
1741                 CCtlOp::WB => 2_u8,
1742                 CCtlOp::IV => 3_u8,
1743                 CCtlOp::IVAll => 4_u8,
1744                 CCtlOp::RS => 5_u8,
1745                 CCtlOp::IVAllP => 6_u8,
1746                 CCtlOp::WBAll => 7_u8,
1747                 CCtlOp::WBAllP => 8_u8,
1748             },
1749         );
1750     }
1751 
encode_membar(&mut self, op: &OpMemBar)1752     fn encode_membar(&mut self, op: &OpMemBar) {
1753         self.set_opcode(0x992);
1754 
1755         self.set_bit(72, false); // !.MMIO
1756         self.set_field(
1757             76..79,
1758             match op.scope {
1759                 MemScope::CTA => 0_u8,
1760                 // SM => 1_u8,
1761                 MemScope::GPU => 2_u8,
1762                 MemScope::System => 3_u8,
1763             },
1764         );
1765         self.set_bit(80, false); // .SC
1766     }
1767 
set_rel_offset( &mut self, range: Range<usize>, label: &Label, ip: usize, labels: &HashMap<Label, usize>, )1768     fn set_rel_offset(
1769         &mut self,
1770         range: Range<usize>,
1771         label: &Label,
1772         ip: usize,
1773         labels: &HashMap<Label, usize>,
1774     ) {
1775         let ip = u64::try_from(ip).unwrap();
1776         let ip = i64::try_from(ip).unwrap();
1777 
1778         let target_ip = *labels.get(label).unwrap();
1779         let target_ip = u64::try_from(target_ip).unwrap();
1780         let target_ip = i64::try_from(target_ip).unwrap();
1781 
1782         let rel_offset = target_ip - ip - 4;
1783 
1784         self.set_field(range, rel_offset);
1785     }
1786 
encode_bclear(&mut self, op: &OpBClear)1787     fn encode_bclear(&mut self, op: &OpBClear) {
1788         self.set_opcode(0x355);
1789 
1790         self.set_dst(Dst::None);
1791         self.set_bar_dst(24..28, op.dst);
1792 
1793         self.set_bit(84, true); // .CLEAR
1794     }
1795 
encode_bmov(&mut self, op: &OpBMov)1796     fn encode_bmov(&mut self, op: &OpBMov) {
1797         if dst_is_bar(op.dst) {
1798             self.set_opcode(0x356);
1799 
1800             self.set_bar_dst(24..28, op.dst);
1801             self.set_reg_src(32..40, op.src);
1802 
1803             self.set_bit(84, op.clear);
1804         } else {
1805             self.set_opcode(0x355);
1806 
1807             self.set_dst(op.dst);
1808             self.set_bar_src(24..28, op.src);
1809 
1810             self.set_bit(84, op.clear);
1811         }
1812     }
1813 
encode_break(&mut self, op: &OpBreak)1814     fn encode_break(&mut self, op: &OpBreak) {
1815         self.set_opcode(0x942);
1816         assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
1817         self.set_bar_dst(16..20, op.bar_out);
1818         self.set_pred_src(87..90, 90, op.cond);
1819     }
1820 
encode_bssy( &mut self, op: &OpBSSy, ip: usize, labels: &HashMap<Label, usize>, )1821     fn encode_bssy(
1822         &mut self,
1823         op: &OpBSSy,
1824         ip: usize,
1825         labels: &HashMap<Label, usize>,
1826     ) {
1827         self.set_opcode(0x945);
1828         assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
1829         self.set_bar_dst(16..20, op.bar_out);
1830         self.set_rel_offset(34..64, &op.target, ip, labels);
1831         self.set_pred_src(87..90, 90, op.cond);
1832     }
1833 
encode_bsync(&mut self, op: &OpBSync)1834     fn encode_bsync(&mut self, op: &OpBSync) {
1835         self.set_opcode(0x941);
1836         self.set_bar_src(16..20, op.bar);
1837         self.set_pred_src(87..90, 90, op.cond);
1838     }
1839 
encode_bra( &mut self, op: &OpBra, ip: usize, labels: &HashMap<Label, usize>, )1840     fn encode_bra(
1841         &mut self,
1842         op: &OpBra,
1843         ip: usize,
1844         labels: &HashMap<Label, usize>,
1845     ) {
1846         self.set_opcode(0x947);
1847         self.set_rel_offset(34..82, &op.target, ip, labels);
1848         self.set_field(87..90, 0x7_u8); // TODO: Pred?
1849     }
1850 
encode_exit(&mut self, _op: &OpExit)1851     fn encode_exit(&mut self, _op: &OpExit) {
1852         self.set_opcode(0x94d);
1853 
1854         // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
1855         self.set_field(84..85, false);
1856         self.set_field(85..86, false); // .NO_ATEXIT
1857         self.set_field(87..90, 0x7_u8); // TODO: Predicate
1858         self.set_field(90..91, false); // NOT
1859     }
1860 
encode_warpsync(&mut self, op: &OpWarpSync)1861     fn encode_warpsync(&mut self, op: &OpWarpSync) {
1862         self.encode_alu(
1863             0x148,
1864             None,
1865             ALUSrc::None,
1866             ALUSrc::Imm32(op.mask),
1867             ALUSrc::None,
1868         );
1869         self.set_pred_src(87..90, 90, SrcRef::True.into());
1870     }
1871 
encode_bar(&mut self, _op: &OpBar)1872     fn encode_bar(&mut self, _op: &OpBar) {
1873         self.set_opcode(0xb1d);
1874 
1875         // self.set_opcode(0x31d);
1876 
1877         // // src0 == src1
1878         // self.set_reg_src(32..40, SrcRef::Zero.into());
1879 
1880         // // 00: RED.POPC
1881         // // 01: RED.AND
1882         // // 02: RED.OR
1883         // self.set_field(74..76, 0_u8);
1884 
1885         // // 00: SYNC
1886         // // 01: ARV
1887         // // 02: RED
1888         // // 03: SCAN
1889         // self.set_field(77..79, 0_u8);
1890 
1891         // self.set_pred_src(87..90, 90, SrcRef::True.into());
1892     }
1893 
encode_cs2r(&mut self, op: &OpCS2R)1894     fn encode_cs2r(&mut self, op: &OpCS2R) {
1895         self.set_opcode(0x805);
1896         self.set_dst(op.dst);
1897         self.set_field(72..80, op.idx);
1898         self.set_bit(80, op.dst.as_reg().unwrap().comps() == 2); // .64
1899     }
1900 
encode_isberd(&mut self, op: &OpIsberd)1901     fn encode_isberd(&mut self, op: &OpIsberd) {
1902         self.set_opcode(0x923);
1903         self.set_dst(op.dst);
1904         self.set_reg_src(24..32, op.idx);
1905     }
1906 
encode_kill(&mut self, _op: &OpKill)1907     fn encode_kill(&mut self, _op: &OpKill) {
1908         self.set_opcode(0x95b);
1909         self.set_pred_src(87..90, 90, SrcRef::True.into());
1910     }
1911 
encode_nop(&mut self, _op: &OpNop)1912     fn encode_nop(&mut self, _op: &OpNop) {
1913         self.set_opcode(0x918);
1914     }
1915 
encode_pixld(&mut self, op: &OpPixLd)1916     fn encode_pixld(&mut self, op: &OpPixLd) {
1917         self.set_opcode(0x925);
1918         self.set_dst(op.dst);
1919         self.set_field(
1920             78..81,
1921             match op.val {
1922                 PixVal::MsCount => 0_u8,
1923                 PixVal::CovMask => 1_u8,
1924                 PixVal::CentroidOffset => 2_u8,
1925                 PixVal::MyIndex => 3_u8,
1926                 PixVal::InnerCoverage => 4_u8,
1927             },
1928         );
1929         self.set_pred_dst(81..84, Dst::None);
1930     }
1931 
encode_s2r(&mut self, op: &OpS2R)1932     fn encode_s2r(&mut self, op: &OpS2R) {
1933         self.set_opcode(0x919);
1934         self.set_dst(op.dst);
1935         self.set_field(72..80, op.idx);
1936     }
1937 
encode_out(&mut self, op: &OpOut)1938     fn encode_out(&mut self, op: &OpOut) {
1939         self.encode_alu(
1940             0x124,
1941             Some(op.dst),
1942             ALUSrc::from_src(&op.handle),
1943             ALUSrc::from_src(&op.stream),
1944             ALUSrc::None,
1945         );
1946 
1947         self.set_field(
1948             78..80,
1949             match op.out_type {
1950                 OutType::Emit => 1_u8,
1951                 OutType::Cut => 2_u8,
1952                 OutType::EmitThenCut => 3_u8,
1953             },
1954         );
1955     }
1956 
encode_out_final(&mut self, op: &OpOutFinal)1957     fn encode_out_final(&mut self, op: &OpOutFinal) {
1958         self.encode_alu(
1959             0x124,
1960             Some(Dst::None),
1961             ALUSrc::from_src(&op.handle),
1962             ALUSrc::from_src(&Src::new_zero()),
1963             ALUSrc::None,
1964         );
1965     }
1966 
encode_vote(&mut self, op: &OpVote)1967     fn encode_vote(&mut self, op: &OpVote) {
1968         self.set_opcode(0x806);
1969         self.set_dst(op.ballot);
1970 
1971         self.set_field(
1972             72..74,
1973             match op.op {
1974                 VoteOp::All => 0_u8,
1975                 VoteOp::Any => 1_u8,
1976                 VoteOp::Eq => 2_u8,
1977             },
1978         );
1979 
1980         self.set_pred_dst(81..84, op.vote);
1981         self.set_pred_src(87..90, 90, op.pred);
1982     }
1983 
encode( instr: &Instr, sm: u8, ip: usize, labels: &HashMap<Label, usize>, ) -> [u32; 4]1984     pub fn encode(
1985         instr: &Instr,
1986         sm: u8,
1987         ip: usize,
1988         labels: &HashMap<Label, usize>,
1989     ) -> [u32; 4] {
1990         assert!(sm >= 70);
1991 
1992         let mut si = SM70Instr {
1993             inst: [0; 4],
1994             sm: sm,
1995         };
1996 
1997         match &instr.op {
1998             Op::FAdd(op) => si.encode_fadd(op),
1999             Op::FFma(op) => si.encode_ffma(op),
2000             Op::FMnMx(op) => si.encode_fmnmx(op),
2001             Op::FMul(op) => si.encode_fmul(op),
2002             Op::FSet(op) => si.encode_fset(op),
2003             Op::FSetP(op) => si.encode_fsetp(op),
2004             Op::FSwzAdd(op) => si.encode_fswzadd(op),
2005             Op::DAdd(op) => si.encode_dadd(op),
2006             Op::DFma(op) => si.encode_dfma(op),
2007             Op::DMul(op) => si.encode_dmul(op),
2008             Op::DSetP(op) => si.encode_dsetp(op),
2009             Op::MuFu(op) => si.encode_mufu(op),
2010             Op::BMsk(op) => si.encode_bmsk(op),
2011             Op::BRev(op) => si.encode_brev(op),
2012             Op::Flo(op) => si.encode_flo(op),
2013             Op::IAbs(op) => si.encode_iabs(op),
2014             Op::IAdd3(op) => si.encode_iadd3(op),
2015             Op::IAdd3X(op) => si.encode_iadd3x(op),
2016             Op::IDp4(op) => si.encode_idp4(op),
2017             Op::IMad(op) => si.encode_imad(op),
2018             Op::IMad64(op) => si.encode_imad64(op),
2019             Op::IMnMx(op) => si.encode_imnmx(op),
2020             Op::ISetP(op) => si.encode_isetp(op),
2021             Op::Lop3(op) => si.encode_lop3(op),
2022             Op::PopC(op) => si.encode_popc(op),
2023             Op::Shf(op) => si.encode_shf(op),
2024             Op::F2F(op) => si.encode_f2f(op),
2025             Op::F2I(op) => si.encode_f2i(op),
2026             Op::I2F(op) => si.encode_i2f(op),
2027             Op::FRnd(op) => si.encode_frnd(op),
2028             Op::Mov(op) => si.encode_mov(op),
2029             Op::Prmt(op) => si.encode_prmt(op),
2030             Op::Sel(op) => si.encode_sel(op),
2031             Op::Shfl(op) => si.encode_shfl(op),
2032             Op::PLop3(op) => si.encode_plop3(op),
2033             Op::Tex(op) => si.encode_tex(op),
2034             Op::Tld(op) => si.encode_tld(op),
2035             Op::Tld4(op) => si.encode_tld4(op),
2036             Op::Tmml(op) => si.encode_tmml(op),
2037             Op::Txd(op) => si.encode_txd(op),
2038             Op::Txq(op) => si.encode_txq(op),
2039             Op::SuLd(op) => si.encode_suld(op),
2040             Op::SuSt(op) => si.encode_sust(op),
2041             Op::SuAtom(op) => si.encode_suatom(op),
2042             Op::Ld(op) => si.encode_ld(op),
2043             Op::Ldc(op) => si.encode_ldc(op),
2044             Op::St(op) => si.encode_st(op),
2045             Op::Atom(op) => si.encode_atom(op),
2046             Op::AL2P(op) => si.encode_al2p(op),
2047             Op::ALd(op) => si.encode_ald(op),
2048             Op::ASt(op) => si.encode_ast(op),
2049             Op::Ipa(op) => si.encode_ipa(op),
2050             Op::LdTram(op) => si.encode_ldtram(op),
2051             Op::CCtl(op) => si.encode_cctl(op),
2052             Op::MemBar(op) => si.encode_membar(op),
2053             Op::BClear(op) => si.encode_bclear(op),
2054             Op::BMov(op) => si.encode_bmov(op),
2055             Op::Break(op) => si.encode_break(op),
2056             Op::BSSy(op) => si.encode_bssy(op, ip, labels),
2057             Op::BSync(op) => si.encode_bsync(op),
2058             Op::Bra(op) => si.encode_bra(op, ip, labels),
2059             Op::Exit(op) => si.encode_exit(op),
2060             Op::WarpSync(op) => si.encode_warpsync(op),
2061             Op::Bar(op) => si.encode_bar(op),
2062             Op::CS2R(op) => si.encode_cs2r(op),
2063             Op::Isberd(op) => si.encode_isberd(op),
2064             Op::Kill(op) => si.encode_kill(op),
2065             Op::Nop(op) => si.encode_nop(op),
2066             Op::PixLd(op) => si.encode_pixld(op),
2067             Op::S2R(op) => si.encode_s2r(op),
2068             Op::Out(op) => si.encode_out(op),
2069             Op::OutFinal(op) => si.encode_out_final(op),
2070             Op::Vote(op) => si.encode_vote(op),
2071             _ => panic!("Unhandled instruction"),
2072         }
2073 
2074         si.set_pred(&instr.pred);
2075         si.set_instr_deps(&instr.deps);
2076 
2077         si.inst
2078     }
2079 }
2080 
2081 impl Shader {
encode_sm70(&self) -> Vec<u32>2082     pub fn encode_sm70(&self) -> Vec<u32> {
2083         assert!(self.functions.len() == 1);
2084         let func = &self.functions[0];
2085 
2086         let mut ip = 0_usize;
2087         let mut labels = HashMap::new();
2088         for b in &func.blocks {
2089             labels.insert(b.label, ip);
2090             for instr in &b.instrs {
2091                 if let Op::Nop(op) = &instr.op {
2092                     if let Some(label) = op.label {
2093                         labels.insert(label, ip);
2094                     }
2095                 }
2096                 ip += 4;
2097             }
2098         }
2099 
2100         let mut encoded = Vec::new();
2101         for b in &func.blocks {
2102             for instr in &b.instrs {
2103                 let e = SM70Instr::encode(
2104                     instr,
2105                     self.info.sm,
2106                     encoded.len(),
2107                     &labels,
2108                 );
2109                 encoded.extend_from_slice(&e[..]);
2110             }
2111         }
2112         encoded
2113     }
2114 }
2115