1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::ir::*;
5 use bitview::*;
6
7 use std::collections::HashMap;
8 use std::ops::Range;
9
/// A register operand of an ALU instruction together with its modifier flags.
struct ALURegRef {
    /// The register being read.
    pub reg: RegRef,
    /// `true` when the source modifier includes an absolute value
    /// (see `src_mod_has_abs`).
    pub abs: bool,
    /// `true` when the source modifier includes a negation or a bitwise not
    /// (see `src_mod_has_neg`).
    pub neg: bool,
}
15
/// A constant-buffer operand of an ALU instruction together with its modifier
/// flags.
struct ALUCBufRef {
    /// The constant-buffer location being read.
    pub cb: CBufRef,
    /// `true` when the source modifier includes an absolute value
    /// (see `src_mod_has_abs`).
    pub abs: bool,
    /// `true` when the source modifier includes a negation or a bitwise not
    /// (see `src_mod_has_neg`).
    pub neg: bool,
}
21
/// The operand kinds that `SM70Instr::encode_alu` knows how to place in an
/// ALU source slot.
enum ALUSrc {
    /// The slot is unused.
    None,
    /// A 32-bit immediate.
    Imm32(u32),
    /// A register from the `RegFile::GPR` file.
    Reg(ALURegRef),
    /// A register from the `RegFile::UGPR` file.
    UReg(ALURegRef),
    /// A constant-buffer reference.
    CBuf(ALUCBufRef),
}
29
src_mod_has_abs(src_mod: SrcMod) -> bool30 fn src_mod_has_abs(src_mod: SrcMod) -> bool {
31 match src_mod {
32 SrcMod::None | SrcMod::FNeg | SrcMod::INeg | SrcMod::BNot => false,
33 SrcMod::FAbs | SrcMod::FNegAbs => true,
34 }
35 }
36
src_mod_has_neg(src_mod: SrcMod) -> bool37 fn src_mod_has_neg(src_mod: SrcMod) -> bool {
38 match src_mod {
39 SrcMod::None | SrcMod::FAbs => false,
40 SrcMod::FNeg | SrcMod::FNegAbs | SrcMod::INeg | SrcMod::BNot => true,
41 }
42 }
43
src_mod_is_bnot(src_mod: SrcMod) -> bool44 fn src_mod_is_bnot(src_mod: SrcMod) -> bool {
45 match src_mod {
46 SrcMod::None => false,
47 SrcMod::BNot => true,
48 _ => panic!("Not an predicate source modifier"),
49 }
50 }
51
dst_is_bar(dst: Dst) -> bool52 fn dst_is_bar(dst: Dst) -> bool {
53 match dst {
54 Dst::None => false,
55 Dst::SSA(ssa) => ssa.file() == RegFile::Bar,
56 Dst::Reg(reg) => reg.file() == RegFile::Bar,
57 }
58 }
59
60 impl ALUSrc {
from_src_file(src: &Src, file: RegFile) -> ALUSrc61 fn from_src_file(src: &Src, file: RegFile) -> ALUSrc {
62 match src.src_ref {
63 SrcRef::Zero | SrcRef::Reg(_) => {
64 let reg = match src.src_ref {
65 SrcRef::Zero => RegRef::zero(file, 1),
66 SrcRef::Reg(reg) => reg,
67 _ => panic!("Invalid source ref"),
68 };
69 assert!(reg.comps() <= 2);
70 assert!(reg.file() == file);
71 let alu_ref = ALURegRef {
72 reg: reg,
73 abs: src_mod_has_abs(src.src_mod),
74 neg: src_mod_has_neg(src.src_mod),
75 };
76 match reg.file() {
77 RegFile::GPR => ALUSrc::Reg(alu_ref),
78 RegFile::UGPR => ALUSrc::UReg(alu_ref),
79 _ => panic!("Invalid ALU register file"),
80 }
81 }
82 SrcRef::Imm32(i) => {
83 assert!(src.src_mod.is_none());
84 ALUSrc::Imm32(i)
85 }
86 SrcRef::CBuf(cb) => {
87 let alu_ref = ALUCBufRef {
88 cb: cb,
89 abs: src_mod_has_abs(src.src_mod),
90 neg: src_mod_has_neg(src.src_mod),
91 };
92 ALUSrc::CBuf(alu_ref)
93 }
94 _ => panic!("Invalid ALU source"),
95 }
96 }
97
from_src(src: &Src) -> ALUSrc98 pub fn from_src(src: &Src) -> ALUSrc {
99 ALUSrc::from_src_file(src, RegFile::GPR)
100 }
101
102 #[allow(dead_code)]
from_usrc(src: &Src) -> ALUSrc103 pub fn from_usrc(src: &Src) -> ALUSrc {
104 assert!(src.is_uniform());
105 ALUSrc::from_src_file(src, RegFile::UGPR)
106 }
107 }
108
/// One instruction under construction, exposed as a bit view through the
/// `BitViewable` / `BitMutViewable` implementations below.
struct SM70Instr {
    /// The instruction bits, stored as four 32-bit words.
    inst: [u32; 4],
    /// Target version; uniform-register encodings assert `sm >= 75`.
    /// NOTE(review): presumably the SM (shader model) number — confirm.
    sm: u8,
}
113
114 impl BitViewable for SM70Instr {
bits(&self) -> usize115 fn bits(&self) -> usize {
116 BitView::new(&self.inst).bits()
117 }
118
get_bit_range_u64(&self, range: Range<usize>) -> u64119 fn get_bit_range_u64(&self, range: Range<usize>) -> u64 {
120 BitView::new(&self.inst).get_bit_range_u64(range)
121 }
122 }
123
124 impl BitMutViewable for SM70Instr {
set_bit_range_u64(&mut self, range: Range<usize>, val: u64)125 fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) {
126 BitMutView::new(&mut self.inst).set_bit_range_u64(range, val);
127 }
128 }
129
130 impl SetFieldU64 for SM70Instr {
set_field_u64(&mut self, range: Range<usize>, val: u64)131 fn set_field_u64(&mut self, range: Range<usize>, val: u64) {
132 BitMutView::new(&mut self.inst).set_field_u64(range, val);
133 }
134 }
135
136 impl SM70Instr {
set_bit(&mut self, bit: usize, val: bool)137 fn set_bit(&mut self, bit: usize, val: bool) {
138 BitMutView::new(&mut self.inst).set_bit(bit, val);
139 }
140
set_src_imm(&mut self, range: Range<usize>, u: &u32)141 fn set_src_imm(&mut self, range: Range<usize>, u: &u32) {
142 assert!(range.len() == 32);
143 self.set_field(range, *u);
144 }
145
set_reg(&mut self, range: Range<usize>, reg: RegRef)146 fn set_reg(&mut self, range: Range<usize>, reg: RegRef) {
147 assert!(range.len() == 8);
148 assert!(reg.file() == RegFile::GPR);
149 self.set_field(range, reg.base_idx());
150 }
151
set_ureg(&mut self, range: Range<usize>, reg: RegRef)152 fn set_ureg(&mut self, range: Range<usize>, reg: RegRef) {
153 assert!(self.sm >= 75);
154 assert!(range.len() == 8);
155 assert!(reg.file() == RegFile::UGPR);
156 assert!(reg.base_idx() <= 63);
157 self.set_field(range, reg.base_idx());
158 }
159
set_pred_reg(&mut self, range: Range<usize>, reg: RegRef)160 fn set_pred_reg(&mut self, range: Range<usize>, reg: RegRef) {
161 assert!(range.len() == 3);
162 assert!(reg.file() == RegFile::Pred);
163 assert!(reg.base_idx() <= 7);
164 assert!(reg.comps() == 1);
165 self.set_field(range, reg.base_idx());
166 }
167
set_reg_src(&mut self, range: Range<usize>, src: Src)168 fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
169 assert!(src.src_mod.is_none());
170 match src.src_ref {
171 SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)),
172 SrcRef::Reg(reg) => self.set_reg(range, reg),
173 _ => panic!("Not a register"),
174 }
175 }
176
set_pred_dst(&mut self, range: Range<usize>, dst: Dst)177 fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
178 match dst {
179 Dst::None => {
180 self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1));
181 }
182 Dst::Reg(reg) => self.set_pred_reg(range, reg),
183 _ => panic!("Not a register"),
184 }
185 }
186
set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src)187 fn set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src) {
188 // The default for predicates is true
189 let true_reg = RegRef::new(RegFile::Pred, 7, 1);
190
191 let (not, reg) = match src.src_ref {
192 SrcRef::True => (false, true_reg),
193 SrcRef::False => (true, true_reg),
194 SrcRef::Reg(reg) => (false, reg),
195 _ => panic!("Not a register"),
196 };
197 self.set_pred_reg(range, reg);
198 self.set_bit(not_bit, not ^ src_mod_is_bnot(src.src_mod));
199 }
200
set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef)201 fn set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef) {
202 let mut v = BitMutView::new_subset(self, range);
203 v.set_field(0..16, cb.offset);
204 if let CBuf::Binding(idx) = cb.buf {
205 v.set_field(16..21, idx);
206 } else {
207 panic!("Must be a bound constant buffer");
208 }
209 }
210
211 #[allow(dead_code)]
set_src_cx(&mut self, range: Range<usize>, cb: &CBufRef)212 fn set_src_cx(&mut self, range: Range<usize>, cb: &CBufRef) {
213 assert!(self.sm >= 75);
214
215 let mut v = BitMutView::new_subset(self, range);
216 if let CBuf::BindlessGPR(reg) = cb.buf {
217 assert!(reg.base_idx() <= 63);
218 assert!(reg.file() == RegFile::UGPR);
219 v.set_field(0..8, reg.base_idx());
220 } else {
221 panic!("Must be a bound constant buffer");
222 }
223 assert!(cb.offset % 4 == 0);
224 v.set_field(8..22, cb.offset / 4);
225 }
226
set_opcode(&mut self, opcode: u16)227 fn set_opcode(&mut self, opcode: u16) {
228 self.set_field(0..12, opcode);
229 }
230
set_pred(&mut self, pred: &Pred)231 fn set_pred(&mut self, pred: &Pred) {
232 assert!(!pred.is_false());
233 self.set_pred_reg(
234 12..15,
235 match pred.pred_ref {
236 PredRef::None => RegRef::zero(RegFile::Pred, 1),
237 PredRef::Reg(reg) => reg,
238 PredRef::SSA(_) => panic!("SSA values must be lowered"),
239 },
240 );
241 self.set_bit(15, pred.pred_inv);
242 }
243
set_dst(&mut self, dst: Dst)244 fn set_dst(&mut self, dst: Dst) {
245 match dst {
246 Dst::None => self.set_reg(16..24, RegRef::zero(RegFile::GPR, 1)),
247 Dst::Reg(reg) => self.set_reg(16..24, reg),
248 _ => panic!("Not a register"),
249 }
250 }
251
set_bar_reg(&mut self, range: Range<usize>, reg: RegRef)252 fn set_bar_reg(&mut self, range: Range<usize>, reg: RegRef) {
253 assert!(range.len() == 4);
254 assert!(reg.file() == RegFile::Bar);
255 assert!(reg.comps() == 1);
256 self.set_field(range, reg.base_idx());
257 }
258
set_bar_dst(&mut self, range: Range<usize>, dst: Dst)259 fn set_bar_dst(&mut self, range: Range<usize>, dst: Dst) {
260 self.set_bar_reg(range, *dst.as_reg().unwrap());
261 }
262
set_bar_src(&mut self, range: Range<usize>, src: Src)263 fn set_bar_src(&mut self, range: Range<usize>, src: Src) {
264 assert!(src.src_mod.is_none());
265 self.set_bar_reg(range, *src.src_ref.as_reg().unwrap());
266 }
267
set_alu_reg( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, reg: &ALURegRef, )268 fn set_alu_reg(
269 &mut self,
270 range: Range<usize>,
271 abs_bit: usize,
272 neg_bit: usize,
273 reg: &ALURegRef,
274 ) {
275 self.set_reg(range, reg.reg);
276 self.set_bit(abs_bit, reg.abs);
277 self.set_bit(neg_bit, reg.neg);
278 }
279
set_alu_ureg( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, reg: &ALURegRef, )280 fn set_alu_ureg(
281 &mut self,
282 range: Range<usize>,
283 abs_bit: usize,
284 neg_bit: usize,
285 reg: &ALURegRef,
286 ) {
287 self.set_ureg(range, reg.reg);
288 self.set_bit(abs_bit, reg.abs);
289 self.set_bit(neg_bit, reg.neg);
290 }
291
set_alu_cb( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, cb: &ALUCBufRef, )292 fn set_alu_cb(
293 &mut self,
294 range: Range<usize>,
295 abs_bit: usize,
296 neg_bit: usize,
297 cb: &ALUCBufRef,
298 ) {
299 self.set_src_cb(range, &cb.cb);
300 self.set_bit(abs_bit, cb.abs);
301 self.set_bit(neg_bit, cb.neg);
302 }
303
set_alu_reg_src( &mut self, range: Range<usize>, abs_bit: usize, neg_bit: usize, src: &ALUSrc, )304 fn set_alu_reg_src(
305 &mut self,
306 range: Range<usize>,
307 abs_bit: usize,
308 neg_bit: usize,
309 src: &ALUSrc,
310 ) {
311 match src {
312 ALUSrc::None => (),
313 ALUSrc::Reg(reg) => self.set_alu_reg(range, abs_bit, neg_bit, reg),
314 _ => panic!("Invalid ALU src0"),
315 }
316 }
317
encode_alu( &mut self, opcode: u16, dst: Option<Dst>, src0: ALUSrc, src1: ALUSrc, src2: ALUSrc, )318 fn encode_alu(
319 &mut self,
320 opcode: u16,
321 dst: Option<Dst>,
322 src0: ALUSrc,
323 src1: ALUSrc,
324 src2: ALUSrc,
325 ) {
326 if let Some(dst) = dst {
327 self.set_dst(dst);
328 }
329
330 self.set_alu_reg_src(24..32, 73, 72, &src0);
331
332 let form = match &src2 {
333 ALUSrc::None | ALUSrc::Reg(_) => {
334 self.set_alu_reg_src(64..72, 74, 75, &src2);
335 match &src1 {
336 ALUSrc::None => 1_u8, // form
337 ALUSrc::Reg(reg1) => {
338 self.set_alu_reg(32..40, 62, 63, reg1);
339 1_u8 // form
340 }
341 ALUSrc::UReg(reg1) => {
342 self.set_alu_ureg(32..40, 62, 63, reg1);
343 6_u8 // form
344 }
345 ALUSrc::Imm32(imm) => {
346 self.set_src_imm(32..64, imm);
347 4_u8 // form
348 }
349 ALUSrc::CBuf(cb) => {
350 self.set_alu_cb(38..59, 62, 63, cb);
351 5_u8 // form
352 }
353 }
354 }
355 ALUSrc::UReg(reg2) => {
356 self.set_alu_ureg(32..40, 62, 63, reg2);
357 self.set_alu_reg_src(64..72, 74, 75, &src1);
358 7_u8 // form
359 }
360 ALUSrc::Imm32(imm) => {
361 self.set_src_imm(32..64, imm);
362 self.set_alu_reg_src(64..72, 74, 75, &src1);
363 2_u8 // form
364 }
365 ALUSrc::CBuf(cb) => {
366 // TODO set_src_cx
367 self.set_alu_cb(38..59, 62, 63, cb);
368 self.set_alu_reg_src(64..72, 74, 75, &src1);
369 3_u8 // form
370 }
371 };
372
373 self.set_field(0..9, opcode);
374 self.set_field(9..12, form);
375 }
376
set_instr_deps(&mut self, deps: &InstrDeps)377 fn set_instr_deps(&mut self, deps: &InstrDeps) {
378 self.set_field(105..109, deps.delay);
379 self.set_bit(109, deps.yld);
380 self.set_field(110..113, deps.wr_bar().unwrap_or(7));
381 self.set_field(113..116, deps.rd_bar().unwrap_or(7));
382 self.set_field(116..122, deps.wt_bar_mask);
383 self.set_field(122..126, deps.reuse_mask);
384 }
385
set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode)386 fn set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode) {
387 assert!(range.len() == 2);
388 self.set_field(
389 range,
390 match rnd_mode {
391 FRndMode::NearestEven => 0_u8,
392 FRndMode::NegInf => 1_u8,
393 FRndMode::PosInf => 2_u8,
394 FRndMode::Zero => 3_u8,
395 },
396 );
397 }
398
encode_fadd(&mut self, op: &OpFAdd)399 fn encode_fadd(&mut self, op: &OpFAdd) {
400 if op.srcs[1].src_ref.as_reg().is_some() {
401 self.encode_alu(
402 0x021,
403 Some(op.dst),
404 ALUSrc::from_src(&op.srcs[0]),
405 ALUSrc::from_src(&op.srcs[1]),
406 ALUSrc::None,
407 );
408 } else {
409 self.encode_alu(
410 0x021,
411 Some(op.dst),
412 ALUSrc::from_src(&op.srcs[0]),
413 ALUSrc::from_src(&Src::new_zero()),
414 ALUSrc::from_src(&op.srcs[1]),
415 );
416 }
417 self.set_bit(77, op.saturate);
418 self.set_rnd_mode(78..80, op.rnd_mode);
419 self.set_bit(80, op.ftz);
420 }
421
encode_ffma(&mut self, op: &OpFFma)422 fn encode_ffma(&mut self, op: &OpFFma) {
423 self.encode_alu(
424 0x023,
425 Some(op.dst),
426 ALUSrc::from_src(&op.srcs[0]),
427 ALUSrc::from_src(&op.srcs[1]),
428 ALUSrc::from_src(&op.srcs[2]),
429 );
430 self.set_bit(76, op.dnz);
431 self.set_bit(77, op.saturate);
432 self.set_rnd_mode(78..80, op.rnd_mode);
433 self.set_bit(80, op.ftz);
434 }
435
encode_fmnmx(&mut self, op: &OpFMnMx)436 fn encode_fmnmx(&mut self, op: &OpFMnMx) {
437 self.encode_alu(
438 0x009,
439 Some(op.dst),
440 ALUSrc::from_src(&op.srcs[0]),
441 ALUSrc::from_src(&op.srcs[1]),
442 ALUSrc::from_src(&Src::new_zero()),
443 );
444 self.set_pred_src(87..90, 90, op.min);
445 self.set_bit(80, op.ftz);
446 }
447
encode_fmul(&mut self, op: &OpFMul)448 fn encode_fmul(&mut self, op: &OpFMul) {
449 self.encode_alu(
450 0x020,
451 Some(op.dst),
452 ALUSrc::from_src(&op.srcs[0]),
453 ALUSrc::from_src(&op.srcs[1]),
454 ALUSrc::from_src(&Src::new_zero()),
455 );
456 self.set_bit(76, op.dnz);
457 self.set_bit(77, op.saturate);
458 self.set_rnd_mode(78..80, op.rnd_mode);
459 self.set_bit(80, op.ftz);
460 self.set_field(84..87, 0x4_u8) // TODO: PDIV
461 }
462
set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp)463 fn set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp) {
464 assert!(range.len() == 4);
465 self.set_field(
466 range,
467 match op {
468 FloatCmpOp::OrdLt => 0x01_u8,
469 FloatCmpOp::OrdEq => 0x02_u8,
470 FloatCmpOp::OrdLe => 0x03_u8,
471 FloatCmpOp::OrdGt => 0x04_u8,
472 FloatCmpOp::OrdNe => 0x05_u8,
473 FloatCmpOp::OrdGe => 0x06_u8,
474 FloatCmpOp::UnordLt => 0x09_u8,
475 FloatCmpOp::UnordEq => 0x0a_u8,
476 FloatCmpOp::UnordLe => 0x0b_u8,
477 FloatCmpOp::UnordGt => 0x0c_u8,
478 FloatCmpOp::UnordNe => 0x0d_u8,
479 FloatCmpOp::UnordGe => 0x0e_u8,
480 FloatCmpOp::IsNum => 0x07_u8,
481 FloatCmpOp::IsNan => 0x08_u8,
482 },
483 );
484 }
485
encode_fset(&mut self, op: &OpFSet)486 fn encode_fset(&mut self, op: &OpFSet) {
487 self.encode_alu(
488 0x00a,
489 Some(op.dst),
490 ALUSrc::from_src(&op.srcs[0]),
491 ALUSrc::from_src(&op.srcs[1]),
492 ALUSrc::None,
493 );
494 self.set_float_cmp_op(76..80, op.cmp_op);
495 self.set_bit(80, op.ftz);
496 self.set_field(87..90, 0x7_u8); // TODO: src predicate
497 }
498
set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp)499 fn set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp) {
500 assert!(range.len() == 2);
501 self.set_field(
502 range,
503 match op {
504 PredSetOp::And => 0_u8,
505 PredSetOp::Or => 1_u8,
506 PredSetOp::Xor => 2_u8,
507 },
508 );
509 }
510
encode_fsetp(&mut self, op: &OpFSetP)511 fn encode_fsetp(&mut self, op: &OpFSetP) {
512 self.encode_alu(
513 0x00b,
514 None,
515 ALUSrc::from_src(&op.srcs[0]),
516 ALUSrc::from_src(&op.srcs[1]),
517 ALUSrc::None,
518 );
519
520 self.set_pred_set_op(74..76, op.set_op);
521 self.set_float_cmp_op(76..80, op.cmp_op);
522 self.set_bit(80, op.ftz);
523
524 self.set_pred_dst(81..84, op.dst);
525 self.set_pred_dst(84..87, Dst::None); // dst1
526
527 self.set_pred_src(87..90, 90, op.accum);
528 }
529
encode_fswzadd(&mut self, op: &OpFSwzAdd)530 fn encode_fswzadd(&mut self, op: &OpFSwzAdd) {
531 self.set_opcode(0x822);
532 self.set_dst(op.dst);
533
534 self.set_reg_src(24..32, op.srcs[0]);
535 self.set_reg_src(64..72, op.srcs[1]);
536
537 let mut subop = 0x0_u8;
538
539 for (i, swz_op) in op.ops.iter().enumerate() {
540 let swz_op = match swz_op {
541 FSwzAddOp::Add => 0,
542 FSwzAddOp::SubRight => 2,
543 FSwzAddOp::SubLeft => 1,
544 FSwzAddOp::MoveLeft => 3,
545 };
546
547 subop |= swz_op << ((op.ops.len() - i - 1) * 2);
548 }
549
550 self.set_field(32..40, subop);
551
552 self.set_bit(77, false); // NDV
553 self.set_rnd_mode(78..80, op.rnd_mode);
554 self.set_bit(80, op.ftz);
555 }
556
encode_mufu(&mut self, op: &OpMuFu)557 fn encode_mufu(&mut self, op: &OpMuFu) {
558 self.encode_alu(
559 0x108,
560 Some(op.dst),
561 ALUSrc::None,
562 ALUSrc::from_src(&op.src),
563 ALUSrc::None,
564 );
565 self.set_field(
566 74..80,
567 match op.op {
568 MuFuOp::Cos => 0_u8,
569 MuFuOp::Sin => 1_u8,
570 MuFuOp::Exp2 => 2_u8,
571 MuFuOp::Log2 => 3_u8,
572 MuFuOp::Rcp => 4_u8,
573 MuFuOp::Rsq => 5_u8,
574 MuFuOp::Rcp64H => 6_u8,
575 MuFuOp::Rsq64H => 7_u8,
576 MuFuOp::Sqrt => 8_u8,
577 MuFuOp::Tanh => 9_u8,
578 },
579 );
580 }
581
encode_dadd(&mut self, op: &OpDAdd)582 fn encode_dadd(&mut self, op: &OpDAdd) {
583 self.encode_alu(
584 0x029,
585 Some(op.dst),
586 ALUSrc::from_src(&op.srcs[0]),
587 ALUSrc::None,
588 ALUSrc::from_src(&op.srcs[1]),
589 );
590 self.set_rnd_mode(78..80, op.rnd_mode);
591 }
592
encode_dfma(&mut self, op: &OpDFma)593 fn encode_dfma(&mut self, op: &OpDFma) {
594 self.encode_alu(
595 0x02b,
596 Some(op.dst),
597 ALUSrc::from_src(&op.srcs[0]),
598 ALUSrc::from_src(&op.srcs[1]),
599 ALUSrc::from_src(&op.srcs[2]),
600 );
601 self.set_rnd_mode(78..80, op.rnd_mode);
602 }
603
encode_dmul(&mut self, op: &OpDMul)604 fn encode_dmul(&mut self, op: &OpDMul) {
605 self.encode_alu(
606 0x028,
607 Some(op.dst),
608 ALUSrc::from_src(&op.srcs[0]),
609 ALUSrc::from_src(&op.srcs[1]),
610 ALUSrc::None,
611 );
612 self.set_rnd_mode(78..80, op.rnd_mode);
613 }
614
encode_dsetp(&mut self, op: &OpDSetP)615 fn encode_dsetp(&mut self, op: &OpDSetP) {
616 match op.srcs[1].src_ref {
617 SrcRef::Reg(_) | SrcRef::Zero => {
618 self.encode_alu(
619 0x02a,
620 None,
621 ALUSrc::from_src(&op.srcs[0]),
622 ALUSrc::from_src(&op.srcs[1]),
623 ALUSrc::None,
624 );
625 }
626 _ => {
627 self.encode_alu(
628 0x02a,
629 None,
630 ALUSrc::from_src(&op.srcs[0]),
631 ALUSrc::None,
632 ALUSrc::from_src(&op.srcs[1]),
633 );
634 }
635 }
636
637 self.set_pred_set_op(74..76, op.set_op);
638 self.set_float_cmp_op(76..80, op.cmp_op);
639
640 self.set_pred_dst(81..84, op.dst);
641 self.set_pred_dst(84..87, Dst::None); /* dst1 */
642
643 self.set_pred_src(87..90, 90, op.accum);
644 }
645
encode_bmsk(&mut self, op: &OpBMsk)646 fn encode_bmsk(&mut self, op: &OpBMsk) {
647 self.encode_alu(
648 0x01b,
649 Some(op.dst),
650 ALUSrc::from_src(&op.pos),
651 ALUSrc::from_src(&op.width),
652 ALUSrc::None,
653 );
654
655 self.set_bit(75, op.wrap);
656 }
657
encode_brev(&mut self, op: &OpBRev)658 fn encode_brev(&mut self, op: &OpBRev) {
659 self.encode_alu(
660 0x101,
661 Some(op.dst),
662 ALUSrc::None,
663 ALUSrc::from_src(&op.src),
664 ALUSrc::None,
665 );
666 }
667
encode_flo(&mut self, op: &OpFlo)668 fn encode_flo(&mut self, op: &OpFlo) {
669 self.encode_alu(
670 0x100,
671 Some(op.dst),
672 ALUSrc::None,
673 ALUSrc::from_src(&op.src),
674 ALUSrc::None,
675 );
676 self.set_pred_dst(81..84, Dst::None);
677 self.set_field(74..75, op.return_shift_amount as u8);
678 self.set_field(73..74, op.signed as u8);
679 let not_mod = matches!(op.src.src_mod, SrcMod::BNot);
680 self.set_field(63..64, not_mod)
681 }
682
encode_iabs(&mut self, op: &OpIAbs)683 fn encode_iabs(&mut self, op: &OpIAbs) {
684 self.encode_alu(
685 0x013,
686 Some(op.dst),
687 ALUSrc::None,
688 ALUSrc::from_src(&op.src),
689 ALUSrc::None,
690 );
691 }
692
encode_iadd3(&mut self, op: &OpIAdd3)693 fn encode_iadd3(&mut self, op: &OpIAdd3) {
694 // Hardware requires at least one of these be unmodified
695 assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
696
697 self.encode_alu(
698 0x010,
699 Some(op.dst),
700 ALUSrc::from_src(&op.srcs[0]),
701 ALUSrc::from_src(&op.srcs[1]),
702 ALUSrc::from_src(&op.srcs[2]),
703 );
704
705 self.set_pred_dst(81..84, op.overflow[0]);
706 self.set_pred_dst(84..87, op.overflow[1]);
707 }
708
encode_iadd3x(&mut self, op: &OpIAdd3X)709 fn encode_iadd3x(&mut self, op: &OpIAdd3X) {
710 // Hardware requires at least one of these be unmodified
711 assert!(op.srcs[0].src_mod.is_none() || op.srcs[1].src_mod.is_none());
712
713 self.encode_alu(
714 0x010,
715 Some(op.dst),
716 ALUSrc::from_src(&op.srcs[0]),
717 ALUSrc::from_src(&op.srcs[1]),
718 ALUSrc::from_src(&op.srcs[2]),
719 );
720
721 self.set_bit(74, true); // .X
722
723 self.set_pred_dst(81..84, op.overflow[0]);
724 self.set_pred_dst(84..87, op.overflow[1]);
725
726 self.set_pred_src(87..90, 90, op.carry[0]);
727 self.set_pred_src(77..80, 80, op.carry[1]);
728 }
729
encode_idp4(&mut self, op: &OpIDp4)730 fn encode_idp4(&mut self, op: &OpIDp4) {
731 self.encode_alu(
732 0x026,
733 Some(op.dst),
734 ALUSrc::from_src(&op.srcs[0]),
735 ALUSrc::from_src(&op.srcs[1]),
736 ALUSrc::from_src(&op.srcs[2]),
737 );
738
739 self.set_bit(
740 73,
741 match op.src_types[0] {
742 IntType::U8 => false,
743 IntType::I8 => true,
744 _ => panic!("Invalid DP4 source type"),
745 },
746 );
747 self.set_bit(
748 74,
749 match op.src_types[1] {
750 IntType::U8 => false,
751 IntType::I8 => true,
752 _ => panic!("Invalid DP4 source type"),
753 },
754 );
755 }
756
encode_imad(&mut self, op: &OpIMad)757 fn encode_imad(&mut self, op: &OpIMad) {
758 self.encode_alu(
759 0x024,
760 Some(op.dst),
761 ALUSrc::from_src(&op.srcs[0]),
762 ALUSrc::from_src(&op.srcs[1]),
763 ALUSrc::from_src(&op.srcs[2]),
764 );
765 self.set_pred_dst(81..84, Dst::None);
766 self.set_bit(73, op.signed);
767 }
768
encode_imad64(&mut self, op: &OpIMad64)769 fn encode_imad64(&mut self, op: &OpIMad64) {
770 self.encode_alu(
771 0x025,
772 Some(op.dst),
773 ALUSrc::from_src(&op.srcs[0]),
774 ALUSrc::from_src(&op.srcs[1]),
775 ALUSrc::from_src(&op.srcs[2]),
776 );
777 self.set_pred_dst(81..84, Dst::None);
778 self.set_bit(73, op.signed);
779 }
780
encode_imnmx(&mut self, op: &OpIMnMx)781 fn encode_imnmx(&mut self, op: &OpIMnMx) {
782 self.encode_alu(
783 0x017,
784 Some(op.dst),
785 ALUSrc::from_src(&op.srcs[0]),
786 ALUSrc::from_src(&op.srcs[1]),
787 ALUSrc::None,
788 );
789 self.set_pred_src(87..90, 90, op.min);
790 self.set_bit(
791 73,
792 match op.cmp_type {
793 IntCmpType::U32 => false,
794 IntCmpType::I32 => true,
795 },
796 );
797 }
798
set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp)799 fn set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp) {
800 assert!(range.len() == 3);
801 self.set_field(
802 range,
803 match op {
804 IntCmpOp::Eq => 2_u8,
805 IntCmpOp::Ne => 5_u8,
806 IntCmpOp::Lt => 1_u8,
807 IntCmpOp::Le => 3_u8,
808 IntCmpOp::Gt => 4_u8,
809 IntCmpOp::Ge => 6_u8,
810 },
811 );
812 }
813
encode_isetp(&mut self, op: &OpISetP)814 fn encode_isetp(&mut self, op: &OpISetP) {
815 self.encode_alu(
816 0x00c,
817 None,
818 ALUSrc::from_src(&op.srcs[0]),
819 ALUSrc::from_src(&op.srcs[1]),
820 ALUSrc::None,
821 );
822
823 self.set_pred_src(68..71, 71, op.low_cmp);
824 self.set_bit(72, op.ex);
825
826 self.set_field(
827 73..74,
828 match op.cmp_type {
829 IntCmpType::U32 => 0_u32,
830 IntCmpType::I32 => 1_u32,
831 },
832 );
833 self.set_pred_set_op(74..76, op.set_op);
834 self.set_int_cmp_op(76..79, op.cmp_op);
835
836 self.set_pred_dst(81..84, op.dst);
837 self.set_pred_dst(84..87, Dst::None); // dst1
838
839 self.set_pred_src(87..90, 90, op.accum);
840 }
841
encode_lop3(&mut self, op: &OpLop3)842 fn encode_lop3(&mut self, op: &OpLop3) {
843 self.encode_alu(
844 0x012,
845 Some(op.dst),
846 ALUSrc::from_src(&op.srcs[0]),
847 ALUSrc::from_src(&op.srcs[1]),
848 ALUSrc::from_src(&op.srcs[2]),
849 );
850
851 self.set_field(72..80, op.op.lut);
852 self.set_bit(80, false); // .PAND
853 self.set_field(81..84, 7_u32); // pred
854 self.set_pred_src(87..90, 90, SrcRef::False.into());
855 }
856
encode_popc(&mut self, op: &OpPopC)857 fn encode_popc(&mut self, op: &OpPopC) {
858 self.encode_alu(
859 0x109,
860 Some(op.dst),
861 ALUSrc::None,
862 ALUSrc::from_src(&op.src),
863 ALUSrc::None,
864 );
865
866 let not_mod = matches!(op.src.src_mod, SrcMod::BNot);
867 self.set_field(63..64, not_mod)
868 }
869
encode_shf(&mut self, op: &OpShf)870 fn encode_shf(&mut self, op: &OpShf) {
871 self.encode_alu(
872 0x019,
873 Some(op.dst),
874 ALUSrc::from_src(&op.low),
875 ALUSrc::from_src(&op.shift),
876 ALUSrc::from_src(&op.high),
877 );
878
879 self.set_field(
880 73..75,
881 match op.data_type {
882 IntType::I64 => 0_u8,
883 IntType::U64 => 1_u8,
884 IntType::I32 => 2_u8,
885 IntType::U32 => 3_u8,
886 _ => panic!("Invalid shift data type"),
887 },
888 );
889 self.set_bit(75, op.wrap);
890 self.set_bit(76, op.right);
891 self.set_bit(80, op.dst_high);
892 }
893
encode_f2f(&mut self, op: &OpF2F)894 fn encode_f2f(&mut self, op: &OpF2F) {
895 assert!(!op.integer_rnd);
896 if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
897 self.encode_alu(
898 0x104,
899 Some(op.dst),
900 ALUSrc::None,
901 ALUSrc::from_src(&op.src),
902 ALUSrc::None,
903 );
904 } else {
905 self.encode_alu(
906 0x110,
907 Some(op.dst),
908 ALUSrc::None,
909 ALUSrc::from_src(&op.src),
910 ALUSrc::None,
911 );
912 }
913
914 if op.high {
915 self.set_field(60..62, 1_u8); // .H1
916 }
917
918 self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
919 self.set_rnd_mode(78..80, op.rnd_mode);
920 self.set_bit(80, op.ftz);
921 self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
922 }
923
encode_f2i(&mut self, op: &OpF2I)924 fn encode_f2i(&mut self, op: &OpF2I) {
925 if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
926 self.encode_alu(
927 0x105,
928 Some(op.dst),
929 ALUSrc::None,
930 ALUSrc::from_src(&op.src),
931 ALUSrc::None,
932 );
933 } else {
934 self.encode_alu(
935 0x111,
936 Some(op.dst),
937 ALUSrc::None,
938 ALUSrc::from_src(&op.src),
939 ALUSrc::None,
940 );
941 }
942
943 self.set_bit(72, op.dst_type.is_signed());
944 self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
945 self.set_bit(77, false); // NTZ
946 self.set_rnd_mode(78..80, op.rnd_mode);
947 self.set_bit(80, op.ftz);
948 self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
949 }
950
encode_i2f(&mut self, op: &OpI2F)951 fn encode_i2f(&mut self, op: &OpI2F) {
952 if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
953 self.encode_alu(
954 0x106,
955 Some(op.dst),
956 ALUSrc::None,
957 ALUSrc::from_src(&op.src),
958 ALUSrc::None,
959 );
960 } else {
961 self.encode_alu(
962 0x112,
963 Some(op.dst),
964 ALUSrc::None,
965 ALUSrc::from_src(&op.src),
966 ALUSrc::None,
967 );
968 }
969
970 self.set_field(60..62, 0_u8); // TODO: subop
971 self.set_bit(74, op.src_type.is_signed());
972 self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
973 self.set_rnd_mode(78..80, op.rnd_mode);
974 self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
975 }
976
encode_frnd(&mut self, op: &OpFRnd)977 fn encode_frnd(&mut self, op: &OpFRnd) {
978 if op.src_type.bits() <= 32 && op.dst_type.bits() <= 32 {
979 self.encode_alu(
980 0x107,
981 Some(op.dst),
982 ALUSrc::None,
983 ALUSrc::from_src(&op.src),
984 ALUSrc::None,
985 );
986 } else {
987 self.encode_alu(
988 0x113,
989 Some(op.dst),
990 ALUSrc::None,
991 ALUSrc::from_src(&op.src),
992 ALUSrc::None,
993 );
994 }
995
996 self.set_field(84..86, (op.src_type.bits() / 8).ilog2());
997 self.set_bit(80, op.ftz);
998 self.set_rnd_mode(78..80, op.rnd_mode);
999 self.set_field(75..77, (op.dst_type.bits() / 8).ilog2());
1000 }
1001
encode_mov(&mut self, op: &OpMov)1002 fn encode_mov(&mut self, op: &OpMov) {
1003 self.encode_alu(
1004 0x002,
1005 Some(op.dst),
1006 ALUSrc::None,
1007 ALUSrc::from_src(&op.src),
1008 ALUSrc::None,
1009 );
1010 self.set_field(72..76, op.quad_lanes);
1011 }
1012
encode_prmt(&mut self, op: &OpPrmt)1013 fn encode_prmt(&mut self, op: &OpPrmt) {
1014 self.encode_alu(
1015 0x16,
1016 Some(op.dst),
1017 ALUSrc::from_src(&op.srcs[0]),
1018 ALUSrc::from_src(&op.sel),
1019 ALUSrc::from_src(&op.srcs[1]),
1020 );
1021
1022 self.set_field(
1023 72..75,
1024 match op.mode {
1025 PrmtMode::Index => 0_u8,
1026 PrmtMode::Forward4Extract => 1_u8,
1027 PrmtMode::Backward4Extract => 2_u8,
1028 PrmtMode::Replicate8 => 3_u8,
1029 PrmtMode::EdgeClampLeft => 4_u8,
1030 PrmtMode::EdgeClampRight => 5_u8,
1031 PrmtMode::Replicate16 => 6_u8,
1032 },
1033 )
1034 }
1035
encode_sel(&mut self, op: &OpSel)1036 fn encode_sel(&mut self, op: &OpSel) {
1037 self.encode_alu(
1038 0x007,
1039 Some(op.dst),
1040 ALUSrc::from_src(&op.srcs[0]),
1041 ALUSrc::from_src(&op.srcs[1]),
1042 ALUSrc::None,
1043 );
1044
1045 self.set_pred_src(87..90, 90, op.cond);
1046 }
1047
encode_shfl(&mut self, op: &OpShfl)1048 fn encode_shfl(&mut self, op: &OpShfl) {
1049 assert!(op.lane.src_mod.is_none());
1050 assert!(op.c.src_mod.is_none());
1051
1052 match &op.lane.src_ref {
1053 SrcRef::Zero | SrcRef::Reg(_) => match &op.c.src_ref {
1054 SrcRef::Zero | SrcRef::Reg(_) => {
1055 self.set_opcode(0x389);
1056 self.set_reg_src(32..40, op.lane);
1057 self.set_reg_src(64..72, op.c);
1058 }
1059 SrcRef::Imm32(imm_c) => {
1060 self.set_opcode(0x589);
1061 self.set_reg_src(32..40, op.lane);
1062 self.set_field(40..53, *imm_c & 0x1f1f);
1063 }
1064 _ => panic!("Invalid instruction form"),
1065 },
1066 SrcRef::Imm32(imm_lane) => match &op.c.src_ref {
1067 SrcRef::Zero | SrcRef::Reg(_) => {
1068 self.set_opcode(0x989);
1069 self.set_field(53..58, *imm_lane & 0x1f);
1070 self.set_reg_src(64..72, op.c);
1071 }
1072 SrcRef::Imm32(imm_c) => {
1073 self.set_opcode(0xf89);
1074 self.set_field(40..53, *imm_c & 0x1f1f);
1075 self.set_field(53..58, *imm_lane & 0x1f);
1076 }
1077 _ => panic!("Invalid instruction form"),
1078 },
1079 _ => panic!("Invalid instruction form"),
1080 };
1081
1082 self.set_dst(op.dst);
1083 self.set_pred_dst(81..84, op.in_bounds);
1084 self.set_reg_src(24..32, op.src);
1085 self.set_field(
1086 58..60,
1087 match op.op {
1088 ShflOp::Idx => 0_u8,
1089 ShflOp::Up => 1_u8,
1090 ShflOp::Down => 2_u8,
1091 ShflOp::Bfly => 3_u8,
1092 },
1093 );
1094 }
1095
encode_plop3(&mut self, op: &OpPLop3)1096 fn encode_plop3(&mut self, op: &OpPLop3) {
1097 self.set_opcode(0x81c);
1098 self.set_field(16..24, op.ops[1].lut);
1099 self.set_field(64..67, op.ops[0].lut & 0x7);
1100 self.set_field(72..77, op.ops[0].lut >> 3);
1101
1102 self.set_pred_src(68..71, 71, op.srcs[2]);
1103
1104 self.set_pred_src(77..80, 80, op.srcs[1]);
1105 self.set_pred_dst(81..84, op.dsts[0]);
1106 self.set_pred_dst(84..87, op.dsts[1]);
1107
1108 self.set_pred_src(87..90, 90, op.srcs[0]);
1109 }
1110
set_tex_dim(&mut self, range: Range<usize>, dim: TexDim)1111 fn set_tex_dim(&mut self, range: Range<usize>, dim: TexDim) {
1112 assert!(range.len() == 3);
1113 self.set_field(
1114 range,
1115 match dim {
1116 TexDim::_1D => 0_u8,
1117 TexDim::Array1D => 4_u8,
1118 TexDim::_2D => 1_u8,
1119 TexDim::Array2D => 5_u8,
1120 TexDim::_3D => 2_u8,
1121 TexDim::Cube => 3_u8,
1122 TexDim::ArrayCube => 7_u8,
1123 },
1124 );
1125 }
1126
set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode)1127 fn set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode) {
1128 assert!(range.len() == 3);
1129 self.set_field(
1130 range,
1131 match lod_mode {
1132 TexLodMode::Auto => 0_u8,
1133 TexLodMode::Zero => 1_u8,
1134 TexLodMode::Bias => 2_u8,
1135 TexLodMode::Lod => 3_u8,
1136 TexLodMode::Clamp => 4_u8,
1137 TexLodMode::BiasClamp => 5_u8,
1138 },
1139 );
1140 }
1141
encode_tex(&mut self, op: &OpTex)1142 fn encode_tex(&mut self, op: &OpTex) {
1143 self.set_opcode(0x361);
1144 self.set_bit(59, true); // .B
1145
1146 self.set_dst(op.dsts[0]);
1147 if let Dst::Reg(reg) = op.dsts[1] {
1148 self.set_reg(64..72, reg);
1149 } else {
1150 self.set_field(64..72, 255_u8);
1151 }
1152 self.set_pred_dst(81..84, op.resident);
1153
1154 self.set_reg_src(24..32, op.srcs[0]);
1155 self.set_reg_src(32..40, op.srcs[1]);
1156
1157 self.set_tex_dim(61..64, op.dim);
1158 self.set_field(72..76, op.mask);
1159 self.set_bit(76, op.offset);
1160 self.set_bit(77, false); // ToDo: NDV
1161 self.set_bit(78, op.z_cmpr);
1162 self.set_field(84..87, 1);
1163 self.set_tex_lod_mode(87..90, op.lod_mode);
1164 self.set_bit(90, false); // TODO: .NODEP
1165 }
1166
encode_tld(&mut self, op: &OpTld)1167 fn encode_tld(&mut self, op: &OpTld) {
1168 self.set_opcode(0x367);
1169 self.set_bit(59, true); // .B
1170
1171 self.set_dst(op.dsts[0]);
1172 if let Dst::Reg(reg) = op.dsts[1] {
1173 self.set_reg(64..72, reg);
1174 } else {
1175 self.set_field(64..72, 255_u8);
1176 }
1177 self.set_pred_dst(81..84, op.resident);
1178
1179 self.set_reg_src(24..32, op.srcs[0]);
1180 self.set_reg_src(32..40, op.srcs[1]);
1181
1182 self.set_tex_dim(61..64, op.dim);
1183 self.set_field(72..76, op.mask);
1184 self.set_bit(76, op.offset);
1185 // bit 77: .CL
1186 self.set_bit(78, op.is_ms);
1187 // bits 79..81: .F16
1188 assert!(
1189 op.lod_mode == TexLodMode::Zero || op.lod_mode == TexLodMode::Lod
1190 );
1191 self.set_tex_lod_mode(87..90, op.lod_mode);
1192 self.set_bit(90, false); // TODO: .NODEP
1193 }
1194
encode_tld4(&mut self, op: &OpTld4)1195 fn encode_tld4(&mut self, op: &OpTld4) {
1196 self.set_opcode(0x364);
1197 self.set_bit(59, true); // .B
1198
1199 self.set_dst(op.dsts[0]);
1200 if let Dst::Reg(reg) = op.dsts[1] {
1201 self.set_reg(64..72, reg);
1202 } else {
1203 self.set_field(64..72, 255_u8);
1204 }
1205 self.set_pred_dst(81..84, op.resident);
1206
1207 self.set_reg_src(24..32, op.srcs[0]);
1208 self.set_reg_src(32..40, op.srcs[1]);
1209
1210 self.set_tex_dim(61..64, op.dim);
1211 self.set_field(72..76, op.mask);
1212 self.set_field(
1213 76..78,
1214 match op.offset_mode {
1215 Tld4OffsetMode::None => 0_u8,
1216 Tld4OffsetMode::AddOffI => 1_u8,
1217 Tld4OffsetMode::PerPx => 2_u8,
1218 },
1219 );
1220 // bit 77: .CL
1221 self.set_bit(78, op.z_cmpr);
1222 self.set_bit(84, true); // !.EF
1223 self.set_field(87..89, op.comp);
1224 self.set_bit(90, false); // TODO: .NODEP
1225 }
1226
encode_tmml(&mut self, op: &OpTmml)1227 fn encode_tmml(&mut self, op: &OpTmml) {
1228 self.set_opcode(0x36a);
1229 self.set_bit(59, true); // .B
1230
1231 self.set_dst(op.dsts[0]);
1232 if let Dst::Reg(reg) = op.dsts[1] {
1233 self.set_reg(64..72, reg);
1234 } else {
1235 self.set_field(64..72, 255_u8);
1236 }
1237
1238 self.set_reg_src(24..32, op.srcs[0]);
1239 self.set_reg_src(32..40, op.srcs[1]);
1240
1241 self.set_tex_dim(61..64, op.dim);
1242 self.set_field(72..76, op.mask);
1243 self.set_bit(77, false); // ToDo: NDV
1244 self.set_bit(90, false); // TODO: .NODEP
1245 }
1246
encode_txd(&mut self, op: &OpTxd)1247 fn encode_txd(&mut self, op: &OpTxd) {
1248 self.set_opcode(0x36d);
1249 self.set_bit(59, true); // .B
1250
1251 self.set_dst(op.dsts[0]);
1252 if let Dst::Reg(reg) = op.dsts[1] {
1253 self.set_reg(64..72, reg);
1254 } else {
1255 self.set_field(64..72, 255_u8);
1256 }
1257 self.set_pred_dst(81..84, op.resident);
1258
1259 self.set_reg_src(24..32, op.srcs[0]);
1260 self.set_reg_src(32..40, op.srcs[1]);
1261
1262 self.set_tex_dim(61..64, op.dim);
1263 self.set_field(72..76, op.mask);
1264 self.set_bit(76, op.offset);
1265 self.set_bit(77, false); // ToDo: NDV
1266 self.set_bit(90, false); // TODO: .NODEP
1267 }
1268
encode_txq(&mut self, op: &OpTxq)1269 fn encode_txq(&mut self, op: &OpTxq) {
1270 self.set_opcode(0x370);
1271 self.set_bit(59, true); // .B
1272
1273 self.set_dst(op.dsts[0]);
1274 if let Dst::Reg(reg) = op.dsts[1] {
1275 self.set_reg(64..72, reg);
1276 } else {
1277 self.set_field(64..72, 255_u8);
1278 }
1279
1280 self.set_reg_src(24..32, op.src);
1281 self.set_field(
1282 62..64,
1283 match op.query {
1284 TexQuery::Dimension => 0_u8,
1285 TexQuery::TextureType => 1_u8,
1286 TexQuery::SamplerPos => 2_u8,
1287 },
1288 );
1289 self.set_field(72..76, op.mask);
1290 }
1291
set_image_dim(&mut self, range: Range<usize>, dim: ImageDim)1292 fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) {
1293 assert!(range.len() == 3);
1294 self.set_field(
1295 range,
1296 match dim {
1297 ImageDim::_1D => 0_u8,
1298 ImageDim::_1DBuffer => 1_u8,
1299 ImageDim::_1DArray => 2_u8,
1300 ImageDim::_2D => 3_u8,
1301 ImageDim::_2DArray => 4_u8,
1302 ImageDim::_3D => 5_u8,
1303 },
1304 );
1305 }
1306
set_mem_order(&mut self, order: &MemOrder)1307 fn set_mem_order(&mut self, order: &MemOrder) {
1308 if self.sm < 80 {
1309 let scope = match order {
1310 MemOrder::Constant => MemScope::System,
1311 MemOrder::Weak => MemScope::CTA,
1312 MemOrder::Strong(s) => *s,
1313 };
1314 self.set_field(
1315 77..79,
1316 match scope {
1317 MemScope::CTA => 0_u8,
1318 // SM => 1_u8,
1319 MemScope::GPU => 2_u8,
1320 MemScope::System => 3_u8,
1321 },
1322 );
1323 self.set_field(
1324 79..81,
1325 match order {
1326 MemOrder::Constant => 0_u8,
1327 MemOrder::Weak => 1_u8,
1328 MemOrder::Strong(_) => 2_u8,
1329 // MMIO => 3_u8,
1330 },
1331 );
1332 } else {
1333 self.set_field(
1334 77..81,
1335 match order {
1336 MemOrder::Constant => 0x4_u8,
1337 MemOrder::Weak => 0x0_u8,
1338 MemOrder::Strong(MemScope::CTA) => 0x5_u8,
1339 MemOrder::Strong(MemScope::GPU) => 0x7_u8,
1340 MemOrder::Strong(MemScope::System) => 0xa_u8,
1341 },
1342 );
1343 }
1344 }
1345
set_eviction_priority(&mut self, pri: &MemEvictionPriority)1346 fn set_eviction_priority(&mut self, pri: &MemEvictionPriority) {
1347 self.set_field(
1348 84..86,
1349 match pri {
1350 MemEvictionPriority::First => 0_u8,
1351 MemEvictionPriority::Normal => 1_u8,
1352 MemEvictionPriority::Last => 2_u8,
1353 MemEvictionPriority::Unchanged => 3_u8,
1354 },
1355 );
1356 }
1357
encode_suld(&mut self, op: &OpSuLd)1358 fn encode_suld(&mut self, op: &OpSuLd) {
1359 self.set_opcode(0x998);
1360
1361 self.set_dst(op.dst);
1362 self.set_reg_src(24..32, op.coord);
1363 self.set_reg_src(64..72, op.handle);
1364 self.set_pred_dst(81..84, op.resident);
1365
1366 self.set_image_dim(61..64, op.image_dim);
1367 self.set_mem_order(&op.mem_order);
1368 self.set_eviction_priority(&op.mem_eviction_priority);
1369
1370 assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
1371 self.set_field(72..76, op.mask);
1372 }
1373
encode_sust(&mut self, op: &OpSuSt)1374 fn encode_sust(&mut self, op: &OpSuSt) {
1375 self.set_opcode(0x99c);
1376
1377 self.set_reg_src(24..32, op.coord);
1378 self.set_reg_src(32..40, op.data);
1379 self.set_reg_src(64..72, op.handle);
1380
1381 self.set_image_dim(61..64, op.image_dim);
1382 self.set_mem_order(&op.mem_order);
1383 self.set_eviction_priority(&op.mem_eviction_priority);
1384
1385 assert!(op.mask == 0x1 || op.mask == 0x3 || op.mask == 0xf);
1386 self.set_field(72..76, op.mask);
1387 }
1388
encode_suatom(&mut self, op: &OpSuAtom)1389 fn encode_suatom(&mut self, op: &OpSuAtom) {
1390 if matches!(op.atom_op, AtomOp::CmpExch) {
1391 self.set_opcode(0x396);
1392 } else {
1393 self.set_opcode(0x394);
1394 }
1395
1396 self.set_dst(op.dst);
1397 self.set_reg_src(24..32, op.coord);
1398 self.set_reg_src(32..40, op.data);
1399 self.set_reg_src(64..72, op.handle);
1400 self.set_pred_dst(81..84, op.resident);
1401
1402 self.set_image_dim(61..64, op.image_dim);
1403 self.set_mem_order(&op.mem_order);
1404 self.set_eviction_priority(&op.mem_eviction_priority);
1405
1406 self.set_bit(72, false); // .BA
1407 self.set_atom_type(73..76, op.atom_type);
1408 self.set_atom_op(87..91, op.atom_op);
1409 }
1410
set_mem_type(&mut self, range: Range<usize>, mem_type: MemType)1411 fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
1412 assert!(range.len() == 3);
1413 self.set_field(
1414 range,
1415 match mem_type {
1416 MemType::U8 => 0_u8,
1417 MemType::I8 => 1_u8,
1418 MemType::U16 => 2_u8,
1419 MemType::I16 => 3_u8,
1420 MemType::B32 => 4_u8,
1421 MemType::B64 => 5_u8,
1422 MemType::B128 => 6_u8,
1423 },
1424 );
1425 }
1426
set_mem_access(&mut self, access: &MemAccess)1427 fn set_mem_access(&mut self, access: &MemAccess) {
1428 self.set_field(
1429 72..73,
1430 match access.space.addr_type() {
1431 MemAddrType::A32 => 0_u8,
1432 MemAddrType::A64 => 1_u8,
1433 },
1434 );
1435 self.set_mem_type(73..76, access.mem_type);
1436 self.set_mem_order(&access.order);
1437 self.set_eviction_priority(&access.eviction_priority);
1438 }
1439
encode_ldg(&mut self, op: &OpLd)1440 fn encode_ldg(&mut self, op: &OpLd) {
1441 self.set_opcode(0x980);
1442
1443 self.set_dst(op.dst);
1444 self.set_reg_src(24..32, op.addr);
1445 self.set_field(32..64, op.offset);
1446
1447 self.set_mem_access(&op.access);
1448 }
1449
encode_ldl(&mut self, op: &OpLd)1450 fn encode_ldl(&mut self, op: &OpLd) {
1451 self.set_opcode(0x983);
1452 self.set_field(84..87, 1_u8);
1453
1454 self.set_dst(op.dst);
1455 self.set_reg_src(24..32, op.addr);
1456 self.set_field(40..64, op.offset);
1457
1458 self.set_mem_type(73..76, op.access.mem_type);
1459 assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1460 assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1461 }
1462
encode_lds(&mut self, op: &OpLd)1463 fn encode_lds(&mut self, op: &OpLd) {
1464 self.set_opcode(0x984);
1465
1466 self.set_dst(op.dst);
1467 self.set_reg_src(24..32, op.addr);
1468 self.set_field(40..64, op.offset);
1469
1470 self.set_mem_type(73..76, op.access.mem_type);
1471 assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1472 assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1473
1474 self.set_bit(87, false); // !.ZD - Returns a predicate?
1475 }
1476
encode_ld(&mut self, op: &OpLd)1477 fn encode_ld(&mut self, op: &OpLd) {
1478 match op.access.space {
1479 MemSpace::Global(_) => self.encode_ldg(op),
1480 MemSpace::Local => self.encode_ldl(op),
1481 MemSpace::Shared => self.encode_lds(op),
1482 }
1483 }
1484
encode_ldc(&mut self, op: &OpLdc)1485 fn encode_ldc(&mut self, op: &OpLdc) {
1486 self.encode_alu(
1487 0x182,
1488 Some(op.dst),
1489 ALUSrc::from_src(&op.offset),
1490 ALUSrc::from_src(&op.cb),
1491 ALUSrc::None,
1492 );
1493
1494 self.set_mem_type(73..76, op.mem_type);
1495 self.set_field(78..80, 0_u8); // subop
1496 }
1497
encode_stg(&mut self, op: &OpSt)1498 fn encode_stg(&mut self, op: &OpSt) {
1499 self.set_opcode(0x385);
1500
1501 self.set_reg_src(24..32, op.addr);
1502 self.set_field(32..64, op.offset);
1503 self.set_reg_src(64..72, op.data);
1504
1505 self.set_mem_access(&op.access);
1506 }
1507
encode_stl(&mut self, op: &OpSt)1508 fn encode_stl(&mut self, op: &OpSt) {
1509 self.set_opcode(0x387);
1510 self.set_field(84..87, 1_u8);
1511
1512 self.set_reg_src(24..32, op.addr);
1513 self.set_reg_src(32..40, op.data);
1514 self.set_field(40..64, op.offset);
1515
1516 self.set_mem_type(73..76, op.access.mem_type);
1517 assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1518 assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1519 }
1520
encode_sts(&mut self, op: &OpSt)1521 fn encode_sts(&mut self, op: &OpSt) {
1522 self.set_opcode(0x388);
1523
1524 self.set_reg_src(24..32, op.addr);
1525 self.set_reg_src(32..40, op.data);
1526 self.set_field(40..64, op.offset);
1527
1528 self.set_mem_type(73..76, op.access.mem_type);
1529 assert!(op.access.order == MemOrder::Strong(MemScope::CTA));
1530 assert!(op.access.eviction_priority == MemEvictionPriority::Normal);
1531 }
1532
encode_st(&mut self, op: &OpSt)1533 fn encode_st(&mut self, op: &OpSt) {
1534 match op.access.space {
1535 MemSpace::Global(_) => self.encode_stg(op),
1536 MemSpace::Local => self.encode_stl(op),
1537 MemSpace::Shared => self.encode_sts(op),
1538 }
1539 }
1540
set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp)1541 fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) {
1542 assert!(range.len() == 4);
1543 self.set_field(
1544 range,
1545 match atom_op {
1546 AtomOp::Add | AtomOp::CmpExch => 0_u8,
1547 AtomOp::Min => 1_u8,
1548 AtomOp::Max => 2_u8,
1549 AtomOp::Inc => 3_u8,
1550 AtomOp::Dec => 4_u8,
1551 AtomOp::And => 5_u8,
1552 AtomOp::Or => 6_u8,
1553 AtomOp::Xor => 7_u8,
1554 AtomOp::Exch => 8_u8,
1555 },
1556 );
1557 }
1558
set_atom_type(&mut self, range: Range<usize>, atom_type: AtomType)1559 fn set_atom_type(&mut self, range: Range<usize>, atom_type: AtomType) {
1560 assert!(range.len() == 3);
1561 self.set_field(
1562 range,
1563 match atom_type {
1564 AtomType::U32 => 0_u8,
1565 AtomType::I32 => 1_u8,
1566 AtomType::U64 => 2_u8,
1567 AtomType::F32 => 3_u8,
1568 AtomType::F16x2 => 4_u8,
1569 AtomType::I64 => 5_u8,
1570 AtomType::F64 => 6_u8,
1571 },
1572 );
1573 }
1574
encode_atomg(&mut self, op: &OpAtom)1575 fn encode_atomg(&mut self, op: &OpAtom) {
1576 if op.atom_op == AtomOp::CmpExch {
1577 self.set_opcode(0x38b);
1578
1579 self.set_reg_src(32..40, op.cmpr);
1580 self.set_reg_src(64..72, op.data);
1581 } else {
1582 self.set_opcode(0x38a);
1583
1584 self.set_reg_src(32..40, op.data);
1585
1586 self.set_atom_op(87..91, op.atom_op);
1587 }
1588
1589 self.set_dst(op.dst);
1590 self.set_pred_dst(81..84, Dst::None);
1591
1592 self.set_reg_src(24..32, op.addr);
1593 self.set_field(40..64, op.addr_offset);
1594
1595 self.set_field(
1596 72..73,
1597 match op.mem_space.addr_type() {
1598 MemAddrType::A32 => 0_u8,
1599 MemAddrType::A64 => 1_u8,
1600 },
1601 );
1602
1603 self.set_atom_type(73..76, op.atom_type);
1604 self.set_mem_order(&op.mem_order);
1605 self.set_eviction_priority(&op.mem_eviction_priority);
1606 }
1607
encode_atoms(&mut self, op: &OpAtom)1608 fn encode_atoms(&mut self, op: &OpAtom) {
1609 if op.atom_op == AtomOp::CmpExch {
1610 self.set_opcode(0x38d);
1611
1612 self.set_reg_src(32..40, op.cmpr);
1613 self.set_reg_src(64..72, op.data);
1614 } else {
1615 self.set_opcode(0x38c);
1616
1617 self.set_reg_src(32..40, op.data);
1618
1619 self.set_atom_op(87..91, op.atom_op);
1620 }
1621
1622 self.set_dst(op.dst);
1623 self.set_reg_src(24..32, op.addr);
1624 self.set_field(40..64, op.addr_offset);
1625
1626 assert!(op.mem_order == MemOrder::Strong(MemScope::CTA));
1627 assert!(op.mem_eviction_priority == MemEvictionPriority::Normal);
1628
1629 self.set_atom_type(73..76, op.atom_type);
1630 }
1631
encode_atom(&mut self, op: &OpAtom)1632 fn encode_atom(&mut self, op: &OpAtom) {
1633 match op.mem_space {
1634 MemSpace::Global(_) => self.encode_atomg(op),
1635 MemSpace::Local => panic!("Atomics do not support local"),
1636 MemSpace::Shared => self.encode_atoms(op),
1637 }
1638 }
1639
encode_al2p(&mut self, op: &OpAL2P)1640 fn encode_al2p(&mut self, op: &OpAL2P) {
1641 self.set_opcode(0x920);
1642
1643 self.set_dst(op.dst);
1644 self.set_reg_src(24..32, op.offset);
1645
1646 self.set_field(40..50, op.access.addr);
1647 self.set_field(74..76, 0_u8); // comps
1648 assert!(!op.access.patch);
1649 self.set_bit(79, op.access.output);
1650 }
1651
encode_ald(&mut self, op: &OpALd)1652 fn encode_ald(&mut self, op: &OpALd) {
1653 self.set_opcode(0x321);
1654
1655 self.set_dst(op.dst);
1656 self.set_reg_src(32..40, op.vtx);
1657 self.set_reg_src(24..32, op.offset);
1658
1659 self.set_field(40..50, op.access.addr);
1660 self.set_field(74..76, op.access.comps - 1);
1661 self.set_field(76..77, op.access.patch);
1662 self.set_field(77..78, op.access.phys);
1663 self.set_field(79..80, op.access.output);
1664 }
1665
encode_ast(&mut self, op: &OpASt)1666 fn encode_ast(&mut self, op: &OpASt) {
1667 self.set_opcode(0x322);
1668
1669 self.set_reg_src(32..40, op.data);
1670 self.set_reg_src(64..72, op.vtx);
1671 self.set_reg_src(24..32, op.offset);
1672
1673 self.set_field(40..50, op.access.addr);
1674 self.set_field(74..76, op.access.comps - 1);
1675 self.set_field(76..77, op.access.patch);
1676 self.set_field(77..78, op.access.phys);
1677 assert!(op.access.output);
1678 }
1679
encode_ipa(&mut self, op: &OpIpa)1680 fn encode_ipa(&mut self, op: &OpIpa) {
1681 self.set_opcode(0x326);
1682
1683 self.set_dst(op.dst);
1684
1685 assert!(op.addr % 4 == 0);
1686 self.set_field(64..72, op.addr >> 2);
1687
1688 self.set_field(
1689 76..78,
1690 match op.loc {
1691 InterpLoc::Default => 0_u8,
1692 InterpLoc::Centroid => 1_u8,
1693 InterpLoc::Offset => 2_u8,
1694 },
1695 );
1696 self.set_field(
1697 78..80,
1698 match op.freq {
1699 InterpFreq::Pass => 0_u8,
1700 InterpFreq::Constant => 1_u8,
1701 InterpFreq::State => 2_u8,
1702 InterpFreq::PassMulW => {
1703 panic!("InterpFreq::PassMulW is invalid on SM70+");
1704 }
1705 },
1706 );
1707
1708 assert!(op.inv_w.is_zero());
1709 self.set_reg_src(32..40, op.offset);
1710
1711 // TODO: What is this for?
1712 self.set_pred_dst(81..84, Dst::None);
1713 }
1714
encode_ldtram(&mut self, op: &OpLdTram)1715 fn encode_ldtram(&mut self, op: &OpLdTram) {
1716 self.set_opcode(0x3ad);
1717 self.set_dst(op.dst);
1718 self.set_ureg(24..32, RegRef::zero(RegFile::UGPR, 1));
1719
1720 assert!(op.addr % 4 == 0);
1721 self.set_field(64..72, op.addr >> 2);
1722
1723 self.set_bit(72, op.use_c);
1724
1725 // Unknown but required
1726 self.set_bit(91, true);
1727 }
1728
encode_cctl(&mut self, op: &OpCCtl)1729 fn encode_cctl(&mut self, op: &OpCCtl) {
1730 assert!(matches!(op.mem_space, MemSpace::Global(_)));
1731 self.set_opcode(0x98f);
1732
1733 self.set_reg_src(24..32, op.addr);
1734 self.set_field(32..64, op.addr_offset);
1735
1736 self.set_field(
1737 87..91,
1738 match op.op {
1739 CCtlOp::PF1 => 0_u8,
1740 CCtlOp::PF2 => 1_u8,
1741 CCtlOp::WB => 2_u8,
1742 CCtlOp::IV => 3_u8,
1743 CCtlOp::IVAll => 4_u8,
1744 CCtlOp::RS => 5_u8,
1745 CCtlOp::IVAllP => 6_u8,
1746 CCtlOp::WBAll => 7_u8,
1747 CCtlOp::WBAllP => 8_u8,
1748 },
1749 );
1750 }
1751
encode_membar(&mut self, op: &OpMemBar)1752 fn encode_membar(&mut self, op: &OpMemBar) {
1753 self.set_opcode(0x992);
1754
1755 self.set_bit(72, false); // !.MMIO
1756 self.set_field(
1757 76..79,
1758 match op.scope {
1759 MemScope::CTA => 0_u8,
1760 // SM => 1_u8,
1761 MemScope::GPU => 2_u8,
1762 MemScope::System => 3_u8,
1763 },
1764 );
1765 self.set_bit(80, false); // .SC
1766 }
1767
set_rel_offset( &mut self, range: Range<usize>, label: &Label, ip: usize, labels: &HashMap<Label, usize>, )1768 fn set_rel_offset(
1769 &mut self,
1770 range: Range<usize>,
1771 label: &Label,
1772 ip: usize,
1773 labels: &HashMap<Label, usize>,
1774 ) {
1775 let ip = u64::try_from(ip).unwrap();
1776 let ip = i64::try_from(ip).unwrap();
1777
1778 let target_ip = *labels.get(label).unwrap();
1779 let target_ip = u64::try_from(target_ip).unwrap();
1780 let target_ip = i64::try_from(target_ip).unwrap();
1781
1782 let rel_offset = target_ip - ip - 4;
1783
1784 self.set_field(range, rel_offset);
1785 }
1786
encode_bclear(&mut self, op: &OpBClear)1787 fn encode_bclear(&mut self, op: &OpBClear) {
1788 self.set_opcode(0x355);
1789
1790 self.set_dst(Dst::None);
1791 self.set_bar_dst(24..28, op.dst);
1792
1793 self.set_bit(84, true); // .CLEAR
1794 }
1795
encode_bmov(&mut self, op: &OpBMov)1796 fn encode_bmov(&mut self, op: &OpBMov) {
1797 if dst_is_bar(op.dst) {
1798 self.set_opcode(0x356);
1799
1800 self.set_bar_dst(24..28, op.dst);
1801 self.set_reg_src(32..40, op.src);
1802
1803 self.set_bit(84, op.clear);
1804 } else {
1805 self.set_opcode(0x355);
1806
1807 self.set_dst(op.dst);
1808 self.set_bar_src(24..28, op.src);
1809
1810 self.set_bit(84, op.clear);
1811 }
1812 }
1813
encode_break(&mut self, op: &OpBreak)1814 fn encode_break(&mut self, op: &OpBreak) {
1815 self.set_opcode(0x942);
1816 assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
1817 self.set_bar_dst(16..20, op.bar_out);
1818 self.set_pred_src(87..90, 90, op.cond);
1819 }
1820
encode_bssy( &mut self, op: &OpBSSy, ip: usize, labels: &HashMap<Label, usize>, )1821 fn encode_bssy(
1822 &mut self,
1823 op: &OpBSSy,
1824 ip: usize,
1825 labels: &HashMap<Label, usize>,
1826 ) {
1827 self.set_opcode(0x945);
1828 assert!(op.bar_in.src_ref.as_reg() == op.bar_out.as_reg());
1829 self.set_bar_dst(16..20, op.bar_out);
1830 self.set_rel_offset(34..64, &op.target, ip, labels);
1831 self.set_pred_src(87..90, 90, op.cond);
1832 }
1833
encode_bsync(&mut self, op: &OpBSync)1834 fn encode_bsync(&mut self, op: &OpBSync) {
1835 self.set_opcode(0x941);
1836 self.set_bar_src(16..20, op.bar);
1837 self.set_pred_src(87..90, 90, op.cond);
1838 }
1839
encode_bra( &mut self, op: &OpBra, ip: usize, labels: &HashMap<Label, usize>, )1840 fn encode_bra(
1841 &mut self,
1842 op: &OpBra,
1843 ip: usize,
1844 labels: &HashMap<Label, usize>,
1845 ) {
1846 self.set_opcode(0x947);
1847 self.set_rel_offset(34..82, &op.target, ip, labels);
1848 self.set_field(87..90, 0x7_u8); // TODO: Pred?
1849 }
1850
encode_exit(&mut self, _op: &OpExit)1851 fn encode_exit(&mut self, _op: &OpExit) {
1852 self.set_opcode(0x94d);
1853
1854 // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
1855 self.set_field(84..85, false);
1856 self.set_field(85..86, false); // .NO_ATEXIT
1857 self.set_field(87..90, 0x7_u8); // TODO: Predicate
1858 self.set_field(90..91, false); // NOT
1859 }
1860
encode_warpsync(&mut self, op: &OpWarpSync)1861 fn encode_warpsync(&mut self, op: &OpWarpSync) {
1862 self.encode_alu(
1863 0x148,
1864 None,
1865 ALUSrc::None,
1866 ALUSrc::Imm32(op.mask),
1867 ALUSrc::None,
1868 );
1869 self.set_pred_src(87..90, 90, SrcRef::True.into());
1870 }
1871
encode_bar(&mut self, _op: &OpBar)1872 fn encode_bar(&mut self, _op: &OpBar) {
1873 self.set_opcode(0xb1d);
1874
1875 // self.set_opcode(0x31d);
1876
1877 // // src0 == src1
1878 // self.set_reg_src(32..40, SrcRef::Zero.into());
1879
1880 // // 00: RED.POPC
1881 // // 01: RED.AND
1882 // // 02: RED.OR
1883 // self.set_field(74..76, 0_u8);
1884
1885 // // 00: SYNC
1886 // // 01: ARV
1887 // // 02: RED
1888 // // 03: SCAN
1889 // self.set_field(77..79, 0_u8);
1890
1891 // self.set_pred_src(87..90, 90, SrcRef::True.into());
1892 }
1893
encode_cs2r(&mut self, op: &OpCS2R)1894 fn encode_cs2r(&mut self, op: &OpCS2R) {
1895 self.set_opcode(0x805);
1896 self.set_dst(op.dst);
1897 self.set_field(72..80, op.idx);
1898 self.set_bit(80, op.dst.as_reg().unwrap().comps() == 2); // .64
1899 }
1900
encode_isberd(&mut self, op: &OpIsberd)1901 fn encode_isberd(&mut self, op: &OpIsberd) {
1902 self.set_opcode(0x923);
1903 self.set_dst(op.dst);
1904 self.set_reg_src(24..32, op.idx);
1905 }
1906
encode_kill(&mut self, _op: &OpKill)1907 fn encode_kill(&mut self, _op: &OpKill) {
1908 self.set_opcode(0x95b);
1909 self.set_pred_src(87..90, 90, SrcRef::True.into());
1910 }
1911
encode_nop(&mut self, _op: &OpNop)1912 fn encode_nop(&mut self, _op: &OpNop) {
1913 self.set_opcode(0x918);
1914 }
1915
encode_pixld(&mut self, op: &OpPixLd)1916 fn encode_pixld(&mut self, op: &OpPixLd) {
1917 self.set_opcode(0x925);
1918 self.set_dst(op.dst);
1919 self.set_field(
1920 78..81,
1921 match op.val {
1922 PixVal::MsCount => 0_u8,
1923 PixVal::CovMask => 1_u8,
1924 PixVal::CentroidOffset => 2_u8,
1925 PixVal::MyIndex => 3_u8,
1926 PixVal::InnerCoverage => 4_u8,
1927 },
1928 );
1929 self.set_pred_dst(81..84, Dst::None);
1930 }
1931
encode_s2r(&mut self, op: &OpS2R)1932 fn encode_s2r(&mut self, op: &OpS2R) {
1933 self.set_opcode(0x919);
1934 self.set_dst(op.dst);
1935 self.set_field(72..80, op.idx);
1936 }
1937
encode_out(&mut self, op: &OpOut)1938 fn encode_out(&mut self, op: &OpOut) {
1939 self.encode_alu(
1940 0x124,
1941 Some(op.dst),
1942 ALUSrc::from_src(&op.handle),
1943 ALUSrc::from_src(&op.stream),
1944 ALUSrc::None,
1945 );
1946
1947 self.set_field(
1948 78..80,
1949 match op.out_type {
1950 OutType::Emit => 1_u8,
1951 OutType::Cut => 2_u8,
1952 OutType::EmitThenCut => 3_u8,
1953 },
1954 );
1955 }
1956
encode_out_final(&mut self, op: &OpOutFinal)1957 fn encode_out_final(&mut self, op: &OpOutFinal) {
1958 self.encode_alu(
1959 0x124,
1960 Some(Dst::None),
1961 ALUSrc::from_src(&op.handle),
1962 ALUSrc::from_src(&Src::new_zero()),
1963 ALUSrc::None,
1964 );
1965 }
1966
encode_vote(&mut self, op: &OpVote)1967 fn encode_vote(&mut self, op: &OpVote) {
1968 self.set_opcode(0x806);
1969 self.set_dst(op.ballot);
1970
1971 self.set_field(
1972 72..74,
1973 match op.op {
1974 VoteOp::All => 0_u8,
1975 VoteOp::Any => 1_u8,
1976 VoteOp::Eq => 2_u8,
1977 },
1978 );
1979
1980 self.set_pred_dst(81..84, op.vote);
1981 self.set_pred_src(87..90, 90, op.pred);
1982 }
1983
encode( instr: &Instr, sm: u8, ip: usize, labels: &HashMap<Label, usize>, ) -> [u32; 4]1984 pub fn encode(
1985 instr: &Instr,
1986 sm: u8,
1987 ip: usize,
1988 labels: &HashMap<Label, usize>,
1989 ) -> [u32; 4] {
1990 assert!(sm >= 70);
1991
1992 let mut si = SM70Instr {
1993 inst: [0; 4],
1994 sm: sm,
1995 };
1996
1997 match &instr.op {
1998 Op::FAdd(op) => si.encode_fadd(op),
1999 Op::FFma(op) => si.encode_ffma(op),
2000 Op::FMnMx(op) => si.encode_fmnmx(op),
2001 Op::FMul(op) => si.encode_fmul(op),
2002 Op::FSet(op) => si.encode_fset(op),
2003 Op::FSetP(op) => si.encode_fsetp(op),
2004 Op::FSwzAdd(op) => si.encode_fswzadd(op),
2005 Op::DAdd(op) => si.encode_dadd(op),
2006 Op::DFma(op) => si.encode_dfma(op),
2007 Op::DMul(op) => si.encode_dmul(op),
2008 Op::DSetP(op) => si.encode_dsetp(op),
2009 Op::MuFu(op) => si.encode_mufu(op),
2010 Op::BMsk(op) => si.encode_bmsk(op),
2011 Op::BRev(op) => si.encode_brev(op),
2012 Op::Flo(op) => si.encode_flo(op),
2013 Op::IAbs(op) => si.encode_iabs(op),
2014 Op::IAdd3(op) => si.encode_iadd3(op),
2015 Op::IAdd3X(op) => si.encode_iadd3x(op),
2016 Op::IDp4(op) => si.encode_idp4(op),
2017 Op::IMad(op) => si.encode_imad(op),
2018 Op::IMad64(op) => si.encode_imad64(op),
2019 Op::IMnMx(op) => si.encode_imnmx(op),
2020 Op::ISetP(op) => si.encode_isetp(op),
2021 Op::Lop3(op) => si.encode_lop3(op),
2022 Op::PopC(op) => si.encode_popc(op),
2023 Op::Shf(op) => si.encode_shf(op),
2024 Op::F2F(op) => si.encode_f2f(op),
2025 Op::F2I(op) => si.encode_f2i(op),
2026 Op::I2F(op) => si.encode_i2f(op),
2027 Op::FRnd(op) => si.encode_frnd(op),
2028 Op::Mov(op) => si.encode_mov(op),
2029 Op::Prmt(op) => si.encode_prmt(op),
2030 Op::Sel(op) => si.encode_sel(op),
2031 Op::Shfl(op) => si.encode_shfl(op),
2032 Op::PLop3(op) => si.encode_plop3(op),
2033 Op::Tex(op) => si.encode_tex(op),
2034 Op::Tld(op) => si.encode_tld(op),
2035 Op::Tld4(op) => si.encode_tld4(op),
2036 Op::Tmml(op) => si.encode_tmml(op),
2037 Op::Txd(op) => si.encode_txd(op),
2038 Op::Txq(op) => si.encode_txq(op),
2039 Op::SuLd(op) => si.encode_suld(op),
2040 Op::SuSt(op) => si.encode_sust(op),
2041 Op::SuAtom(op) => si.encode_suatom(op),
2042 Op::Ld(op) => si.encode_ld(op),
2043 Op::Ldc(op) => si.encode_ldc(op),
2044 Op::St(op) => si.encode_st(op),
2045 Op::Atom(op) => si.encode_atom(op),
2046 Op::AL2P(op) => si.encode_al2p(op),
2047 Op::ALd(op) => si.encode_ald(op),
2048 Op::ASt(op) => si.encode_ast(op),
2049 Op::Ipa(op) => si.encode_ipa(op),
2050 Op::LdTram(op) => si.encode_ldtram(op),
2051 Op::CCtl(op) => si.encode_cctl(op),
2052 Op::MemBar(op) => si.encode_membar(op),
2053 Op::BClear(op) => si.encode_bclear(op),
2054 Op::BMov(op) => si.encode_bmov(op),
2055 Op::Break(op) => si.encode_break(op),
2056 Op::BSSy(op) => si.encode_bssy(op, ip, labels),
2057 Op::BSync(op) => si.encode_bsync(op),
2058 Op::Bra(op) => si.encode_bra(op, ip, labels),
2059 Op::Exit(op) => si.encode_exit(op),
2060 Op::WarpSync(op) => si.encode_warpsync(op),
2061 Op::Bar(op) => si.encode_bar(op),
2062 Op::CS2R(op) => si.encode_cs2r(op),
2063 Op::Isberd(op) => si.encode_isberd(op),
2064 Op::Kill(op) => si.encode_kill(op),
2065 Op::Nop(op) => si.encode_nop(op),
2066 Op::PixLd(op) => si.encode_pixld(op),
2067 Op::S2R(op) => si.encode_s2r(op),
2068 Op::Out(op) => si.encode_out(op),
2069 Op::OutFinal(op) => si.encode_out_final(op),
2070 Op::Vote(op) => si.encode_vote(op),
2071 _ => panic!("Unhandled instruction"),
2072 }
2073
2074 si.set_pred(&instr.pred);
2075 si.set_instr_deps(&instr.deps);
2076
2077 si.inst
2078 }
2079 }
2080
2081 impl Shader {
encode_sm70(&self) -> Vec<u32>2082 pub fn encode_sm70(&self) -> Vec<u32> {
2083 assert!(self.functions.len() == 1);
2084 let func = &self.functions[0];
2085
2086 let mut ip = 0_usize;
2087 let mut labels = HashMap::new();
2088 for b in &func.blocks {
2089 labels.insert(b.label, ip);
2090 for instr in &b.instrs {
2091 if let Op::Nop(op) = &instr.op {
2092 if let Some(label) = op.label {
2093 labels.insert(label, ip);
2094 }
2095 }
2096 ip += 4;
2097 }
2098 }
2099
2100 let mut encoded = Vec::new();
2101 for b in &func.blocks {
2102 for instr in &b.instrs {
2103 let e = SM70Instr::encode(
2104 instr,
2105 self.info.sm,
2106 encoded.len(),
2107 &labels,
2108 );
2109 encoded.extend_from_slice(&e[..]);
2110 }
2111 }
2112 encoded
2113 }
2114 }
2115