1 // Copyright © 2023 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::ir::*;
5 use crate::legalize::{
6 src_is_reg, swap_srcs_if_not_reg, LegalizeBuildHelpers, LegalizeBuilder,
7 };
8 use bitview::*;
9
10 use std::collections::HashMap;
11 use std::ops::Range;
12
/// Shader model handle for the encodings implemented in this file.
pub struct ShaderModel50 {
    /// Shader model number; `new()` only admits values in `50..70`.
    sm: u8,
}

impl ShaderModel50 {
    /// Creates the shader model description for `sm`.
    ///
    /// # Panics
    ///
    /// Panics when `sm` is below 50 or is 70 and above.
    pub fn new(sm: u8) -> Self {
        assert!(sm >= 50 && sm < 70);
        ShaderModel50 { sm }
    }
}
23
24 impl ShaderModel for ShaderModel50 {
sm(&self) -> u825 fn sm(&self) -> u8 {
26 self.sm
27 }
28
num_regs(&self, file: RegFile) -> u3229 fn num_regs(&self, file: RegFile) -> u32 {
30 match file {
31 RegFile::GPR => 255,
32 RegFile::UGPR => 0,
33 RegFile::Pred => 7,
34 RegFile::UPred => 0,
35 RegFile::Carry => 1,
36 RegFile::Bar => 0,
37 RegFile::Mem => RegRef::MAX_IDX + 1,
38 }
39 }
40
hw_reserved_gprs(&self) -> u3241 fn hw_reserved_gprs(&self) -> u32 {
42 0
43 }
44
crs_size(&self, max_crs_depth: u32) -> u3245 fn crs_size(&self, max_crs_depth: u32) -> u32 {
46 if max_crs_depth <= 16 {
47 0
48 } else if max_crs_depth <= 32 {
49 1024
50 } else {
51 ((max_crs_depth + 32) * 16).next_multiple_of(512)
52 }
53 }
54
op_can_be_uniform(&self, _op: &Op) -> bool55 fn op_can_be_uniform(&self, _op: &Op) -> bool {
56 false
57 }
58
legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op)59 fn legalize_op(&self, b: &mut LegalizeBuilder, op: &mut Op) {
60 as_sm50_op_mut(op).legalize(b);
61 }
62
encode_shader(&self, s: &Shader<'_>) -> Vec<u32>63 fn encode_shader(&self, s: &Shader<'_>) -> Vec<u32> {
64 encode_sm50_shader(self, s)
65 }
66 }
67
/// Per-op hooks for this back-end; each supported op type implements both.
trait SM50Op {
    /// Rewrites the op in place, using `b` to emit any helper instructions,
    /// so that its sources are in a form `encode()` accepts.
    fn legalize(&mut self, b: &mut LegalizeBuilder);

    /// Writes the op's opcode and operand fields into the encoder `e`.
    fn encode(&self, e: &mut SM50Encoder<'_>);
}
72
/// Scratch state for encoding one instruction.
struct SM50Encoder<'a> {
    /// Shader model being targeted (consulted for SM-specific features).
    sm: &'a ShaderModel50,
    // NOTE(review): presumably the position of the instruction being
    // encoded; its use is not visible in this part of the file.
    ip: usize,
    // NOTE(review): presumably maps labels to instruction positions for
    // branch targets; confirm against the branch encoders.
    labels: &'a HashMap<Label, usize>,
    /// The 64 instruction bits, exposed through the bit-view trait impls.
    inst: [u32; 2],
    /// Scheduling bits, filled in by `set_instr_deps()`.
    sched: u32,
}
80
81 impl BitViewable for SM50Encoder<'_> {
bits(&self) -> usize82 fn bits(&self) -> usize {
83 BitView::new(&self.inst).bits()
84 }
85
get_bit_range_u64(&self, range: Range<usize>) -> u6486 fn get_bit_range_u64(&self, range: Range<usize>) -> u64 {
87 BitView::new(&self.inst).get_bit_range_u64(range)
88 }
89 }
90
91 impl BitMutViewable for SM50Encoder<'_> {
set_bit_range_u64(&mut self, range: Range<usize>, val: u64)92 fn set_bit_range_u64(&mut self, range: Range<usize>, val: u64) {
93 BitMutView::new(&mut self.inst).set_bit_range_u64(range, val);
94 }
95 }
96
97 impl SetFieldU64 for SM50Encoder<'_> {
set_field_u64(&mut self, range: Range<usize>, val: u64)98 fn set_field_u64(&mut self, range: Range<usize>, val: u64) {
99 BitMutView::new(&mut self.inst).set_field_u64(range, val);
100 }
101 }
102
/// Low-level field setters shared by the per-op encoders.
impl SM50Encoder<'_> {
    /// Writes the 16-bit opcode into bits 48..64.
    fn set_opcode(&mut self, opcode: u16) {
        self.set_field(48..64, opcode);
    }

    /// Writes the index of predicate register `reg` into the 3-bit `range`.
    ///
    /// Panics unless `reg` is a single-component predicate register with an
    /// index of at most 7.
    fn set_pred_reg(&mut self, range: Range<usize>, reg: RegRef) {
        assert!(range.len() == 3);
        assert!(reg.file() == RegFile::Pred);
        assert!(reg.base_idx() <= 7);
        assert!(reg.comps() == 1);
        self.set_field(range, reg.base_idx());
    }

    /// Writes the instruction's guard predicate: register index in bits
    /// 16..19 and the inversion flag in bit 19.
    ///
    /// Panics on an always-false predicate or an SSA predicate.
    fn set_pred(&mut self, pred: &Pred) {
        assert!(!pred.is_false());
        self.set_pred_reg(
            16..19,
            match pred.pred_ref {
                // No predicate: use the zero register of the Pred file
                PredRef::None => RegRef::zero(RegFile::Pred, 1),
                PredRef::Reg(reg) => reg,
                PredRef::SSA(_) => panic!("SSA values must be lowered"),
            },
        );
        self.set_bit(19, pred.pred_inv);
    }

    /// Fills the scheduling word (`self.sched`) from `deps`.
    ///
    /// A missing write or read barrier is encoded as 7.
    fn set_instr_deps(&mut self, deps: &InstrDeps) {
        let mut sched = BitMutView::new(&mut self.sched);

        sched.set_field(0..4, deps.delay);
        sched.set_bit(4, deps.yld);
        sched.set_field(5..8, deps.wr_bar().unwrap_or(7));
        sched.set_field(8..11, deps.rd_bar().unwrap_or(7));
        sched.set_field(11..17, deps.wt_bar_mask);
        sched.set_field(17..21, deps.reuse_mask);
    }

    /// Writes the base index of GPR `reg` into the 8-bit `range`.
    fn set_reg(&mut self, range: Range<usize>, reg: RegRef) {
        assert!(range.len() == 8);
        assert!(reg.file() == RegFile::GPR);
        self.set_field(range, reg.base_idx());
    }

    /// Writes a register source reference into `range`.
    ///
    /// `SrcRef::Zero` is encoded as the GPR zero register; anything other
    /// than zero or a register panics.
    fn set_reg_src_ref(&mut self, range: Range<usize>, src_ref: SrcRef) {
        match src_ref {
            SrcRef::Zero => self.set_reg(range, RegRef::zero(RegFile::GPR, 1)),
            SrcRef::Reg(reg) => self.set_reg(range, reg),
            _ => panic!("Not a register"),
        }
    }

    /// Writes a register source that must not carry any source modifier.
    fn set_reg_src(&mut self, range: Range<usize>, src: Src) {
        assert!(src.src_mod.is_none());
        self.set_reg_src_ref(range, src.src_ref);
    }

    /// Writes a register source plus its float abs/neg modifier bits.
    fn set_reg_fmod_src(
        &mut self,
        range: Range<usize>,
        abs_bit: usize,
        neg_bit: usize,
        src: Src,
    ) {
        self.set_reg_src_ref(range, src.src_ref);
        self.set_bit(abs_bit, src.src_mod.has_fabs());
        self.set_bit(neg_bit, src.src_mod.has_fneg());
    }

    /// Writes a register source plus its integer-negate modifier bit.
    fn set_reg_ineg_src(
        &mut self,
        range: Range<usize>,
        neg_bit: usize,
        src: Src,
    ) {
        self.set_reg_src_ref(range, src.src_ref);
        self.set_bit(neg_bit, src.src_mod.is_ineg());
    }

    /// Writes a register source plus its bitwise-not modifier bit.
    fn set_reg_bnot_src(
        &mut self,
        range: Range<usize>,
        not_bit: usize,
        src: Src,
    ) {
        self.set_reg_src_ref(range, src.src_ref);
        self.set_bit(not_bit, src.src_mod.is_bnot());
    }

    /// Writes a predicate destination into the 3-bit `range`.
    ///
    /// `Dst::None` is encoded as the zero register of the Pred file.
    fn set_pred_dst(&mut self, range: Range<usize>, dst: Dst) {
        match dst {
            Dst::None => {
                self.set_pred_reg(range, RegRef::zero(RegFile::Pred, 1));
            }
            Dst::Reg(reg) => self.set_pred_reg(range, reg),
            _ => panic!("Not a register"),
        }
    }

    /// Writes a predicate source into `range` and its negation into
    /// `not_bit`.
    ///
    /// Constant true/false are expressed through predicate 7, negated for
    /// false; the source's own bnot modifier is XORed on top.
    fn set_pred_src(&mut self, range: Range<usize>, not_bit: usize, src: Src) {
        // The default for predicates is true
        let true_reg = RegRef::new(RegFile::Pred, 7, 1);

        let (not, reg) = match src.src_ref {
            SrcRef::True => (false, true_reg),
            SrcRef::False => (true, true_reg),
            SrcRef::Reg(reg) => (false, reg),
            _ => panic!("Not a register"),
        };
        self.set_pred_reg(range, reg);
        self.set_bit(not_bit, not ^ src.src_mod.is_bnot());
    }

    /// Writes the GPR destination into bits 0..8.
    ///
    /// `Dst::None` is encoded as the GPR zero register.
    fn set_dst(&mut self, dst: Dst) {
        let reg = match dst {
            Dst::None => RegRef::zero(RegFile::GPR, 1),
            Dst::Reg(reg) => reg,
            _ => panic!("invalid dst {dst}"),
        };
        self.set_reg(0..8, reg);
    }

    /// Writes a full 32-bit immediate into the 32-bit `range`.
    fn set_src_imm32(&mut self, range: Range<usize>, u: u32) {
        assert!(range.len() == 32);
        self.set_field(range, u);
    }

    /// Writes a 20-bit signed integer immediate: the low 19 bits go into
    /// `range` and bit 19 goes into `sign_bit`.
    ///
    /// Panics unless bits 19..32 of `i` are all zero or all one, i.e. `i`
    /// is the sign extension of a 20-bit value.
    fn set_src_imm_i20(
        &mut self,
        range: Range<usize>,
        sign_bit: usize,
        i: u32,
    ) {
        assert!(range.len() == 19);
        assert!((i & 0xfff80000) == 0 || (i & 0xfff80000) == 0xfff80000);

        self.set_field(range, i & 0x7ffff);
        self.set_field(sign_bit..sign_bit + 1, (i & 0x80000) >> 19);
    }

    /// Writes a float immediate truncated to its top 20 bits: bits 12..31
    /// of `f` go into `range` and bit 31 goes into `sign_bit`.
    ///
    /// Panics if any of the low 12 bits of `f` are set, since they would
    /// be lost.
    fn set_src_imm_f20(
        &mut self,
        range: Range<usize>,
        sign_bit: usize,
        f: u32,
    ) {
        assert!(range.len() == 19);
        assert!((f & 0x00000fff) == 0);

        self.set_field(range, (f >> 12) & 0x7ffff);
        self.set_field(sign_bit..sign_bit + 1, f >> 31);
    }

    /// Writes a constant-buffer reference into `range`.
    ///
    /// Relative to the start of `range`, bits 0..14 hold the offset divided
    /// by four and bits 14..19 hold the binding index.  Panics if the offset
    /// is not a multiple of four or the buffer is not a bound one.
    fn set_src_cb(&mut self, range: Range<usize>, cb: &CBufRef) {
        let mut v = BitMutView::new_subset(self, range);

        assert!(cb.offset % 4 == 0);

        v.set_field(0..14, cb.offset >> 2);
        if let CBuf::Binding(idx) = cb.buf {
            v.set_field(14..19, idx);
        } else {
            panic!("Must be a bound constant buffer");
        }
    }

    /// Writes a constant-buffer source plus its float abs/neg modifier bits.
    ///
    /// Panics if `src` is not a constant-buffer reference.
    fn set_cb_fmod_src(
        &mut self,
        range: Range<usize>,
        abs_bit: usize,
        neg_bit: usize,
        src: Src,
    ) {
        if let SrcRef::CBuf(cb) = &src.src_ref {
            self.set_src_cb(range, cb);
        } else {
            panic!("Not a CBuf source");
        }

        self.set_bit(abs_bit, src.src_mod.has_fabs());
        self.set_bit(neg_bit, src.src_mod.has_fneg());
    }

    /// Writes a constant-buffer source plus its integer-negate modifier bit.
    ///
    /// Panics if `src` is not a constant-buffer reference.
    fn set_cb_ineg_src(
        &mut self,
        range: Range<usize>,
        neg_bit: usize,
        src: Src,
    ) {
        if let SrcRef::CBuf(cb) = &src.src_ref {
            self.set_src_cb(range, cb);
        } else {
            panic!("Not a CBuf source");
        }

        self.set_bit(neg_bit, src.src_mod.is_ineg());
    }

    /// Writes a constant-buffer source plus its bitwise-not modifier bit.
    ///
    /// Panics if `src` is not a constant-buffer reference.
    fn set_cb_bnot_src(
        &mut self,
        range: Range<usize>,
        not_bit: usize,
        src: Src,
    ) {
        if let SrcRef::CBuf(cb) = &src.src_ref {
            self.set_src_cb(range, cb);
        } else {
            panic!("Not a CBuf source");
        }

        self.set_bit(not_bit, src.src_mod.is_bnot());
    }
}
315
316 //
317 // Legalization helpers
318 //
319
320 pub trait SM50LegalizeBuildHelpers: LegalizeBuildHelpers {
copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType)321 fn copy_alu_src_if_fabs(&mut self, src: &mut Src, src_type: SrcType) {
322 if src.src_mod.has_fabs() {
323 self.copy_alu_src_and_lower_fmod(src, src_type);
324 }
325 }
326
copy_alu_src_if_i20_overflow( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )327 fn copy_alu_src_if_i20_overflow(
328 &mut self,
329 src: &mut Src,
330 reg_file: RegFile,
331 src_type: SrcType,
332 ) {
333 if src.as_imm_not_i20().is_some() {
334 self.copy_alu_src(src, reg_file, src_type);
335 }
336 }
337
copy_alu_src_if_f20_overflow( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )338 fn copy_alu_src_if_f20_overflow(
339 &mut self,
340 src: &mut Src,
341 reg_file: RegFile,
342 src_type: SrcType,
343 ) {
344 if src.as_imm_not_f20().is_some() {
345 self.copy_alu_src(src, reg_file, src_type);
346 }
347 }
348 }
349
// The trait's default methods are all that is needed; nothing to override.
impl SM50LegalizeBuildHelpers for LegalizeBuilder<'_> {}
351
352 /// Helper to legalize extended or external instructions
353 ///
354 /// These are instructions which reach out external units such as load/store
355 /// and texture ops. They typically can't take anything but GPRs and are the
356 /// only types of instructions that support vectors.
357 ///
legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder)358 fn legalize_ext_instr(op: &mut impl SrcsAsSlice, _b: &mut LegalizeBuilder) {
359 let src_types = op.src_types();
360 for (i, src) in op.srcs_as_mut_slice().iter_mut().enumerate() {
361 match src_types[i] {
362 SrcType::SSA => {
363 assert!(src.as_ssa().is_some());
364 }
365 SrcType::GPR => {
366 assert!(src_is_reg(src, RegFile::GPR));
367 }
368 SrcType::ALU
369 | SrcType::F16
370 | SrcType::F16v2
371 | SrcType::F32
372 | SrcType::F64
373 | SrcType::I32
374 | SrcType::B32 => {
375 panic!("ALU srcs must be legalized explicitly");
376 }
377 SrcType::Pred => {
378 panic!("Predicates must be legalized explicitly");
379 }
380 SrcType::Carry => {
381 panic!("Carry values must be legalized explicitly");
382 }
383 SrcType::Bar => panic!("Barrier regs are Volta+"),
384 }
385 }
386 }
387
388 //
389 // Implementations of SM50Op for each op we support on Maxwell/Pascal
390 //
391
392 impl SM50Encoder<'_> {
set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode)393 fn set_rnd_mode(&mut self, range: Range<usize>, rnd_mode: FRndMode) {
394 assert!(range.len() == 2);
395 self.set_field(
396 range,
397 match rnd_mode {
398 FRndMode::NearestEven => 0_u8,
399 FRndMode::NegInf => 1_u8,
400 FRndMode::PosInf => 2_u8,
401 FRndMode::Zero => 3_u8,
402 },
403 );
404 }
405 }
406
407 impl SM50Op for OpFAdd {
legalize(&mut self, b: &mut LegalizeBuilder)408 fn legalize(&mut self, b: &mut LegalizeBuilder) {
409 use RegFile::GPR;
410 let [src0, src1] = &mut self.srcs;
411 swap_srcs_if_not_reg(src0, src1, GPR);
412 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
413 }
414
encode(&self, e: &mut SM50Encoder<'_>)415 fn encode(&self, e: &mut SM50Encoder<'_>) {
416 if let Some(imm32) = self.srcs[1].as_imm_not_f20() {
417 e.set_opcode(0x0800);
418 e.set_dst(self.dst);
419 e.set_reg_fmod_src(8..16, 54, 56, self.srcs[0]);
420 e.set_src_imm32(20..52, imm32);
421 e.set_bit(55, self.ftz);
422 } else {
423 match &self.srcs[1].src_ref {
424 SrcRef::Zero | SrcRef::Reg(_) => {
425 e.set_opcode(0x5c58);
426 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
427 }
428 SrcRef::Imm32(imm32) => {
429 e.set_opcode(0x3858);
430 e.set_src_imm_f20(20..39, 56, *imm32);
431 assert!(self.srcs[1].src_mod.is_none());
432 }
433 SrcRef::CBuf(_) => {
434 e.set_opcode(0x4c58);
435 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
436 }
437 src => panic!("Invalid fadd src1: {src}"),
438 }
439
440 e.set_dst(self.dst);
441 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
442
443 e.set_rnd_mode(39..41, self.rnd_mode);
444 e.set_bit(44, self.ftz);
445 e.set_bit(50, self.saturate);
446 }
447 }
448 }
449
450 impl SM50Op for OpFFma {
legalize(&mut self, b: &mut LegalizeBuilder)451 fn legalize(&mut self, b: &mut LegalizeBuilder) {
452 use RegFile::GPR;
453 let [src0, src1, src2] = &mut self.srcs;
454 b.copy_alu_src_if_fabs(src0, SrcType::F32);
455 b.copy_alu_src_if_fabs(src1, SrcType::F32);
456 b.copy_alu_src_if_fabs(src2, SrcType::F32);
457 swap_srcs_if_not_reg(src0, src1, GPR);
458 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
459 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
460 if src_is_reg(src1, GPR) {
461 b.copy_alu_src_if_imm(src2, GPR, SrcType::F32);
462 } else {
463 b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F32);
464 }
465 }
466
encode(&self, e: &mut SM50Encoder<'_>)467 fn encode(&self, e: &mut SM50Encoder<'_>) {
468 // ffma doesn't have any abs flags.
469 assert!(!self.srcs[0].src_mod.has_fabs());
470 assert!(!self.srcs[1].src_mod.has_fabs());
471 assert!(!self.srcs[2].src_mod.has_fabs());
472
473 // There is one fneg bit shared by the two fmul sources
474 let fneg_fmul =
475 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
476 let fneg_src2 = self.srcs[2].src_mod.has_fneg();
477
478 match &self.srcs[2].src_ref {
479 SrcRef::Zero | SrcRef::Reg(_) => {
480 match &self.srcs[1].src_ref {
481 SrcRef::Zero | SrcRef::Reg(_) => {
482 e.set_opcode(0x5980);
483 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
484 }
485 SrcRef::Imm32(imm32) => {
486 e.set_opcode(0x3280);
487
488 // Technically, ffma also supports a 32-bit immediate,
489 // but only in the case where the destination is the
490 // same as src2. We don't support that right now.
491 e.set_src_imm_f20(20..39, 56, *imm32);
492 }
493 SrcRef::CBuf(cb) => {
494 e.set_opcode(0x4980);
495 e.set_src_cb(20..39, cb);
496 }
497 src => panic!("Invalid ffma src1: {src}"),
498 }
499
500 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
501 }
502 SrcRef::CBuf(cb) => {
503 e.set_opcode(0x5180);
504 e.set_src_cb(20..39, cb);
505 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
506 }
507 src => panic!("Invalid ffma src2: {src}"),
508 }
509
510 e.set_dst(self.dst);
511 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
512
513 e.set_bit(48, fneg_fmul);
514 e.set_bit(49, fneg_src2);
515 e.set_bit(50, self.saturate);
516 e.set_rnd_mode(51..53, self.rnd_mode);
517
518 e.set_bit(53, self.ftz);
519 e.set_bit(54, self.dnz);
520 }
521 }
522
523 impl SM50Op for OpFMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)524 fn legalize(&mut self, b: &mut LegalizeBuilder) {
525 use RegFile::GPR;
526 let [src0, src1] = &mut self.srcs;
527 swap_srcs_if_not_reg(src0, src1, GPR);
528 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
529 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
530 }
531
encode(&self, e: &mut SM50Encoder<'_>)532 fn encode(&self, e: &mut SM50Encoder<'_>) {
533 match &self.srcs[1].src_ref {
534 SrcRef::Zero | SrcRef::Reg(_) => {
535 e.set_opcode(0x5c60);
536 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
537 }
538 SrcRef::Imm32(imm32) => {
539 e.set_opcode(0x3860);
540 e.set_src_imm_f20(20..39, 56, *imm32);
541 assert!(self.srcs[1].src_mod.is_none());
542 }
543 SrcRef::CBuf(_) => {
544 e.set_opcode(0x4c60);
545 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
546 }
547 src => panic!("Invalid fmnmx src2: {src}"),
548 }
549
550 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
551 e.set_dst(self.dst);
552 e.set_pred_src(39..42, 42, self.min);
553 e.set_bit(44, self.ftz);
554 }
555 }
556
557 impl SM50Op for OpFMul {
legalize(&mut self, b: &mut LegalizeBuilder)558 fn legalize(&mut self, b: &mut LegalizeBuilder) {
559 use RegFile::GPR;
560 let [src0, src1] = &mut self.srcs;
561 b.copy_alu_src_if_fabs(src0, SrcType::F32);
562 b.copy_alu_src_if_fabs(src1, SrcType::F32);
563 swap_srcs_if_not_reg(src0, src1, GPR);
564 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
565 }
566
encode(&self, e: &mut SM50Encoder<'_>)567 fn encode(&self, e: &mut SM50Encoder<'_>) {
568 // fmul doesn't have any abs flags.
569 assert!(!self.srcs[0].src_mod.has_fabs());
570 assert!(!self.srcs[1].src_mod.has_fabs());
571
572 // There is one fneg bit shared by both sources
573 let fneg =
574 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
575
576 if let Some(mut imm32) = self.srcs[1].as_imm_not_f20() {
577 e.set_opcode(0x1e00);
578
579 e.set_bit(53, self.ftz);
580 e.set_bit(54, self.dnz);
581 e.set_bit(55, self.saturate);
582
583 if fneg {
584 // Flip the immediate sign bit
585 imm32 ^= 0x80000000;
586 }
587 e.set_src_imm32(20..52, imm32);
588 } else {
589 match &self.srcs[1].src_ref {
590 SrcRef::Zero | SrcRef::Reg(_) => {
591 e.set_opcode(0x5c68);
592 e.set_reg_src(20..28, self.srcs[1]);
593 }
594 SrcRef::Imm32(imm32) => {
595 e.set_opcode(0x3868);
596 e.set_src_imm_f20(20..39, 56, *imm32);
597 }
598 SrcRef::CBuf(cbuf) => {
599 e.set_opcode(0x4c68);
600 e.set_src_cb(20..39, cbuf);
601 }
602 src => panic!("Invalid fmul src1: {src}"),
603 }
604
605 e.set_rnd_mode(39..41, self.rnd_mode);
606 e.set_field(41..44, 0x0_u8); // TODO: PDIV
607 e.set_bit(44, self.ftz);
608 e.set_bit(45, self.dnz);
609 e.set_bit(48, fneg);
610 e.set_bit(50, self.saturate);
611 }
612
613 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
614 e.set_dst(self.dst);
615 }
616 }
617
618 impl SM50Op for OpRro {
legalize(&mut self, b: &mut LegalizeBuilder)619 fn legalize(&mut self, b: &mut LegalizeBuilder) {
620 use RegFile::GPR;
621 b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::F32);
622 }
623
encode(&self, e: &mut SM50Encoder<'_>)624 fn encode(&self, e: &mut SM50Encoder<'_>) {
625 match &self.src.src_ref {
626 SrcRef::Zero | SrcRef::Reg(_) => {
627 e.set_opcode(0x5c90);
628 e.set_reg_fmod_src(20..28, 49, 45, self.src);
629 }
630 SrcRef::Imm32(imm32) => {
631 e.set_opcode(0x3890);
632 e.set_src_imm_f20(20..39, 56, *imm32);
633 assert!(self.src.src_mod.is_none());
634 }
635 SrcRef::CBuf(_) => {
636 e.set_opcode(0x4c90);
637 e.set_cb_fmod_src(20..39, 49, 45, self.src);
638 }
639 src => panic!("Invalid rro src: {src}"),
640 }
641
642 e.set_dst(self.dst);
643 e.set_field(
644 39..40,
645 match self.op {
646 RroOp::SinCos => 0u8,
647 RroOp::Exp2 => 1u8,
648 },
649 );
650 }
651 }
652
653 impl SM50Op for OpMuFu {
legalize(&mut self, b: &mut LegalizeBuilder)654 fn legalize(&mut self, b: &mut LegalizeBuilder) {
655 b.copy_alu_src_if_not_reg(&mut self.src, RegFile::GPR, SrcType::GPR);
656 }
657
encode(&self, e: &mut SM50Encoder<'_>)658 fn encode(&self, e: &mut SM50Encoder<'_>) {
659 e.set_opcode(0x5080);
660
661 e.set_dst(self.dst);
662 e.set_reg_fmod_src(8..16, 46, 48, self.src);
663
664 e.set_field(
665 20..24,
666 match self.op {
667 MuFuOp::Cos => 0_u8,
668 MuFuOp::Sin => 1_u8,
669 MuFuOp::Exp2 => 2_u8,
670 MuFuOp::Log2 => 3_u8,
671 MuFuOp::Rcp => 4_u8,
672 MuFuOp::Rsq => 5_u8,
673 MuFuOp::Rcp64H => 6_u8,
674 MuFuOp::Rsq64H => 7_u8,
675 // SQRT is only on SM52 and later
676 MuFuOp::Sqrt if e.sm.sm >= 52 => 8_u8,
677 MuFuOp::Sqrt => panic!("MUFU.SQRT not supported on SM50"),
678 MuFuOp::Tanh => panic!("MUFU.TANH not supported on SM50"),
679 },
680 );
681 }
682 }
683
684 impl SM50Encoder<'_> {
set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp)685 fn set_float_cmp_op(&mut self, range: Range<usize>, op: FloatCmpOp) {
686 assert!(range.len() == 4);
687 self.set_field(
688 range,
689 match op {
690 FloatCmpOp::OrdLt => 0x01_u8,
691 FloatCmpOp::OrdEq => 0x02_u8,
692 FloatCmpOp::OrdLe => 0x03_u8,
693 FloatCmpOp::OrdGt => 0x04_u8,
694 FloatCmpOp::OrdNe => 0x05_u8,
695 FloatCmpOp::OrdGe => 0x06_u8,
696 FloatCmpOp::UnordLt => 0x09_u8,
697 FloatCmpOp::UnordEq => 0x0a_u8,
698 FloatCmpOp::UnordLe => 0x0b_u8,
699 FloatCmpOp::UnordGt => 0x0c_u8,
700 FloatCmpOp::UnordNe => 0x0d_u8,
701 FloatCmpOp::UnordGe => 0x0e_u8,
702 FloatCmpOp::IsNum => 0x07_u8,
703 FloatCmpOp::IsNan => 0x08_u8,
704 },
705 );
706 }
707
set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp)708 fn set_pred_set_op(&mut self, range: Range<usize>, op: PredSetOp) {
709 assert!(range.len() == 2);
710 self.set_field(
711 range,
712 match op {
713 PredSetOp::And => 0_u8,
714 PredSetOp::Or => 1_u8,
715 PredSetOp::Xor => 2_u8,
716 },
717 );
718 }
719
set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp)720 fn set_int_cmp_op(&mut self, range: Range<usize>, op: IntCmpOp) {
721 assert!(range.len() == 3);
722 self.set_field(
723 range,
724 match op {
725 IntCmpOp::Eq => 2_u8,
726 IntCmpOp::Ne => 5_u8,
727 IntCmpOp::Lt => 1_u8,
728 IntCmpOp::Le => 3_u8,
729 IntCmpOp::Gt => 4_u8,
730 IntCmpOp::Ge => 6_u8,
731 },
732 );
733 }
734 }
735
736 impl SM50Op for OpFSet {
legalize(&mut self, b: &mut LegalizeBuilder)737 fn legalize(&mut self, b: &mut LegalizeBuilder) {
738 use RegFile::GPR;
739 let [src0, src1] = &mut self.srcs;
740 if swap_srcs_if_not_reg(src0, src1, GPR) {
741 self.cmp_op = self.cmp_op.flip();
742 }
743 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
744 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
745 }
746
encode(&self, e: &mut SM50Encoder<'_>)747 fn encode(&self, e: &mut SM50Encoder<'_>) {
748 match &self.srcs[1].src_ref {
749 SrcRef::Zero | SrcRef::Reg(_) => {
750 e.set_opcode(0x5800);
751 e.set_reg_fmod_src(20..28, 44, 53, self.srcs[1]);
752 }
753 SrcRef::Imm32(imm32) => {
754 e.set_opcode(0x3000);
755 e.set_src_imm_f20(20..39, 56, *imm32);
756 assert!(self.srcs[1].src_mod.is_none());
757 }
758 SrcRef::CBuf(_) => {
759 e.set_opcode(0x4800);
760 e.set_cb_fmod_src(20..39, 44, 6, self.srcs[1]);
761 }
762 src => panic!("Invalid fset src1: {src}"),
763 }
764
765 e.set_reg_fmod_src(8..16, 54, 43, self.srcs[0]);
766 e.set_pred_src(39..42, 42, SrcRef::True.into());
767 e.set_float_cmp_op(48..52, self.cmp_op);
768 e.set_bit(52, true); // bool float
769 e.set_bit(55, self.ftz);
770 e.set_dst(self.dst);
771 }
772 }
773
774 impl SM50Op for OpFSetP {
legalize(&mut self, b: &mut LegalizeBuilder)775 fn legalize(&mut self, b: &mut LegalizeBuilder) {
776 use RegFile::GPR;
777 let [src0, src1] = &mut self.srcs;
778 if swap_srcs_if_not_reg(src0, src1, GPR) {
779 self.cmp_op = self.cmp_op.flip();
780 }
781 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F32);
782 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F32);
783 }
784
encode(&self, e: &mut SM50Encoder<'_>)785 fn encode(&self, e: &mut SM50Encoder<'_>) {
786 match &self.srcs[1].src_ref {
787 SrcRef::Zero | SrcRef::Reg(_) => {
788 e.set_opcode(0x5bb0);
789 e.set_reg_fmod_src(20..28, 44, 6, self.srcs[1]);
790 }
791 SrcRef::Imm32(imm32) => {
792 e.set_opcode(0x36b0);
793 e.set_src_imm_f20(20..39, 56, *imm32);
794 assert!(self.srcs[1].src_mod.is_none());
795 }
796 SrcRef::CBuf(_) => {
797 e.set_opcode(0x4bb0);
798 e.set_cb_fmod_src(20..39, 44, 6, self.srcs[1]);
799 }
800 src => panic!("Invalid fsetp src1: {src}"),
801 }
802
803 e.set_pred_dst(3..6, self.dst);
804 e.set_pred_dst(0..3, Dst::None); // dst1
805 e.set_reg_fmod_src(8..16, 7, 43, self.srcs[0]);
806 e.set_pred_src(39..42, 42, self.accum);
807 e.set_pred_set_op(45..47, self.set_op);
808 e.set_bit(47, self.ftz);
809 e.set_float_cmp_op(48..52, self.cmp_op);
810 }
811 }
812
813 impl SM50Op for OpFSwzAdd {
legalize(&mut self, b: &mut LegalizeBuilder)814 fn legalize(&mut self, b: &mut LegalizeBuilder) {
815 use RegFile::GPR;
816 b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR);
817 b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR);
818 }
819
encode(&self, e: &mut SM50Encoder<'_>)820 fn encode(&self, e: &mut SM50Encoder<'_>) {
821 e.set_opcode(0x50f8);
822
823 e.set_dst(self.dst);
824 e.set_reg_src(8..16, self.srcs[0]);
825 e.set_reg_src(20..28, self.srcs[1]);
826
827 e.set_field(
828 39..41,
829 match self.rnd_mode {
830 FRndMode::NearestEven => 0u8,
831 FRndMode::NegInf => 1u8,
832 FRndMode::PosInf => 2u8,
833 FRndMode::Zero => 3u8,
834 },
835 );
836
837 for (i, op) in self.ops.iter().enumerate() {
838 e.set_field(
839 28 + i * 2..28 + (i + 1) * 2,
840 match op {
841 FSwzAddOp::Add => 0u8,
842 FSwzAddOp::SubLeft => 1u8,
843 FSwzAddOp::SubRight => 2u8,
844 FSwzAddOp::MoveLeft => 3u8,
845 },
846 );
847 }
848
849 e.set_bit(38, false); /* .NDV */
850 e.set_bit(44, self.ftz);
851 e.set_bit(47, false); /* dst.CC */
852 }
853 }
854
855 impl SM50Op for OpDAdd {
legalize(&mut self, b: &mut LegalizeBuilder)856 fn legalize(&mut self, b: &mut LegalizeBuilder) {
857 use RegFile::GPR;
858 let [src0, src1] = &mut self.srcs;
859 swap_srcs_if_not_reg(src0, src1, GPR);
860 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
861 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
862 }
863
encode(&self, e: &mut SM50Encoder<'_>)864 fn encode(&self, e: &mut SM50Encoder<'_>) {
865 match &self.srcs[1].src_ref {
866 SrcRef::Zero | SrcRef::Reg(_) => {
867 e.set_opcode(0x5c70);
868 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
869 }
870 SrcRef::Imm32(imm32) => {
871 e.set_opcode(0x3870);
872 e.set_src_imm_f20(20..39, 56, *imm32);
873 assert!(self.srcs[1].src_mod.is_none());
874 }
875 SrcRef::CBuf(_) => {
876 e.set_opcode(0x4c70);
877 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
878 }
879 src => panic!("Invalid dadd src1: {src}"),
880 }
881
882 e.set_dst(self.dst);
883 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
884 e.set_rnd_mode(39..41, self.rnd_mode);
885 }
886 }
887
888 impl SM50Op for OpDFma {
legalize(&mut self, b: &mut LegalizeBuilder)889 fn legalize(&mut self, b: &mut LegalizeBuilder) {
890 use RegFile::GPR;
891 let [src0, src1, src2] = &mut self.srcs;
892 b.copy_alu_src_if_fabs(src0, SrcType::F64);
893 b.copy_alu_src_if_fabs(src1, SrcType::F64);
894 b.copy_alu_src_if_fabs(src2, SrcType::F64);
895 swap_srcs_if_not_reg(src0, src1, GPR);
896 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
897 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
898 if src_is_reg(src1, GPR) {
899 b.copy_alu_src_if_imm(src2, GPR, SrcType::F64);
900 } else {
901 b.copy_alu_src_if_not_reg(src2, GPR, SrcType::F64);
902 }
903 }
904
encode(&self, e: &mut SM50Encoder<'_>)905 fn encode(&self, e: &mut SM50Encoder<'_>) {
906 // dfma doesn't have any abs flags.
907 assert!(!self.srcs[0].src_mod.has_fabs());
908 assert!(!self.srcs[1].src_mod.has_fabs());
909 assert!(!self.srcs[2].src_mod.has_fabs());
910
911 // There is one fneg bit shared by the two fmul sources
912 let fneg_fmul =
913 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
914 let fneg_src2 = self.srcs[2].src_mod.has_fneg();
915
916 match &self.srcs[2].src_ref {
917 SrcRef::Zero | SrcRef::Reg(_) => {
918 match &self.srcs[1].src_ref {
919 SrcRef::Zero | SrcRef::Reg(_) => {
920 e.set_opcode(0x5b70);
921 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
922 }
923 SrcRef::Imm32(imm32) => {
924 e.set_opcode(0x3670);
925 e.set_src_imm_f20(20..39, 56, *imm32);
926 }
927 SrcRef::CBuf(cb) => {
928 e.set_opcode(0x4b70);
929 e.set_src_cb(20..39, cb);
930 }
931 src => panic!("Invalid dfma src1: {src}"),
932 }
933
934 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
935 }
936 SrcRef::CBuf(cb) => {
937 e.set_opcode(0x5370);
938 e.set_src_cb(20..39, cb);
939 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
940 }
941 src => panic!("Invalid dfma src2: {src}"),
942 }
943
944 e.set_dst(self.dst);
945 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
946
947 e.set_bit(48, fneg_fmul);
948 e.set_bit(49, fneg_src2);
949
950 e.set_rnd_mode(50..52, self.rnd_mode);
951 }
952 }
953
954 impl SM50Op for OpDMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)955 fn legalize(&mut self, b: &mut LegalizeBuilder) {
956 use RegFile::GPR;
957 let [src0, src1] = &mut self.srcs;
958 swap_srcs_if_not_reg(src0, src1, GPR);
959 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
960 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
961 }
962
encode(&self, e: &mut SM50Encoder<'_>)963 fn encode(&self, e: &mut SM50Encoder<'_>) {
964 match &self.srcs[1].src_ref {
965 SrcRef::Zero | SrcRef::Reg(_) => {
966 e.set_opcode(0x5c50);
967 e.set_reg_fmod_src(20..28, 49, 45, self.srcs[1]);
968 }
969 SrcRef::Imm32(imm32) => {
970 e.set_opcode(0x3850);
971 e.set_src_imm_f20(20..39, 56, *imm32);
972 assert!(self.srcs[1].src_mod.is_none());
973 }
974 SrcRef::CBuf(_) => {
975 e.set_opcode(0x4c50);
976 e.set_cb_fmod_src(20..39, 49, 45, self.srcs[1]);
977 }
978 src => panic!("Invalid dmnmx src1: {src}"),
979 }
980
981 e.set_reg_fmod_src(8..16, 46, 48, self.srcs[0]);
982 e.set_dst(self.dst);
983 e.set_pred_src(39..42, 42, self.min);
984 }
985 }
986
987 impl SM50Op for OpDMul {
legalize(&mut self, b: &mut LegalizeBuilder)988 fn legalize(&mut self, b: &mut LegalizeBuilder) {
989 use RegFile::GPR;
990 let [src0, src1] = &mut self.srcs;
991 b.copy_alu_src_if_fabs(src0, SrcType::F64);
992 b.copy_alu_src_if_fabs(src1, SrcType::F64);
993 swap_srcs_if_not_reg(src0, src1, GPR);
994 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
995 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
996 }
997
encode(&self, e: &mut SM50Encoder<'_>)998 fn encode(&self, e: &mut SM50Encoder<'_>) {
999 assert!(!self.srcs[0].src_mod.has_fabs());
1000 assert!(!self.srcs[1].src_mod.has_fabs());
1001
1002 // There is one fneg bit shared by both sources
1003 let fneg =
1004 self.srcs[0].src_mod.has_fneg() ^ self.srcs[1].src_mod.has_fneg();
1005
1006 match &self.srcs[1].src_ref {
1007 SrcRef::Zero | SrcRef::Reg(_) => {
1008 e.set_opcode(0x5c80);
1009 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1010 }
1011 SrcRef::Imm32(imm32) => {
1012 e.set_opcode(0x3880);
1013 e.set_src_imm_f20(20..39, 56, *imm32);
1014 }
1015 SrcRef::CBuf(cb) => {
1016 e.set_opcode(0x4c80);
1017 e.set_src_cb(20..39, cb);
1018 }
1019 src => panic!("Invalid dmul src1: {src}"),
1020 }
1021
1022 e.set_dst(self.dst);
1023 e.set_reg_src_ref(8..16, self.srcs[0].src_ref);
1024
1025 e.set_rnd_mode(39..41, self.rnd_mode);
1026 e.set_bit(48, fneg);
1027 }
1028 }
1029
1030 impl SM50Op for OpDSetP {
legalize(&mut self, b: &mut LegalizeBuilder)1031 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1032 use RegFile::GPR;
1033 let [src0, src1] = &mut self.srcs;
1034 if swap_srcs_if_not_reg(src0, src1, GPR) {
1035 self.cmp_op = self.cmp_op.flip();
1036 }
1037 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::F64);
1038 b.copy_alu_src_if_f20_overflow(src1, GPR, SrcType::F64);
1039 }
1040
encode(&self, e: &mut SM50Encoder<'_>)1041 fn encode(&self, e: &mut SM50Encoder<'_>) {
1042 match &self.srcs[1].src_ref {
1043 SrcRef::Zero | SrcRef::Reg(_) => {
1044 e.set_opcode(0x5b80);
1045 e.set_reg_fmod_src(20..28, 44, 6, self.srcs[1]);
1046 }
1047 SrcRef::Imm32(imm32) => {
1048 e.set_opcode(0x3680);
1049 e.set_src_imm_f20(20..39, 56, *imm32);
1050 assert!(self.srcs[1].src_mod.is_none());
1051 }
1052 SrcRef::CBuf(_) => {
1053 e.set_opcode(0x4b80);
1054 e.set_reg_fmod_src(20..39, 44, 6, self.srcs[1]);
1055 }
1056 src => panic!("Invalid dsetp src1: {src}"),
1057 }
1058
1059 e.set_pred_dst(3..6, self.dst);
1060 e.set_pred_dst(0..3, Dst::None); // dst1
1061 e.set_pred_src(39..42, 42, self.accum);
1062 e.set_pred_set_op(45..47, self.set_op);
1063 e.set_float_cmp_op(48..52, self.cmp_op);
1064 e.set_reg_fmod_src(8..16, 7, 43, self.srcs[0]);
1065 }
1066 }
1067
1068 impl SM50Op for OpBfe {
legalize(&mut self, b: &mut LegalizeBuilder)1069 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1070 use RegFile::GPR;
1071 b.copy_alu_src_if_not_reg(&mut self.base, GPR, SrcType::ALU);
1072 }
1073
encode(&self, e: &mut SM50Encoder<'_>)1074 fn encode(&self, e: &mut SM50Encoder<'_>) {
1075 match &self.range.src_ref {
1076 SrcRef::Zero | SrcRef::Reg(_) => {
1077 e.set_opcode(0x5c00);
1078 e.set_reg_src(20..28, self.range);
1079 }
1080 SrcRef::Imm32(imm32) => {
1081 e.set_opcode(0x3800);
1082 // Only the bottom 16 bits of the immediate matter
1083 e.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
1084 }
1085 SrcRef::CBuf(cbuf) => {
1086 e.set_opcode(0x4c00);
1087 e.set_src_cb(20..39, cbuf);
1088 }
1089 src => panic!("Invalid bfe range: {src}"),
1090 }
1091
1092 if self.signed {
1093 e.set_bit(48, true);
1094 }
1095
1096 if self.reverse {
1097 e.set_bit(40, true);
1098 }
1099
1100 e.set_reg_src(8..16, self.base);
1101 e.set_dst(self.dst);
1102 }
1103 }
1104
1105 impl SM50Op for OpFlo {
legalize(&mut self, b: &mut LegalizeBuilder)1106 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1107 use RegFile::GPR;
1108 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1109 }
1110
encode(&self, e: &mut SM50Encoder<'_>)1111 fn encode(&self, e: &mut SM50Encoder<'_>) {
1112 match &self.src.src_ref {
1113 SrcRef::Zero | SrcRef::Reg(_) => {
1114 e.set_opcode(0x5c30);
1115 e.set_reg_src_ref(20..28, self.src.src_ref);
1116 }
1117 SrcRef::Imm32(imm32) => {
1118 e.set_opcode(0x3830);
1119 e.set_src_imm_i20(20..39, 56, *imm32);
1120 assert!(self.src.src_mod.is_none());
1121 }
1122 SrcRef::CBuf(cb) => {
1123 e.set_opcode(0x4c30);
1124 e.set_src_cb(20..39, cb);
1125 }
1126 src => panic!("Invalid flo src: {src}"),
1127 }
1128
1129 e.set_dst(self.dst);
1130 e.set_bit(40, self.src.src_mod.is_bnot());
1131 e.set_bit(48, self.signed);
1132 e.set_bit(41, self.return_shift_amount);
1133 e.set_bit(47, false); /* dst.CC */
1134 }
1135 }
1136
1137 impl SM50Op for OpIAdd2 {
legalize(&mut self, b: &mut LegalizeBuilder)1138 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1139 use RegFile::GPR;
1140 let [src0, src1] = &mut self.srcs;
1141 swap_srcs_if_not_reg(src0, src1, GPR);
1142 if src0.src_mod.is_ineg() && src1.src_mod.is_ineg() {
1143 assert!(self.carry_out.is_none());
1144 let val = b.alloc_ssa(GPR, 1);
1145 b.push_op(OpIAdd2 {
1146 dst: val.into(),
1147 carry_out: Dst::None,
1148 srcs: [Src::new_zero(), *src0],
1149 });
1150 *src0 = val.into();
1151 }
1152 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32);
1153 if !self.carry_out.is_none() {
1154 b.copy_alu_src_if_ineg_imm(src1, GPR, SrcType::I32);
1155 }
1156 }
1157
encode(&self, e: &mut SM50Encoder<'_>)1158 fn encode(&self, e: &mut SM50Encoder<'_>) {
1159 // Hardware requires at least one of these be unmodified. Otherwise, it
1160 // encodes as iadd.po which isn't what we want.
1161 assert!(
1162 self.srcs[0].src_mod.is_none() || self.srcs[1].src_mod.is_none()
1163 );
1164
1165 let carry_out = match self.carry_out {
1166 Dst::Reg(reg) if reg.file() == RegFile::Carry => true,
1167 Dst::None => false,
1168 dst => panic!("Invalid iadd carry_out: {dst}"),
1169 };
1170
1171 if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1172 e.set_opcode(0x1c00);
1173
1174 e.set_dst(self.dst);
1175 e.set_reg_ineg_src(8..16, 56, self.srcs[0]);
1176 e.set_src_imm32(20..52, imm32);
1177
1178 e.set_bit(52, carry_out);
1179 e.set_bit(53, false); // .X
1180 } else {
1181 match &self.srcs[1].src_ref {
1182 SrcRef::Zero | SrcRef::Reg(_) => {
1183 e.set_opcode(0x5c10);
1184 e.set_reg_ineg_src(20..28, 48, self.srcs[1]);
1185 }
1186 SrcRef::Imm32(imm32) => {
1187 e.set_opcode(0x3810);
1188 e.set_src_imm_i20(20..39, 56, *imm32);
1189 assert!(self.srcs[1].src_mod.is_none());
1190 }
1191 SrcRef::CBuf(_) => {
1192 e.set_opcode(0x4c10);
1193 e.set_cb_ineg_src(20..39, 48, self.srcs[1]);
1194 }
1195 src => panic!("Invalid iadd src1: {src}"),
1196 }
1197
1198 e.set_dst(self.dst);
1199 e.set_reg_ineg_src(8..16, 49, self.srcs[0]);
1200
1201 e.set_bit(43, false); // .X
1202 e.set_bit(47, carry_out);
1203 }
1204 }
1205 }
1206
1207 impl SM50Op for OpIAdd2X {
legalize(&mut self, b: &mut LegalizeBuilder)1208 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1209 use RegFile::GPR;
1210 let [src0, src1] = &mut self.srcs;
1211 swap_srcs_if_not_reg(src0, src1, GPR);
1212 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::I32);
1213 }
1214
encode(&self, e: &mut SM50Encoder<'_>)1215 fn encode(&self, e: &mut SM50Encoder<'_>) {
1216 match self.carry_in.src_ref {
1217 SrcRef::Reg(reg) if reg.file() == RegFile::Carry => (),
1218 src => panic!("Invalid iadd.x carry_in: {src}"),
1219 }
1220
1221 let carry_out = match self.carry_out {
1222 Dst::Reg(reg) if reg.file() == RegFile::Carry => true,
1223 Dst::None => false,
1224 dst => panic!("Invalid iadd.x carry_out: {dst}"),
1225 };
1226
1227 if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1228 e.set_opcode(0x1c00);
1229
1230 e.set_dst(self.dst);
1231 e.set_reg_bnot_src(8..16, 56, self.srcs[0]);
1232 e.set_src_imm32(20..52, imm32);
1233
1234 e.set_bit(52, carry_out);
1235 e.set_bit(53, true); // .X
1236 } else {
1237 match &self.srcs[1].src_ref {
1238 SrcRef::Zero | SrcRef::Reg(_) => {
1239 e.set_opcode(0x5c10);
1240 e.set_reg_bnot_src(20..28, 48, self.srcs[1]);
1241 }
1242 SrcRef::Imm32(imm32) => {
1243 e.set_opcode(0x3810);
1244 e.set_src_imm_i20(20..39, 56, *imm32);
1245 assert!(self.srcs[1].src_mod.is_none());
1246 }
1247 SrcRef::CBuf(_) => {
1248 e.set_opcode(0x4c10);
1249 e.set_cb_bnot_src(20..39, 48, self.srcs[1]);
1250 }
1251 src => panic!("Invalid iadd.x src1: {src}"),
1252 }
1253
1254 e.set_dst(self.dst);
1255 e.set_reg_bnot_src(8..16, 49, self.srcs[0]);
1256
1257 e.set_bit(43, true); // .X
1258 e.set_bit(47, carry_out);
1259 }
1260 }
1261 }
1262
1263 impl SM50Op for OpIMad {
legalize(&mut self, b: &mut LegalizeBuilder)1264 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1265 use RegFile::GPR;
1266 let [src0, src1, src2] = &mut self.srcs;
1267 swap_srcs_if_not_reg(src0, src1, GPR);
1268 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1269 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1270 if src_is_reg(src1, GPR) {
1271 b.copy_alu_src_if_imm(src2, GPR, SrcType::ALU);
1272 } else {
1273 b.copy_alu_src_if_not_reg(src2, GPR, SrcType::ALU);
1274 }
1275 }
1276
encode(&self, e: &mut SM50Encoder<'_>)1277 fn encode(&self, e: &mut SM50Encoder<'_>) {
1278 // There is one ineg bit shared by the two imul sources
1279 let ineg_imul =
1280 self.srcs[0].src_mod.is_ineg() ^ self.srcs[1].src_mod.is_ineg();
1281 let ineg_src2 = self.srcs[2].src_mod.is_ineg();
1282
1283 match &self.srcs[2].src_ref {
1284 SrcRef::Zero | SrcRef::Reg(_) => {
1285 match &self.srcs[1].src_ref {
1286 SrcRef::Zero | SrcRef::Reg(_) => {
1287 e.set_opcode(0x5a00);
1288 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1289 }
1290 SrcRef::Imm32(imm32) => {
1291 e.set_opcode(0x3400);
1292 e.set_src_imm_i20(20..39, 56, *imm32);
1293 }
1294 SrcRef::CBuf(cb) => {
1295 e.set_opcode(0x4a00);
1296 e.set_src_cb(20..39, cb);
1297 }
1298 src => panic!("Invalid imad src1: {src}"),
1299 }
1300
1301 e.set_reg_src_ref(39..47, self.srcs[2].src_ref);
1302 }
1303 SrcRef::CBuf(cb) => {
1304 e.set_opcode(0x5200);
1305 e.set_src_cb(20..39, cb);
1306 e.set_reg_src_ref(39..47, self.srcs[1].src_ref);
1307 }
1308 src => panic!("Invalid imad src2: {src}"),
1309 }
1310
1311 e.set_dst(self.dst);
1312 e.set_reg_src(8..16, self.srcs[0]);
1313
1314 e.set_bit(48, self.signed); // src0 signed
1315 e.set_bit(51, ineg_imul);
1316 e.set_bit(52, ineg_src2);
1317 e.set_bit(53, self.signed); // src1 signed
1318 }
1319 }
1320
1321 impl SM50Op for OpIMul {
legalize(&mut self, b: &mut LegalizeBuilder)1322 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1323 use RegFile::GPR;
1324 let [src0, src1] = &mut self.srcs;
1325 if swap_srcs_if_not_reg(src0, src1, GPR) {
1326 self.signed.swap(0, 1);
1327 }
1328 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1329 }
1330
encode(&self, e: &mut SM50Encoder<'_>)1331 fn encode(&self, e: &mut SM50Encoder<'_>) {
1332 assert!(self.srcs[0].src_mod.is_none());
1333 assert!(self.srcs[1].src_mod.is_none());
1334
1335 if let Some(i) = self.srcs[1].as_imm_not_i20() {
1336 e.set_opcode(0x1fc0);
1337 e.set_src_imm32(20..52, i);
1338
1339 e.set_bit(53, self.high);
1340 e.set_bit(54, self.signed[0]);
1341 e.set_bit(55, self.signed[1]);
1342 } else {
1343 match &self.srcs[1].src_ref {
1344 SrcRef::Zero | SrcRef::Reg(_) => {
1345 e.set_opcode(0x5c38);
1346 e.set_reg_src(20..28, self.srcs[1]);
1347 }
1348 SrcRef::Imm32(imm32) => {
1349 e.set_opcode(0x3838);
1350 e.set_src_imm_i20(20..39, 56, *imm32);
1351 }
1352 SrcRef::CBuf(cb) => {
1353 e.set_opcode(0x4c38);
1354 e.set_src_cb(20..39, cb);
1355 }
1356 src => panic!("Invalid imul src1: {src}"),
1357 };
1358
1359 e.set_bit(39, self.high);
1360 e.set_bit(40, self.signed[0]);
1361 e.set_bit(41, self.signed[1]);
1362 }
1363
1364 e.set_dst(self.dst);
1365 e.set_reg_src(8..16, self.srcs[0]);
1366 }
1367 }
1368
1369 impl SM50Op for OpIMnMx {
legalize(&mut self, b: &mut LegalizeBuilder)1370 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1371 use RegFile::GPR;
1372 let [src0, src1] = &mut self.srcs;
1373 swap_srcs_if_not_reg(src0, src1, GPR);
1374 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1375 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1376 }
1377
encode(&self, e: &mut SM50Encoder<'_>)1378 fn encode(&self, e: &mut SM50Encoder<'_>) {
1379 match &self.srcs[1].src_ref {
1380 SrcRef::Zero | SrcRef::Reg(_) => {
1381 e.set_opcode(0x5c20);
1382 e.set_reg_src(20..28, self.srcs[1]);
1383 }
1384 SrcRef::Imm32(imm32) => {
1385 e.set_opcode(0x3820);
1386 e.set_src_imm_i20(20..39, 56, *imm32);
1387 assert!(self.srcs[1].src_mod.is_none());
1388 }
1389 SrcRef::CBuf(cb) => {
1390 e.set_opcode(0x4c20);
1391 e.set_src_cb(20..39, cb);
1392 }
1393 src => panic!("Invalid imnmx src1: {src}"),
1394 }
1395
1396 e.set_dst(self.dst);
1397 e.set_reg_src(8..16, self.srcs[0]);
1398 e.set_pred_src(39..42, 42, self.min);
1399 e.set_bit(47, false); // .CC
1400 e.set_bit(
1401 48,
1402 match self.cmp_type {
1403 IntCmpType::U32 => false,
1404 IntCmpType::I32 => true,
1405 },
1406 );
1407 }
1408 }
1409
1410 impl SM50Op for OpISetP {
legalize(&mut self, b: &mut LegalizeBuilder)1411 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1412 use RegFile::GPR;
1413 let [src0, src1] = &mut self.srcs;
1414 if swap_srcs_if_not_reg(src0, src1, GPR) {
1415 self.cmp_op = self.cmp_op.flip();
1416 }
1417 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1418 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1419 }
1420
encode(&self, e: &mut SM50Encoder<'_>)1421 fn encode(&self, e: &mut SM50Encoder<'_>) {
1422 match &self.srcs[1].src_ref {
1423 SrcRef::Zero | SrcRef::Reg(_) => {
1424 e.set_opcode(0x5b60);
1425 e.set_reg_src(20..28, self.srcs[1]);
1426 }
1427 SrcRef::Imm32(imm32) => {
1428 e.set_opcode(0x3660);
1429 e.set_src_imm_i20(20..39, 56, *imm32);
1430 assert!(self.srcs[1].src_mod.is_none());
1431 }
1432 SrcRef::CBuf(cb) => {
1433 e.set_opcode(0x4b60);
1434 e.set_src_cb(20..39, cb);
1435 }
1436 src => panic!("Invalid isetp src1: {src}"),
1437 }
1438
1439 e.set_pred_dst(0..3, Dst::None); // dst1
1440 e.set_pred_dst(3..6, self.dst);
1441 e.set_reg_src(8..16, self.srcs[0]);
1442 e.set_pred_src(39..42, 42, self.accum);
1443
1444 // isetp.x seems to take the accumulator into account and we don't fully
1445 // understand how. Until we do, disallow it.
1446 assert!(!self.ex);
1447 e.set_bit(43, self.ex);
1448 e.set_pred_set_op(45..47, self.set_op);
1449
1450 e.set_field(
1451 48..49,
1452 match self.cmp_type {
1453 IntCmpType::U32 => 0_u32,
1454 IntCmpType::I32 => 1_u32,
1455 },
1456 );
1457 e.set_int_cmp_op(49..52, self.cmp_op);
1458 }
1459 }
1460
1461 impl SM50Op for OpLop2 {
legalize(&mut self, b: &mut LegalizeBuilder)1462 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1463 use RegFile::GPR;
1464 let [src0, src1] = &mut self.srcs;
1465 match self.op {
1466 LogicOp2::PassB => {
1467 *src0 = 0.into();
1468 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1469 }
1470 LogicOp2::And | LogicOp2::Or | LogicOp2::Xor => {
1471 swap_srcs_if_not_reg(src0, src1, GPR);
1472 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1473 }
1474 }
1475 }
1476
encode(&self, e: &mut SM50Encoder<'_>)1477 fn encode(&self, e: &mut SM50Encoder<'_>) {
1478 if let Some(imm32) = self.srcs[1].as_imm_not_i20() {
1479 e.set_opcode(0x0400);
1480
1481 e.set_dst(self.dst);
1482 e.set_reg_bnot_src(8..16, 55, self.srcs[0]);
1483 e.set_src_imm32(20..52, imm32);
1484 e.set_field(
1485 53..55,
1486 match self.op {
1487 LogicOp2::And => 0_u8,
1488 LogicOp2::Or => 1_u8,
1489 LogicOp2::Xor => 2_u8,
1490 LogicOp2::PassB => {
1491 panic!("PASS_B is not supported for LOP32I");
1492 }
1493 },
1494 );
1495 e.set_bit(56, self.srcs[1].src_mod.is_bnot());
1496 } else {
1497 match &self.srcs[1].src_ref {
1498 SrcRef::Zero | SrcRef::Reg(_) => {
1499 e.set_opcode(0x5c40);
1500 e.set_reg_bnot_src(20..28, 40, self.srcs[1]);
1501 }
1502 SrcRef::Imm32(imm32) => {
1503 e.set_opcode(0x3840);
1504 e.set_src_imm_i20(20..39, 56, *imm32);
1505 assert!(self.srcs[1].src_mod.is_none());
1506 }
1507 SrcRef::CBuf(_) => {
1508 e.set_opcode(0x4c40);
1509 e.set_cb_bnot_src(20..39, 40, self.srcs[1]);
1510 }
1511 src => panic!("Invalid lop2 src1: {src}"),
1512 }
1513
1514 e.set_dst(self.dst);
1515 e.set_reg_bnot_src(8..16, 39, self.srcs[0]);
1516
1517 e.set_field(
1518 41..43,
1519 match self.op {
1520 LogicOp2::And => 0_u8,
1521 LogicOp2::Or => 1_u8,
1522 LogicOp2::Xor => 2_u8,
1523 LogicOp2::PassB => 3_u8,
1524 },
1525 );
1526
1527 e.set_pred_dst(48..51, Dst::None);
1528 }
1529 }
1530 }
1531
1532 impl SM50Op for OpPopC {
legalize(&mut self, b: &mut LegalizeBuilder)1533 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1534 use RegFile::GPR;
1535 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1536 }
1537
encode(&self, e: &mut SM50Encoder<'_>)1538 fn encode(&self, e: &mut SM50Encoder<'_>) {
1539 match &self.src.src_ref {
1540 SrcRef::Zero | SrcRef::Reg(_) => {
1541 e.set_opcode(0x5c08);
1542 e.set_reg_bnot_src(20..28, 40, self.src);
1543 }
1544 SrcRef::Imm32(imm32) => {
1545 e.set_opcode(0x3808);
1546 e.set_src_imm_i20(20..39, 56, *imm32);
1547 e.set_bit(40, self.src.src_mod.is_bnot());
1548 }
1549 SrcRef::CBuf(_) => {
1550 e.set_opcode(0x4c08);
1551 e.set_cb_bnot_src(20..39, 40, self.src);
1552 }
1553 src => panic!("Invalid popc src1: {src}"),
1554 }
1555
1556 e.set_dst(self.dst);
1557 }
1558 }
1559
1560 impl SM50Op for OpShf {
legalize(&mut self, b: &mut LegalizeBuilder)1561 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1562 use RegFile::GPR;
1563 b.copy_alu_src_if_not_reg(&mut self.high, GPR, SrcType::ALU);
1564 b.copy_alu_src_if_not_reg(&mut self.low, GPR, SrcType::GPR);
1565 b.copy_alu_src_if_not_reg_or_imm(&mut self.shift, GPR, SrcType::GPR);
1566 b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::GPR);
1567 }
1568
encode(&self, e: &mut SM50Encoder<'_>)1569 fn encode(&self, e: &mut SM50Encoder<'_>) {
1570 match &self.shift.src_ref {
1571 SrcRef::Zero | SrcRef::Reg(_) => {
1572 e.set_opcode(if self.right { 0x5cf8 } else { 0x5bf8 });
1573 e.set_reg_src(20..28, self.shift);
1574 }
1575 SrcRef::Imm32(imm32) => {
1576 e.set_opcode(if self.right { 0x38f8 } else { 0x36f8 });
1577 e.set_src_imm_i20(20..39, 56, *imm32);
1578 assert!(self.shift.src_mod.is_none());
1579 }
1580 src => panic!("Invalid shf shift: {src}"),
1581 }
1582
1583 e.set_field(
1584 37..39,
1585 match self.data_type {
1586 IntType::I32 => 0_u8,
1587 IntType::U32 => 0_u8,
1588 IntType::U64 => 2_u8,
1589 IntType::I64 => 3_u8,
1590 _ => panic!("Invalid shift data type"),
1591 },
1592 );
1593
1594 e.set_dst(self.dst);
1595 e.set_reg_src(8..16, self.low);
1596 e.set_reg_src(39..47, self.high);
1597
1598 e.set_bit(47, false); // .CC
1599
1600 // If we're shifting left, the HW will throw an illegal instrucction
1601 // encoding error if we set .high and will give us the high part anyway
1602 // if we don't. This makes everything a bit more consistent.
1603 assert!(self.right || self.dst_high);
1604 e.set_bit(48, self.dst_high && self.right); // .high
1605
1606 e.set_bit(49, false); // .X
1607 e.set_bit(50, self.wrap);
1608 }
1609 }
1610
1611 impl SM50Op for OpShl {
legalize(&mut self, b: &mut LegalizeBuilder)1612 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1613 use RegFile::GPR;
1614 b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1615 b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU);
1616 }
1617
encode(&self, e: &mut SM50Encoder<'_>)1618 fn encode(&self, e: &mut SM50Encoder<'_>) {
1619 e.set_dst(self.dst);
1620 e.set_reg_src(8..16, self.src);
1621 match &self.shift.src_ref {
1622 SrcRef::Zero | SrcRef::Reg(_) => {
1623 e.set_opcode(0x5c48);
1624 e.set_reg_src(20..28, self.shift);
1625 }
1626 SrcRef::Imm32(imm32) => {
1627 e.set_opcode(0x3848);
1628 e.set_src_imm_i20(20..39, 56, *imm32);
1629 }
1630 SrcRef::CBuf(cb) => {
1631 e.set_opcode(0x4c48);
1632 e.set_src_cb(20..39, cb);
1633 }
1634 src => panic!("Invalid shl shift: {src}"),
1635 }
1636
1637 e.set_bit(39, self.wrap);
1638 }
1639 }
1640
1641 impl SM50Op for OpShr {
legalize(&mut self, b: &mut LegalizeBuilder)1642 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1643 use RegFile::GPR;
1644 b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1645 b.copy_alu_src_if_i20_overflow(&mut self.shift, GPR, SrcType::ALU);
1646 }
1647
encode(&self, e: &mut SM50Encoder<'_>)1648 fn encode(&self, e: &mut SM50Encoder<'_>) {
1649 e.set_dst(self.dst);
1650 e.set_reg_src(8..16, self.src);
1651 match &self.shift.src_ref {
1652 SrcRef::Zero | SrcRef::Reg(_) => {
1653 e.set_opcode(0x5c28);
1654 e.set_reg_src(20..28, self.shift);
1655 }
1656 SrcRef::Imm32(imm32) => {
1657 e.set_opcode(0x3828);
1658 e.set_src_imm_i20(20..39, 56, *imm32);
1659 }
1660 SrcRef::CBuf(cb) => {
1661 e.set_opcode(0x4c28);
1662 e.set_src_cb(20..39, cb);
1663 }
1664 src => panic!("Invalid shr shift: {src}"),
1665 }
1666
1667 e.set_bit(39, self.wrap);
1668 e.set_bit(48, self.signed);
1669 }
1670 }
1671
1672 impl SM50Op for OpF2F {
legalize(&mut self, b: &mut LegalizeBuilder)1673 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1674 use RegFile::GPR;
1675 b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU);
1676 }
1677
encode(&self, e: &mut SM50Encoder<'_>)1678 fn encode(&self, e: &mut SM50Encoder<'_>) {
1679 match &self.src.src_ref {
1680 SrcRef::Zero | SrcRef::Reg(_) => {
1681 e.set_opcode(0x5ca8);
1682 e.set_reg_fmod_src(20..28, 49, 45, self.src);
1683 }
1684 SrcRef::Imm32(imm32) => {
1685 e.set_opcode(0x38a8);
1686 e.set_src_imm_i20(20..39, 56, *imm32);
1687 assert!(self.src.src_mod.is_none());
1688 }
1689 SrcRef::CBuf(_) => {
1690 e.set_opcode(0x4ca8);
1691 e.set_cb_fmod_src(20..39, 49, 45, self.src);
1692 }
1693 src => panic!("Invalid f2f src: {src}"),
1694 }
1695
1696 // We can't span 32 bits
1697 assert!(
1698 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1699 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1700 );
1701 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1702 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1703
1704 e.set_rnd_mode(39..41, self.rnd_mode);
1705 e.set_bit(41, self.high);
1706 e.set_bit(42, self.integer_rnd);
1707 e.set_bit(44, self.ftz);
1708 e.set_bit(50, false); // saturate
1709
1710 e.set_dst(self.dst);
1711 }
1712 }
1713
1714 impl SM50Op for OpF2I {
legalize(&mut self, b: &mut LegalizeBuilder)1715 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1716 use RegFile::GPR;
1717 b.copy_alu_src_if_f20_overflow(&mut self.src, GPR, SrcType::ALU);
1718 }
1719
encode(&self, e: &mut SM50Encoder<'_>)1720 fn encode(&self, e: &mut SM50Encoder<'_>) {
1721 match &self.src.src_ref {
1722 SrcRef::Zero | SrcRef::Reg(_) => {
1723 e.set_opcode(0x5cb0);
1724 e.set_reg_fmod_src(20..28, 49, 45, self.src);
1725 }
1726 SrcRef::Imm32(imm32) => {
1727 e.set_opcode(0x38b0);
1728 e.set_src_imm_f20(20..39, 56, *imm32);
1729 assert!(self.src.src_mod.is_none());
1730 }
1731 SrcRef::CBuf(_) => {
1732 e.set_opcode(0x4cb0);
1733 e.set_cb_fmod_src(20..39, 49, 45, self.src);
1734 }
1735 src => panic!("Invalid f2i src: {src}"),
1736 }
1737
1738 e.set_dst(self.dst);
1739
1740 // We can't span 32 bits
1741 assert!(
1742 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1743 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1744 );
1745 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1746 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1747 e.set_bit(12, self.dst_type.is_signed());
1748
1749 e.set_rnd_mode(39..41, self.rnd_mode);
1750 e.set_bit(44, self.ftz);
1751 e.set_bit(47, false); // .CC
1752 }
1753 }
1754
1755 impl SM50Op for OpI2F {
legalize(&mut self, b: &mut LegalizeBuilder)1756 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1757 use RegFile::GPR;
1758 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1759 }
1760
encode(&self, e: &mut SM50Encoder<'_>)1761 fn encode(&self, e: &mut SM50Encoder<'_>) {
1762 match &self.src.src_ref {
1763 SrcRef::Zero | SrcRef::Reg(_) => {
1764 e.set_opcode(0x5cb8);
1765 e.set_reg_ineg_src(20..28, 45, self.src);
1766 }
1767 SrcRef::Imm32(imm32) => {
1768 e.set_opcode(0x38b8);
1769 e.set_src_imm_i20(20..39, 56, *imm32);
1770 assert!(self.src.src_mod.is_none());
1771 }
1772 SrcRef::CBuf(_) => {
1773 e.set_opcode(0x4cb8);
1774 e.set_cb_ineg_src(20..39, 45, self.src);
1775 }
1776 src => panic!("Invalid i2f src: {src}"),
1777 }
1778
1779 e.set_dst(self.dst);
1780
1781 // We can't span 32 bits
1782 assert!(
1783 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1784 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1785 );
1786 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1787 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1788 e.set_bit(13, self.src_type.is_signed());
1789
1790 e.set_rnd_mode(39..41, self.rnd_mode);
1791 e.set_field(41..43, 0_u8); // TODO: subop
1792 e.set_bit(49, false); // iabs
1793 }
1794 }
1795
1796 impl SM50Op for OpI2I {
legalize(&mut self, b: &mut LegalizeBuilder)1797 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1798 use RegFile::GPR;
1799 b.copy_alu_src_if_i20_overflow(&mut self.src, GPR, SrcType::ALU);
1800 }
1801
encode(&self, e: &mut SM50Encoder<'_>)1802 fn encode(&self, e: &mut SM50Encoder<'_>) {
1803 match &self.src.src_ref {
1804 SrcRef::Zero | SrcRef::Reg(_) => {
1805 e.set_opcode(0x5ce0);
1806 e.set_reg_src(20..28, self.src);
1807 }
1808 SrcRef::Imm32(imm32) => {
1809 e.set_opcode(0x38e0);
1810 e.set_src_imm_i20(20..39, 56, *imm32);
1811 }
1812 SrcRef::CBuf(cbuf) => {
1813 e.set_opcode(0x4ce0);
1814 e.set_src_cb(20..39, cbuf);
1815 }
1816 src => panic!("Invalid i2i src: {src}"),
1817 }
1818
1819 e.set_dst(self.dst);
1820
1821 // We can't span 32 bits
1822 assert!(
1823 (self.dst_type.bits() <= 32 && self.src_type.bits() <= 32)
1824 || (self.dst_type.bits() >= 32 && self.src_type.bits() >= 32)
1825 );
1826 e.set_field(8..10, (self.dst_type.bits() / 8).ilog2());
1827 e.set_field(10..12, (self.src_type.bits() / 8).ilog2());
1828 e.set_bit(12, self.dst_type.is_signed());
1829 e.set_bit(13, self.src_type.is_signed());
1830
1831 e.set_field(41..43, 0u8); // src.B1-3
1832 e.set_bit(45, self.neg);
1833 e.set_bit(47, false); // dst.CC
1834 e.set_bit(49, self.abs);
1835 e.set_bit(50, self.saturate);
1836 }
1837 }
1838
1839 impl SM50Op for OpMov {
legalize(&mut self, _b: &mut LegalizeBuilder)1840 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
1841 // Nothing to do
1842 }
1843
encode(&self, e: &mut SM50Encoder<'_>)1844 fn encode(&self, e: &mut SM50Encoder<'_>) {
1845 match &self.src.src_ref {
1846 SrcRef::Zero | SrcRef::Reg(_) => {
1847 e.set_opcode(0x5c98);
1848 e.set_reg_src(20..28, self.src);
1849 e.set_field(39..43, self.quad_lanes);
1850 }
1851 SrcRef::Imm32(imm32) => {
1852 e.set_opcode(0x0100);
1853 e.set_src_imm32(20..52, *imm32);
1854 e.set_field(12..16, self.quad_lanes);
1855 }
1856 SrcRef::CBuf(cb) => {
1857 e.set_opcode(0x4c98);
1858 e.set_src_cb(20..39, cb);
1859 e.set_field(39..43, self.quad_lanes);
1860 }
1861 src => panic!("Invalid mov src: {src}"),
1862 }
1863
1864 e.set_dst(self.dst);
1865 }
1866 }
1867
1868 impl SM50Op for OpPrmt {
legalize(&mut self, b: &mut LegalizeBuilder)1869 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1870 use RegFile::GPR;
1871 b.copy_alu_src_if_not_reg(&mut self.srcs[0], GPR, SrcType::GPR);
1872 b.copy_alu_src_if_not_reg(&mut self.srcs[1], GPR, SrcType::GPR);
1873 }
1874
encode(&self, e: &mut SM50Encoder<'_>)1875 fn encode(&self, e: &mut SM50Encoder<'_>) {
1876 match &self.sel.src_ref {
1877 SrcRef::Zero | SrcRef::Reg(_) => {
1878 e.set_opcode(0x5bc0);
1879 e.set_reg_src(20..28, self.sel);
1880 }
1881 SrcRef::Imm32(imm32) => {
1882 e.set_opcode(0x36c0);
1883 // Only the bottom 16 bits matter
1884 e.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
1885 }
1886 SrcRef::CBuf(cb) => {
1887 e.set_opcode(0x4bc0);
1888 e.set_src_cb(20..39, cb);
1889 }
1890 src => panic!("Invalid prmt selector: {src}"),
1891 }
1892
1893 e.set_dst(self.dst);
1894 e.set_reg_src(8..16, self.srcs[0]);
1895 e.set_reg_src(39..47, self.srcs[1]);
1896 e.set_field(
1897 48..51,
1898 match self.mode {
1899 PrmtMode::Index => 0_u8,
1900 PrmtMode::Forward4Extract => 1_u8,
1901 PrmtMode::Backward4Extract => 2_u8,
1902 PrmtMode::Replicate8 => 3_u8,
1903 PrmtMode::EdgeClampLeft => 4_u8,
1904 PrmtMode::EdgeClampRight => 5_u8,
1905 PrmtMode::Replicate16 => 6_u8,
1906 },
1907 );
1908 }
1909 }
1910
1911 impl SM50Op for OpSel {
legalize(&mut self, b: &mut LegalizeBuilder)1912 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1913 use RegFile::GPR;
1914 let [src0, src1] = &mut self.srcs;
1915 if swap_srcs_if_not_reg(src0, src1, GPR) {
1916 self.cond = self.cond.bnot();
1917 }
1918 b.copy_alu_src_if_not_reg(src0, GPR, SrcType::ALU);
1919 b.copy_alu_src_if_i20_overflow(src1, GPR, SrcType::ALU);
1920 }
1921
encode(&self, e: &mut SM50Encoder<'_>)1922 fn encode(&self, e: &mut SM50Encoder<'_>) {
1923 match &self.srcs[1].src_ref {
1924 SrcRef::Zero | SrcRef::Reg(_) => {
1925 e.set_opcode(0x5ca0);
1926 e.set_reg_src_ref(20..28, self.srcs[1].src_ref);
1927 }
1928 SrcRef::Imm32(imm32) => {
1929 e.set_opcode(0x38a0);
1930 e.set_src_imm_i20(20..39, 56, *imm32);
1931 }
1932 SrcRef::CBuf(cbuf) => {
1933 e.set_opcode(0x4ca0);
1934 e.set_src_cb(20..39, cbuf);
1935 }
1936 src => panic!("Invalid sel src1: {src}"),
1937 }
1938
1939 e.set_dst(self.dst);
1940 e.set_reg_src(8..16, self.srcs[0]);
1941 e.set_pred_src(39..42, 42, self.cond);
1942 }
1943 }
1944
1945 impl SM50Op for OpShfl {
legalize(&mut self, b: &mut LegalizeBuilder)1946 fn legalize(&mut self, b: &mut LegalizeBuilder) {
1947 use RegFile::GPR;
1948 b.copy_alu_src_if_not_reg(&mut self.src, GPR, SrcType::GPR);
1949 b.copy_alu_src_if_not_reg_or_imm(&mut self.lane, GPR, SrcType::ALU);
1950 b.copy_alu_src_if_not_reg_or_imm(&mut self.c, GPR, SrcType::ALU);
1951 }
1952
encode(&self, e: &mut SM50Encoder<'_>)1953 fn encode(&self, e: &mut SM50Encoder<'_>) {
1954 e.set_opcode(0xef10);
1955
1956 e.set_dst(self.dst);
1957 e.set_pred_dst(48..51, self.in_bounds);
1958 e.set_reg_src(8..16, self.src);
1959
1960 match &self.lane.src_ref {
1961 SrcRef::Zero | SrcRef::Reg(_) => {
1962 e.set_bit(28, false);
1963 e.set_reg_src(20..28, self.lane);
1964 }
1965 SrcRef::Imm32(imm32) => {
1966 e.set_bit(28, true);
1967 e.set_field(20..25, *imm32 & 0x1f);
1968 }
1969 src => panic!("Invalid shfl lane: {src}"),
1970 }
1971 match &self.c.src_ref {
1972 SrcRef::Zero | SrcRef::Reg(_) => {
1973 e.set_bit(29, false);
1974 e.set_reg_src(39..47, self.c);
1975 }
1976 SrcRef::Imm32(imm32) => {
1977 e.set_bit(29, true);
1978 e.set_field(34..47, *imm32 & 0x1f1f);
1979 }
1980 src => panic!("Invalid shfl c: {src}"),
1981 }
1982
1983 e.set_field(
1984 30..32,
1985 match self.op {
1986 ShflOp::Idx => 0u8,
1987 ShflOp::Up => 1u8,
1988 ShflOp::Down => 2u8,
1989 ShflOp::Bfly => 3u8,
1990 },
1991 );
1992 }
1993 }
1994
1995 impl SM50Op for OpPSetP {
legalize(&mut self, _b: &mut LegalizeBuilder)1996 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
1997 // Nothing to do
1998 }
1999
encode(&self, e: &mut SM50Encoder<'_>)2000 fn encode(&self, e: &mut SM50Encoder<'_>) {
2001 e.set_opcode(0x5090);
2002
2003 e.set_pred_dst(3..6, self.dsts[0]);
2004 e.set_pred_dst(0..3, self.dsts[1]);
2005
2006 e.set_pred_src(12..15, 15, self.srcs[0]);
2007 e.set_pred_src(29..32, 32, self.srcs[1]);
2008 e.set_pred_src(39..42, 42, self.srcs[2]);
2009
2010 e.set_pred_set_op(24..26, self.ops[0]);
2011 e.set_pred_set_op(45..47, self.ops[1]);
2012 }
2013 }
2014
2015 impl SM50Encoder<'_> {
set_tex_dim(&mut self, range: Range<usize>, dim: TexDim)2016 fn set_tex_dim(&mut self, range: Range<usize>, dim: TexDim) {
2017 assert!(range.len() == 3);
2018 self.set_field(
2019 range,
2020 match dim {
2021 TexDim::_1D => 0_u8,
2022 TexDim::Array1D => 1_u8,
2023 TexDim::_2D => 2_u8,
2024 TexDim::Array2D => 3_u8,
2025 TexDim::_3D => 4_u8,
2026 TexDim::Cube => 6_u8,
2027 TexDim::ArrayCube => 7_u8,
2028 },
2029 );
2030 }
2031
set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode)2032 fn set_tex_lod_mode(&mut self, range: Range<usize>, lod_mode: TexLodMode) {
2033 assert!(range.len() == 2);
2034 self.set_field(
2035 range,
2036 match lod_mode {
2037 TexLodMode::Auto => 0_u8,
2038 TexLodMode::Zero => 1_u8,
2039 TexLodMode::Bias => 2_u8,
2040 TexLodMode::Lod => 3_u8,
2041 _ => panic!("Unknown LOD mode"),
2042 },
2043 );
2044 }
2045 }
2046
2047 impl SM50Op for OpTex {
legalize(&mut self, b: &mut LegalizeBuilder)2048 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2049 legalize_ext_instr(self, b);
2050 }
2051
encode(&self, e: &mut SM50Encoder<'_>)2052 fn encode(&self, e: &mut SM50Encoder<'_>) {
2053 e.set_opcode(0xdeb8);
2054
2055 e.set_dst(self.dsts[0]);
2056 assert!(self.dsts[1].is_none());
2057 assert!(self.fault.is_none());
2058 e.set_reg_src(8..16, self.srcs[0]);
2059 e.set_reg_src(20..28, self.srcs[1]);
2060
2061 e.set_tex_dim(28..31, self.dim);
2062 e.set_field(31..35, self.mask);
2063 e.set_bit(35, false); // ToDo: NDV
2064 e.set_bit(36, self.offset);
2065 e.set_tex_lod_mode(37..39, self.lod_mode);
2066 e.set_bit(49, false); // TODO: .NODEP
2067 e.set_bit(50, self.z_cmpr);
2068 }
2069 }
2070
2071 impl SM50Op for OpTld {
legalize(&mut self, b: &mut LegalizeBuilder)2072 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2073 legalize_ext_instr(self, b);
2074 }
2075
encode(&self, e: &mut SM50Encoder<'_>)2076 fn encode(&self, e: &mut SM50Encoder<'_>) {
2077 e.set_opcode(0xdd38);
2078
2079 e.set_dst(self.dsts[0]);
2080 assert!(self.dsts[1].is_none());
2081 assert!(self.fault.is_none());
2082 e.set_reg_src(8..16, self.srcs[0]);
2083 e.set_reg_src(20..28, self.srcs[1]);
2084
2085 e.set_tex_dim(28..31, self.dim);
2086 e.set_field(31..35, self.mask);
2087 e.set_bit(35, self.offset);
2088 e.set_bit(49, false); // TODO: .NODEP
2089 e.set_bit(50, self.is_ms);
2090
2091 assert!(
2092 self.lod_mode == TexLodMode::Zero
2093 || self.lod_mode == TexLodMode::Lod
2094 );
2095 e.set_bit(55, self.lod_mode == TexLodMode::Lod);
2096 }
2097 }
2098
2099 impl SM50Op for OpTld4 {
legalize(&mut self, b: &mut LegalizeBuilder)2100 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2101 legalize_ext_instr(self, b);
2102 }
2103
encode(&self, e: &mut SM50Encoder<'_>)2104 fn encode(&self, e: &mut SM50Encoder<'_>) {
2105 e.set_opcode(0xdef8);
2106
2107 e.set_dst(self.dsts[0]);
2108 assert!(self.dsts[1].is_none());
2109 assert!(self.fault.is_none());
2110 e.set_reg_src(8..16, self.srcs[0]);
2111 e.set_reg_src(20..28, self.srcs[1]);
2112
2113 e.set_tex_dim(28..31, self.dim);
2114 e.set_field(31..35, self.mask);
2115 e.set_bit(35, false); // ToDo: NDV
2116 e.set_field(
2117 36..38,
2118 match self.offset_mode {
2119 Tld4OffsetMode::None => 0_u8,
2120 Tld4OffsetMode::AddOffI => 1_u8,
2121 Tld4OffsetMode::PerPx => 2_u8,
2122 },
2123 );
2124 e.set_field(38..40, self.comp);
2125 e.set_bit(49, false); // TODO: .NODEP
2126 e.set_bit(50, self.z_cmpr);
2127 }
2128 }
2129
2130 impl SM50Op for OpTmml {
legalize(&mut self, b: &mut LegalizeBuilder)2131 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2132 legalize_ext_instr(self, b);
2133 }
2134
encode(&self, e: &mut SM50Encoder<'_>)2135 fn encode(&self, e: &mut SM50Encoder<'_>) {
2136 e.set_opcode(0xdf60);
2137
2138 e.set_dst(self.dsts[0]);
2139 assert!(self.dsts[1].is_none());
2140 e.set_reg_src(8..16, self.srcs[0]);
2141 e.set_reg_src(20..28, self.srcs[1]);
2142
2143 e.set_tex_dim(28..31, self.dim);
2144 e.set_field(31..35, self.mask);
2145 e.set_bit(35, false); // ToDo: NDV
2146 e.set_bit(49, false); // TODO: .NODEP
2147 }
2148 }
2149
2150 impl SM50Op for OpTxd {
legalize(&mut self, b: &mut LegalizeBuilder)2151 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2152 legalize_ext_instr(self, b);
2153 }
2154
encode(&self, e: &mut SM50Encoder<'_>)2155 fn encode(&self, e: &mut SM50Encoder<'_>) {
2156 e.set_opcode(0xde78);
2157
2158 e.set_dst(self.dsts[0]);
2159 assert!(self.dsts[1].is_none());
2160 assert!(self.fault.is_none());
2161 e.set_reg_src(8..16, self.srcs[0]);
2162 e.set_reg_src(20..28, self.srcs[1]);
2163
2164 e.set_tex_dim(28..31, self.dim);
2165 e.set_field(31..35, self.mask);
2166 e.set_bit(35, self.offset);
2167 e.set_bit(49, false); // TODO: .NODEP
2168 }
2169 }
2170
2171 impl SM50Op for OpTxq {
legalize(&mut self, b: &mut LegalizeBuilder)2172 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2173 legalize_ext_instr(self, b);
2174 }
2175
encode(&self, e: &mut SM50Encoder<'_>)2176 fn encode(&self, e: &mut SM50Encoder<'_>) {
2177 e.set_opcode(0xdf50);
2178
2179 e.set_dst(self.dsts[0]);
2180 assert!(self.dsts[1].is_none());
2181 e.set_reg_src(8..16, self.src);
2182
2183 e.set_field(
2184 22..28,
2185 match self.query {
2186 TexQuery::Dimension => 1_u8,
2187 TexQuery::TextureType => 2_u8,
2188 TexQuery::SamplerPos => 5_u8,
2189 // TexQuery::Filter => 0x10_u8,
2190 // TexQuery::Lod => 0x12_u8,
2191 // TexQuery::Wrap => 0x14_u8,
2192 // TexQuery::BorderColour => 0x16,
2193 },
2194 );
2195 e.set_field(31..35, self.mask);
2196 e.set_bit(49, false); // TODO: .NODEP
2197 }
2198 }
2199
2200 impl SM50Encoder<'_> {
set_mem_type(&mut self, range: Range<usize>, mem_type: MemType)2201 fn set_mem_type(&mut self, range: Range<usize>, mem_type: MemType) {
2202 assert!(range.len() == 3);
2203 self.set_field(
2204 range,
2205 match mem_type {
2206 MemType::U8 => 0_u8,
2207 MemType::I8 => 1_u8,
2208 MemType::U16 => 2_u8,
2209 MemType::I16 => 3_u8,
2210 MemType::B32 => 4_u8,
2211 MemType::B64 => 5_u8,
2212 MemType::B128 => 6_u8,
2213 },
2214 );
2215 }
2216
set_mem_order(&mut self, _order: &MemOrder)2217 fn set_mem_order(&mut self, _order: &MemOrder) {
2218 // TODO: order and scope aren't present before SM70, what should we do?
2219 }
2220
set_mem_access(&mut self, access: &MemAccess)2221 fn set_mem_access(&mut self, access: &MemAccess) {
2222 self.set_field(
2223 45..46,
2224 match access.space.addr_type() {
2225 MemAddrType::A32 => 0_u8,
2226 MemAddrType::A64 => 1_u8,
2227 },
2228 );
2229 self.set_mem_type(48..51, access.mem_type);
2230 self.set_mem_order(&access.order);
2231 }
2232
set_image_dim(&mut self, range: Range<usize>, dim: ImageDim)2233 fn set_image_dim(&mut self, range: Range<usize>, dim: ImageDim) {
2234 assert!(range.len() == 3);
2235 self.set_field(
2236 range,
2237 match dim {
2238 ImageDim::_1D => 0_u8,
2239 ImageDim::_1DBuffer => 1_u8,
2240 ImageDim::_1DArray => 2_u8,
2241 ImageDim::_2D => 3_u8,
2242 ImageDim::_2DArray => 4_u8,
2243 ImageDim::_3D => 5_u8,
2244 },
2245 );
2246 }
2247 }
2248
2249 impl SM50Op for OpSuLd {
legalize(&mut self, b: &mut LegalizeBuilder)2250 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2251 legalize_ext_instr(self, b);
2252 }
2253
encode(&self, e: &mut SM50Encoder<'_>)2254 fn encode(&self, e: &mut SM50Encoder<'_>) {
2255 e.set_opcode(0xeb00);
2256
2257 assert!(self.mask == 0x1 || self.mask == 0x3 || self.mask == 0xf);
2258 e.set_field(20..24, self.mask);
2259 e.set_image_dim(33..36, self.image_dim);
2260
2261 // mem_eviction_policy not a thing for sm < 70
2262
2263 let scope = match self.mem_order {
2264 MemOrder::Constant => MemScope::System,
2265 MemOrder::Weak => MemScope::CTA,
2266 MemOrder::Strong(s) => s,
2267 };
2268
2269 e.set_field(
2270 24..26,
2271 match scope {
2272 MemScope::CTA => 0_u8,
2273 /* SM => 1_u8, */
2274 MemScope::GPU => 2_u8,
2275 MemScope::System => 3_u8,
2276 },
2277 );
2278
2279 e.set_dst(self.dst);
2280
2281 e.set_reg_src(8..16, self.coord);
2282 e.set_reg_src(39..47, self.handle);
2283 }
2284 }
2285
impl SM50Op for OpSuSt {
    /// Delegates to the shared `legalize_ext_instr` helper.
    fn legalize(&mut self, b: &mut LegalizeBuilder) {
        legalize_ext_instr(self, b);
    }

    /// Emits this op under opcode `0xeb20`.
    ///
    /// Panics unless the component mask is `0x1`, `0x3` or `0xf`.
    fn encode(&self, e: &mut SM50Encoder<'_>) {
        e.set_opcode(0xeb20);

        // The stored data goes in bits 0..8, the slot other ops pass to
        // `set_dst`.
        e.set_reg_src(8..16, self.coord);
        e.set_reg_src(0..8, self.data);
        e.set_reg_src(39..47, self.handle);

        e.set_image_dim(33..36, self.image_dim);
        // NOTE: `set_mem_order` currently writes nothing (see its TODO).
        e.set_mem_order(&self.mem_order);

        assert!(self.mask == 0x1 || self.mask == 0x3 || self.mask == 0xf);
        e.set_field(20..24, self.mask);
    }
}
2305
2306 impl SM50Encoder<'_> {
set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp)2307 fn set_atom_op(&mut self, range: Range<usize>, atom_op: AtomOp) {
2308 self.set_field(
2309 range,
2310 match atom_op {
2311 AtomOp::Add => 0_u8,
2312 AtomOp::Min => 1_u8,
2313 AtomOp::Max => 2_u8,
2314 AtomOp::Inc => 3_u8,
2315 AtomOp::Dec => 4_u8,
2316 AtomOp::And => 5_u8,
2317 AtomOp::Or => 6_u8,
2318 AtomOp::Xor => 7_u8,
2319 AtomOp::Exch => 8_u8,
2320 AtomOp::CmpExch(_) => panic!("CmpExch is a separate opcode"),
2321 },
2322 );
2323 }
2324 }
2325
2326 impl SM50Op for OpSuAtom {
legalize(&mut self, b: &mut LegalizeBuilder)2327 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2328 legalize_ext_instr(self, b);
2329 }
2330
encode(&self, e: &mut SM50Encoder<'_>)2331 fn encode(&self, e: &mut SM50Encoder<'_>) {
2332 if let AtomOp::CmpExch(cmp_src) = self.atom_op {
2333 e.set_opcode(0xeac0);
2334 assert!(cmp_src == AtomCmpSrc::Packed);
2335 } else {
2336 e.set_opcode(0xea60);
2337 e.set_atom_op(29..33, self.atom_op);
2338 }
2339
2340 let atom_type: u8 = match self.atom_type {
2341 AtomType::U32 => 0,
2342 AtomType::I32 => 1,
2343 AtomType::F32 => 3,
2344 AtomType::U64 => 2,
2345 AtomType::I64 => 5,
2346 _ => panic!("Unsupported atom type {}", self.atom_type),
2347 };
2348
2349 e.set_image_dim(33..36, self.image_dim);
2350 e.set_field(36..39, atom_type);
2351
2352 // The hardware requires that we set .D on atomics. This is safe to do
2353 // in in the emit code because it only affects format conversion, not
2354 // surface coordinates and atomics are required to be performed with
2355 // image formats that that exactly match the shader data type. So, for
2356 // instance, a uint32_t atomic has to happen on an R32_UINT or R32_SINT
2357 // image.
2358 e.set_bit(52, true); // .D
2359
2360 e.set_dst(self.dst);
2361
2362 e.set_reg_src(20..28, self.data);
2363 e.set_reg_src(8..16, self.coord);
2364 e.set_reg_src(39..47, self.handle);
2365 }
2366 }
2367
2368 impl SM50Op for OpLd {
legalize(&mut self, b: &mut LegalizeBuilder)2369 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2370 legalize_ext_instr(self, b);
2371 }
2372
encode(&self, e: &mut SM50Encoder<'_>)2373 fn encode(&self, e: &mut SM50Encoder<'_>) {
2374 e.set_opcode(match self.access.space {
2375 MemSpace::Global(_) => 0xeed0,
2376 MemSpace::Local => 0xef40,
2377 MemSpace::Shared => 0xef48,
2378 });
2379
2380 e.set_dst(self.dst);
2381 e.set_reg_src(8..16, self.addr);
2382 e.set_field(20..44, self.offset);
2383
2384 e.set_mem_access(&self.access);
2385 }
2386 }
2387
2388 impl SM50Op for OpLdc {
legalize(&mut self, b: &mut LegalizeBuilder)2389 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2390 use RegFile::GPR;
2391 b.copy_alu_src_if_not_reg(&mut self.offset, GPR, SrcType::GPR);
2392 }
2393
encode(&self, e: &mut SM50Encoder<'_>)2394 fn encode(&self, e: &mut SM50Encoder<'_>) {
2395 assert!(self.cb.src_mod.is_none());
2396 let SrcRef::CBuf(cb) = &self.cb.src_ref else {
2397 panic!("Not a CBuf source");
2398 };
2399 let CBuf::Binding(cb_idx) = cb.buf else {
2400 panic!("Must be a bound constant buffer");
2401 };
2402
2403 e.set_opcode(0xef90);
2404
2405 e.set_dst(self.dst);
2406 e.set_reg_src(8..16, self.offset);
2407 e.set_field(20..36, cb.offset);
2408 e.set_field(36..41, cb_idx);
2409 e.set_field(
2410 44..46,
2411 match self.mode {
2412 LdcMode::Indexed => 0_u8,
2413 LdcMode::IndexedLinear => 1_u8,
2414 LdcMode::IndexedSegmented => 2_u8,
2415 LdcMode::IndexedSegmentedLinear => 3_u8,
2416 },
2417 );
2418 e.set_mem_type(48..51, self.mem_type);
2419 }
2420 }
2421
2422 impl SM50Op for OpSt {
legalize(&mut self, b: &mut LegalizeBuilder)2423 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2424 legalize_ext_instr(self, b);
2425 }
2426
encode(&self, e: &mut SM50Encoder<'_>)2427 fn encode(&self, e: &mut SM50Encoder<'_>) {
2428 e.set_opcode(match self.access.space {
2429 MemSpace::Global(_) => 0xeed8,
2430 MemSpace::Local => 0xef50,
2431 MemSpace::Shared => 0xef58,
2432 });
2433
2434 e.set_reg_src(0..8, self.data);
2435 e.set_reg_src(8..16, self.addr);
2436 e.set_field(20..44, self.offset);
2437 e.set_mem_access(&self.access);
2438 }
2439 }
2440
atom_src_as_ssa( b: &mut LegalizeBuilder, src: Src, atom_type: AtomType, ) -> SSARef2441 fn atom_src_as_ssa(
2442 b: &mut LegalizeBuilder,
2443 src: Src,
2444 atom_type: AtomType,
2445 ) -> SSARef {
2446 if let Some(ssa) = src.as_ssa() {
2447 return *ssa;
2448 }
2449
2450 let tmp;
2451 if atom_type.bits() == 32 {
2452 tmp = b.alloc_ssa(RegFile::GPR, 1);
2453 b.copy_to(tmp.into(), 0.into());
2454 } else {
2455 debug_assert!(atom_type.bits() == 64);
2456 tmp = b.alloc_ssa(RegFile::GPR, 2);
2457 b.copy_to(tmp[0].into(), 0.into());
2458 b.copy_to(tmp[1].into(), 0.into());
2459 }
2460 tmp
2461 }
2462
impl SM50Op for OpAtom {
    /// Rewrites a compare-exchange with separate compare/data sources into
    /// the packed form, then runs the shared `legalize_ext_instr` helper.
    fn legalize(&mut self, b: &mut LegalizeBuilder) {
        if self.atom_op == AtomOp::CmpExch(AtomCmpSrc::Separate) {
            let cmpr = atom_src_as_ssa(b, self.cmpr, self.atom_type);
            let data = atom_src_as_ssa(b, self.data, self.atom_type);

            // Pack the compare value followed by the data value into a
            // single SSA vector.
            let mut cmpr_data = Vec::new();
            cmpr_data.extend_from_slice(&cmpr);
            cmpr_data.extend_from_slice(&data);
            let cmpr_data = SSARef::try_from(cmpr_data).unwrap();

            // The packed vector travels in `data`; `cmpr` is zeroed.
            self.cmpr = 0.into();
            self.data = cmpr_data.into();
            self.atom_op = AtomOp::CmpExch(AtomCmpSrc::Packed);
        }
        legalize_ext_instr(self, b);
    }

    /// Emits the atomic.  The opcode and field layout depend on the memory
    /// space, on whether a destination is present and on whether the
    /// operation is a compare-exchange.
    ///
    /// Panics for local memory and for atomic types that have no encoding
    /// in the selected form.
    fn encode(&self, e: &mut SM50Encoder<'_>) {
        match self.mem_space {
            MemSpace::Global(addr_type) => {
                if self.dst.is_none() {
                    // No destination: opcode 0xebf8, data in bits 0..8.
                    e.set_opcode(0xebf8);

                    e.set_reg_src(0..8, self.data);

                    let data_type = match self.atom_type {
                        AtomType::U32 => 0_u8,
                        AtomType::I32 => 1_u8,
                        AtomType::U64 => 2_u8,
                        AtomType::F32 => 3_u8,
                        // NOTE: U128 => 4_u8,
                        AtomType::I64 => 5_u8,
                        _ => panic!("Unsupported data type"),
                    };
                    e.set_field(20..23, data_type);
                    e.set_atom_op(23..26, self.atom_op);
                } else if let AtomOp::CmpExch(cmp_src) = self.atom_op {
                    // Compare-exchange with a destination: opcode 0xee00.
                    e.set_opcode(0xee00);

                    e.set_dst(self.dst);

                    // TODO: These are all supported by the disassembler but
                    // only the packed layout appears to be supported by real
                    // hardware
                    let (data_src, data_layout) = match cmp_src {
                        AtomCmpSrc::Separate => {
                            if self.data.is_zero() {
                                (self.cmpr, 1_u8)
                            } else {
                                assert!(self.cmpr.is_zero());
                                (self.data, 2_u8)
                            }
                        }
                        AtomCmpSrc::Packed => (self.data, 0_u8),
                    };
                    e.set_reg_src(20..28, data_src);

                    let data_type = match self.atom_type {
                        AtomType::U32 => 0_u8,
                        AtomType::U64 => 1_u8,
                        _ => panic!("Unsupported data type"),
                    };
                    e.set_field(49..50, data_type);
                    e.set_field(50..52, data_layout);
                    e.set_field(52..56, 15_u8); // subOp
                } else {
                    // Any other atomic with a destination: opcode 0xed00.
                    e.set_opcode(0xed00);

                    e.set_dst(self.dst);
                    e.set_reg_src(20..28, self.data);

                    let data_type = match self.atom_type {
                        AtomType::U32 => 0_u8,
                        AtomType::I32 => 1_u8,
                        AtomType::U64 => 2_u8,
                        AtomType::F32 => 3_u8,
                        // NOTE: U128 => 4_u8,
                        AtomType::I64 => 5_u8,
                        _ => panic!("Unsupported data type"),
                    };
                    e.set_field(49..52, data_type);
                    e.set_atom_op(52..56, self.atom_op);
                }

                // Fields shared by all three global forms.
                e.set_mem_order(&self.mem_order);

                e.set_reg_src(8..16, self.addr);
                e.set_field(28..48, self.addr_offset);
                e.set_field(
                    48..49,
                    match addr_type {
                        MemAddrType::A32 => 0_u8,
                        MemAddrType::A64 => 1_u8,
                    },
                );
            }
            MemSpace::Local => panic!("Atomics do not support local"),
            MemSpace::Shared => {
                if let AtomOp::CmpExch(cmp_src) = self.atom_op {
                    e.set_opcode(0xee00);

                    // Only the packed form is accepted for shared memory.
                    assert!(cmp_src == AtomCmpSrc::Packed);
                    assert!(self.cmpr.is_zero());
                    e.set_reg_src(20..28, self.data);

                    let subop = match self.atom_type {
                        AtomType::U32 => 4_u8,
                        AtomType::U64 => 5_u8,
                        _ => panic!("Unsupported data type"),
                    };
                    e.set_field(52..56, subop);
                } else {
                    e.set_opcode(0xec00);

                    e.set_reg_src(20..28, self.data);

                    let data_type = match self.atom_type {
                        AtomType::U32 => 0_u8,
                        AtomType::I32 => 1_u8,
                        AtomType::U64 => 2_u8,
                        AtomType::I64 => 3_u8,
                        _ => panic!("Unsupported data type"),
                    };
                    e.set_field(28..30, data_type);
                    e.set_atom_op(52..56, self.atom_op);
                }

                // Fields shared by both shared-memory forms.  The offset is
                // stored divided by 4 and must therefore be a multiple of 4.
                e.set_dst(self.dst);
                e.set_reg_src(8..16, self.addr);
                assert_eq!(self.addr_offset % 4, 0);
                e.set_field(30..52, self.addr_offset / 4);
            }
        }
    }
}
2599
2600 impl SM50Op for OpAL2P {
legalize(&mut self, b: &mut LegalizeBuilder)2601 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2602 legalize_ext_instr(self, b);
2603 }
2604
encode(&self, e: &mut SM50Encoder<'_>)2605 fn encode(&self, e: &mut SM50Encoder<'_>) {
2606 e.set_opcode(0xefa0);
2607
2608 e.set_dst(self.dst);
2609 e.set_reg_src(8..16, self.offset);
2610
2611 e.set_field(20..31, self.access.addr);
2612 assert!(!self.access.patch);
2613 e.set_bit(32, self.access.output);
2614
2615 e.set_field(47..49, 0_u8); // comps
2616 e.set_pred_dst(44..47, Dst::None);
2617 }
2618 }
2619
2620 impl SM50Op for OpALd {
legalize(&mut self, b: &mut LegalizeBuilder)2621 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2622 legalize_ext_instr(self, b);
2623 }
2624
encode(&self, e: &mut SM50Encoder<'_>)2625 fn encode(&self, e: &mut SM50Encoder<'_>) {
2626 e.set_opcode(0xefd8);
2627
2628 e.set_dst(self.dst);
2629 if self.access.phys {
2630 assert!(!self.access.patch);
2631 assert!(self.offset.src_ref.as_reg().is_some());
2632 } else if !self.access.patch {
2633 assert!(self.offset.is_zero());
2634 }
2635 e.set_reg_src(8..16, self.offset);
2636 e.set_reg_src(39..47, self.vtx);
2637
2638 e.set_field(20..30, self.access.addr);
2639 e.set_bit(31, self.access.patch);
2640 e.set_bit(32, self.access.output);
2641 e.set_field(47..49, self.access.comps - 1);
2642 }
2643 }
2644
2645 impl SM50Op for OpASt {
legalize(&mut self, b: &mut LegalizeBuilder)2646 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2647 legalize_ext_instr(self, b);
2648 }
2649
encode(&self, e: &mut SM50Encoder<'_>)2650 fn encode(&self, e: &mut SM50Encoder<'_>) {
2651 e.set_opcode(0xeff0);
2652
2653 e.set_reg_src(0..8, self.data);
2654 e.set_reg_src(8..16, self.offset);
2655 e.set_reg_src(39..47, self.vtx);
2656
2657 assert!(!self.access.phys);
2658 assert!(self.access.output);
2659 e.set_field(20..30, self.access.addr);
2660 e.set_bit(31, self.access.patch);
2661 e.set_bit(32, self.access.output);
2662 e.set_field(47..49, self.access.comps - 1);
2663 }
2664 }
2665
2666 impl SM50Op for OpIpa {
legalize(&mut self, b: &mut LegalizeBuilder)2667 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2668 legalize_ext_instr(self, b);
2669 }
2670
encode(&self, e: &mut SM50Encoder<'_>)2671 fn encode(&self, e: &mut SM50Encoder<'_>) {
2672 e.set_opcode(0xe000);
2673
2674 e.set_dst(self.dst);
2675 e.set_reg_src(8..16, 0.into()); // addr
2676 e.set_reg_src(20..28, self.inv_w);
2677 e.set_reg_src(39..47, self.offset);
2678
2679 assert!(self.addr % 4 == 0);
2680 e.set_field(28..38, self.addr);
2681 e.set_bit(38, false); // .IDX
2682 e.set_pred_dst(47..50, Dst::None); // TODO: What is this for?
2683 e.set_bit(51, false); // .SAT
2684 e.set_field(
2685 52..54,
2686 match self.loc {
2687 InterpLoc::Default => 0_u8,
2688 InterpLoc::Centroid => 1_u8,
2689 InterpLoc::Offset => 2_u8,
2690 },
2691 );
2692 e.set_field(
2693 54..56,
2694 match self.freq {
2695 InterpFreq::Pass => 0_u8,
2696 InterpFreq::PassMulW => 1_u8,
2697 InterpFreq::Constant => 2_u8,
2698 InterpFreq::State => 3_u8,
2699 },
2700 );
2701 }
2702 }
2703
2704 impl SM50Op for OpCCtl {
legalize(&mut self, b: &mut LegalizeBuilder)2705 fn legalize(&mut self, b: &mut LegalizeBuilder) {
2706 legalize_ext_instr(self, b);
2707 }
2708
encode(&self, e: &mut SM50Encoder<'_>)2709 fn encode(&self, e: &mut SM50Encoder<'_>) {
2710 match self.mem_space {
2711 MemSpace::Global(addr_type) => {
2712 e.set_opcode(0xef60);
2713
2714 assert!(self.addr_offset % 4 == 0);
2715 e.set_field(22..52, self.addr_offset / 4);
2716 e.set_field(
2717 52..53,
2718 match addr_type {
2719 MemAddrType::A32 => 0_u8,
2720 MemAddrType::A64 => 1_u8,
2721 },
2722 );
2723 }
2724 MemSpace::Local => panic!("cctl does not support local"),
2725 MemSpace::Shared => {
2726 e.set_opcode(0xef80);
2727
2728 assert!(self.addr_offset % 4 == 0);
2729 e.set_field(22..44, self.addr_offset / 4);
2730 }
2731 }
2732
2733 e.set_field(
2734 0..4,
2735 match self.op {
2736 CCtlOp::Qry1 => 0_u8,
2737 CCtlOp::PF1 => 1_u8,
2738 CCtlOp::PF1_5 => 2_u8,
2739 CCtlOp::PF2 => 3_u8,
2740 CCtlOp::WB => 4_u8,
2741 CCtlOp::IV => 5_u8,
2742 CCtlOp::IVAll => 6_u8,
2743 CCtlOp::RS => 7_u8,
2744 CCtlOp::RSLB => 7_u8,
2745 op => panic!("Unsupported cache control {op:?}"),
2746 },
2747 );
2748 e.set_reg_src(8..16, self.addr);
2749 }
2750 }
2751
2752 impl SM50Op for OpMemBar {
legalize(&mut self, _b: &mut LegalizeBuilder)2753 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2754 // Nothing to do
2755 }
2756
encode(&self, e: &mut SM50Encoder<'_>)2757 fn encode(&self, e: &mut SM50Encoder<'_>) {
2758 e.set_opcode(0xef98);
2759
2760 e.set_field(
2761 8..10,
2762 match self.scope {
2763 MemScope::CTA => 0_u8,
2764 MemScope::GPU => 1_u8,
2765 MemScope::System => 2_u8,
2766 },
2767 );
2768 }
2769 }
2770
2771 impl SM50Encoder<'_> {
set_rel_offset(&mut self, range: Range<usize>, label: &Label)2772 fn set_rel_offset(&mut self, range: Range<usize>, label: &Label) {
2773 let ip = u32::try_from(self.ip).unwrap();
2774 let ip = i32::try_from(ip).unwrap();
2775
2776 let target_ip = *self.labels.get(label).unwrap();
2777 let target_ip = u32::try_from(target_ip).unwrap();
2778 let target_ip = i32::try_from(target_ip).unwrap();
2779
2780 let rel_offset = target_ip - ip - 8;
2781
2782 self.set_field(range, rel_offset);
2783 }
2784 }
2785
2786 impl SM50Op for OpBra {
legalize(&mut self, _b: &mut LegalizeBuilder)2787 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2788 // Nothing to do
2789 }
2790
encode(&self, e: &mut SM50Encoder<'_>)2791 fn encode(&self, e: &mut SM50Encoder<'_>) {
2792 e.set_opcode(0xe240);
2793 e.set_rel_offset(20..44, &self.target);
2794 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2795 }
2796 }
2797
2798 impl SM50Op for OpSSy {
legalize(&mut self, _b: &mut LegalizeBuilder)2799 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2800 // Nothing to do
2801 }
2802
encode(&self, e: &mut SM50Encoder<'_>)2803 fn encode(&self, e: &mut SM50Encoder<'_>) {
2804 e.set_opcode(0xe290);
2805 e.set_rel_offset(20..44, &self.target);
2806 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2807 }
2808 }
2809
2810 impl SM50Op for OpSync {
legalize(&mut self, _b: &mut LegalizeBuilder)2811 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2812 // Nothing to do
2813 }
2814
encode(&self, e: &mut SM50Encoder<'_>)2815 fn encode(&self, e: &mut SM50Encoder<'_>) {
2816 e.set_opcode(0xf0f8);
2817 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2818 }
2819 }
2820
2821 impl SM50Op for OpBrk {
legalize(&mut self, _b: &mut LegalizeBuilder)2822 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2823 // Nothing to do
2824 }
2825
encode(&self, e: &mut SM50Encoder<'_>)2826 fn encode(&self, e: &mut SM50Encoder<'_>) {
2827 e.set_opcode(0xe340);
2828 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2829 }
2830 }
2831
2832 impl SM50Op for OpPBk {
legalize(&mut self, _b: &mut LegalizeBuilder)2833 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2834 // Nothing to do
2835 }
2836
encode(&self, e: &mut SM50Encoder<'_>)2837 fn encode(&self, e: &mut SM50Encoder<'_>) {
2838 e.set_opcode(0xe2a0);
2839 e.set_rel_offset(20..44, &self.target);
2840 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2841 }
2842 }
2843
2844 impl SM50Op for OpCont {
legalize(&mut self, _b: &mut LegalizeBuilder)2845 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2846 // Nothing to do
2847 }
2848
encode(&self, e: &mut SM50Encoder<'_>)2849 fn encode(&self, e: &mut SM50Encoder<'_>) {
2850 e.set_opcode(0xe350);
2851 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2852 }
2853 }
2854
2855 impl SM50Op for OpPCnt {
legalize(&mut self, _b: &mut LegalizeBuilder)2856 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2857 // Nothing to do
2858 }
2859
encode(&self, e: &mut SM50Encoder<'_>)2860 fn encode(&self, e: &mut SM50Encoder<'_>) {
2861 e.set_opcode(0xe2b0);
2862 e.set_rel_offset(20..44, &self.target);
2863 e.set_field(0..5, 0xF_u8); // TODO: Pred?
2864 }
2865 }
2866
2867 impl SM50Op for OpExit {
legalize(&mut self, _b: &mut LegalizeBuilder)2868 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2869 // Nothing to do
2870 }
2871
encode(&self, e: &mut SM50Encoder<'_>)2872 fn encode(&self, e: &mut SM50Encoder<'_>) {
2873 e.set_opcode(0xe300);
2874
2875 // TODO: CC flags
2876 e.set_field(0..4, 0xf_u8); // CC.T
2877 }
2878 }
2879
2880 impl SM50Op for OpBar {
legalize(&mut self, _b: &mut LegalizeBuilder)2881 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2882 // Nothing to do
2883 }
2884
encode(&self, e: &mut SM50Encoder<'_>)2885 fn encode(&self, e: &mut SM50Encoder<'_>) {
2886 e.set_opcode(0xf0a8);
2887
2888 e.set_reg_src(8..16, SrcRef::Zero.into());
2889
2890 // 00: RED.POPC
2891 // 01: RED.AND
2892 // 02: RED.OR
2893 e.set_field(35..37, 0_u8);
2894
2895 // 00: SYNC
2896 // 01: ARV
2897 // 02: RED
2898 // 03: SCAN
2899 e.set_field(32..35, 0_u8);
2900
2901 e.set_pred_src(39..42, 42, SrcRef::True.into());
2902 }
2903 }
2904
impl SM50Op for OpCS2R {
    /// No legalization is performed for this op.
    fn legalize(&mut self, _b: &mut LegalizeBuilder) {
        // Nothing to do
    }

    /// Emits this op under opcode `0x50c8` with the index in bits 20..28.
    fn encode(&self, e: &mut SM50Encoder<'_>) {
        e.set_opcode(0x50c8);
        e.set_dst(self.dst);
        e.set_field(20..28, self.idx);
    }
}
2916
impl SM50Op for OpIsberd {
    /// No legalization is performed for this op.
    fn legalize(&mut self, _b: &mut LegalizeBuilder) {
        // Nothing to do
    }

    /// Emits this op under opcode `0xefd0` with the index as a register
    /// source in bits 8..16.
    fn encode(&self, e: &mut SM50Encoder<'_>) {
        e.set_opcode(0xefd0);
        e.set_dst(self.dst);
        e.set_reg_src(8..16, self.idx);
    }
}
2928
2929 impl SM50Op for OpKill {
legalize(&mut self, _b: &mut LegalizeBuilder)2930 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2931 // Nothing to do
2932 }
2933
encode(&self, e: &mut SM50Encoder<'_>)2934 fn encode(&self, e: &mut SM50Encoder<'_>) {
2935 e.set_opcode(0xe330);
2936 e.set_field(0..5, 0x0f_u8);
2937 }
2938 }
2939
2940 impl SM50Op for OpNop {
legalize(&mut self, _b: &mut LegalizeBuilder)2941 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2942 // Nothing to do
2943 }
2944
encode(&self, e: &mut SM50Encoder<'_>)2945 fn encode(&self, e: &mut SM50Encoder<'_>) {
2946 e.set_opcode(0x50b0);
2947
2948 // TODO: CC flags
2949 e.set_field(8..12, 0xf_u8); // CC.T
2950 }
2951 }
2952
2953 impl SM50Op for OpPixLd {
legalize(&mut self, _b: &mut LegalizeBuilder)2954 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2955 // Nothing to do
2956 }
2957
encode(&self, e: &mut SM50Encoder<'_>)2958 fn encode(&self, e: &mut SM50Encoder<'_>) {
2959 e.set_opcode(0xefe8);
2960 e.set_dst(self.dst);
2961 e.set_reg_src(8..16, 0.into());
2962 e.set_field(
2963 31..34,
2964 match &self.val {
2965 PixVal::CovMask => 1_u8,
2966 PixVal::Covered => 2_u8,
2967 PixVal::Offset => 3_u8,
2968 PixVal::CentroidOffset => 4_u8,
2969 PixVal::MyIndex => 5_u8,
2970 other => panic!("Unsupported PixVal: {other}"),
2971 },
2972 );
2973 e.set_pred_dst(45..48, Dst::None);
2974 }
2975 }
2976
impl SM50Op for OpS2R {
    /// No legalization is performed for this op.
    fn legalize(&mut self, _b: &mut LegalizeBuilder) {
        // Nothing to do
    }

    /// Emits this op under opcode `0xf0c8` with the index in bits 20..28.
    fn encode(&self, e: &mut SM50Encoder<'_>) {
        e.set_opcode(0xf0c8);
        e.set_dst(self.dst);
        e.set_field(20..28, self.idx);
    }
}
2988
2989 impl SM50Op for OpVote {
legalize(&mut self, _b: &mut LegalizeBuilder)2990 fn legalize(&mut self, _b: &mut LegalizeBuilder) {
2991 // Nothing to do
2992 }
2993
encode(&self, e: &mut SM50Encoder<'_>)2994 fn encode(&self, e: &mut SM50Encoder<'_>) {
2995 e.set_opcode(0x50d8);
2996
2997 e.set_dst(self.ballot);
2998 e.set_pred_dst(45..48, self.vote);
2999 e.set_pred_src(39..42, 42, self.pred);
3000
3001 e.set_field(
3002 48..50,
3003 match self.op {
3004 VoteOp::All => 0u8,
3005 VoteOp::Any => 1u8,
3006 VoteOp::Eq => 2u8,
3007 },
3008 );
3009 }
3010 }
3011
3012 impl SM50Op for OpOut {
legalize(&mut self, b: &mut LegalizeBuilder)3013 fn legalize(&mut self, b: &mut LegalizeBuilder) {
3014 use RegFile::GPR;
3015 b.copy_alu_src_if_not_reg(&mut self.handle, GPR, SrcType::GPR);
3016 b.copy_alu_src_if_i20_overflow(&mut self.stream, GPR, SrcType::ALU);
3017 }
3018
encode(&self, e: &mut SM50Encoder<'_>)3019 fn encode(&self, e: &mut SM50Encoder<'_>) {
3020 match &self.stream.src_ref {
3021 SrcRef::Zero | SrcRef::Reg(_) => {
3022 e.set_opcode(0xfbe0);
3023 e.set_reg_src(20..28, self.stream);
3024 }
3025 SrcRef::Imm32(imm32) => {
3026 e.set_opcode(0xf6e0);
3027 e.set_src_imm_i20(20..39, 56, *imm32);
3028 }
3029 SrcRef::CBuf(cbuf) => {
3030 e.set_opcode(0xebe0);
3031 e.set_src_cb(20..39, cbuf);
3032 }
3033 src => panic!("Invalid out stream: {src}"),
3034 }
3035
3036 e.set_field(
3037 39..41,
3038 match self.out_type {
3039 OutType::Emit => 1_u8,
3040 OutType::Cut => 2_u8,
3041 OutType::EmitThenCut => 3_u8,
3042 },
3043 );
3044
3045 e.set_reg_src(8..16, self.handle);
3046 e.set_dst(self.dst);
3047 }
3048 }
3049
// Matches `$op` against every `Op` variant listed below and evaluates to the
// payload of the matching variant.  Any variant that is not listed panics
// with "Unhandled instruction".
//
// The macro is expanded by both `as_sm50_op` (on `&Op`) and `as_sm50_op_mut`
// (on `&mut Op`), so the list of variants is written only once.
macro_rules! as_sm50_op_match {
    ($op: expr) => {
        match $op {
            Op::FAdd(op) => op,
            Op::FMnMx(op) => op,
            Op::FMul(op) => op,
            Op::FFma(op) => op,
            Op::FSet(op) => op,
            Op::FSetP(op) => op,
            Op::FSwzAdd(op) => op,
            Op::Rro(op) => op,
            Op::MuFu(op) => op,
            Op::Flo(op) => op,
            Op::DAdd(op) => op,
            Op::DFma(op) => op,
            Op::DMnMx(op) => op,
            Op::DMul(op) => op,
            Op::DSetP(op) => op,
            Op::IAdd2(op) => op,
            Op::IAdd2X(op) => op,
            Op::Mov(op) => op,
            Op::Sel(op) => op,
            Op::Shfl(op) => op,
            Op::Vote(op) => op,
            Op::PSetP(op) => op,
            Op::SuSt(op) => op,
            Op::S2R(op) => op,
            Op::PopC(op) => op,
            Op::Prmt(op) => op,
            Op::Ld(op) => op,
            Op::Ldc(op) => op,
            Op::St(op) => op,
            Op::Lop2(op) => op,
            Op::Shf(op) => op,
            Op::Shl(op) => op,
            Op::Shr(op) => op,
            Op::F2F(op) => op,
            Op::F2I(op) => op,
            Op::I2F(op) => op,
            Op::I2I(op) => op,
            Op::IMad(op) => op,
            Op::IMul(op) => op,
            Op::IMnMx(op) => op,
            Op::ISetP(op) => op,
            Op::Tex(op) => op,
            Op::Tld(op) => op,
            Op::Tld4(op) => op,
            Op::Tmml(op) => op,
            Op::Txd(op) => op,
            Op::Txq(op) => op,
            Op::Ipa(op) => op,
            Op::AL2P(op) => op,
            Op::ALd(op) => op,
            Op::ASt(op) => op,
            Op::CCtl(op) => op,
            Op::MemBar(op) => op,
            Op::Atom(op) => op,
            Op::Bra(op) => op,
            Op::SSy(op) => op,
            Op::Sync(op) => op,
            Op::Brk(op) => op,
            Op::PBk(op) => op,
            Op::Cont(op) => op,
            Op::PCnt(op) => op,
            Op::Exit(op) => op,
            Op::Bar(op) => op,
            Op::SuLd(op) => op,
            Op::SuAtom(op) => op,
            Op::Kill(op) => op,
            Op::CS2R(op) => op,
            Op::Nop(op) => op,
            Op::PixLd(op) => op,
            Op::Isberd(op) => op,
            Op::Out(op) => op,
            Op::Bfe(op) => op,
            // Every other variant is not handled by this table.
            _ => panic!("Unhandled instruction {}", $op),
        }
    };
}
3129
/// Returns the `SM50Op` implementation wrapped by `op`.
///
/// Panics (inside `as_sm50_op_match!`) for `Op` variants the macro does not
/// list.
fn as_sm50_op(op: &Op) -> &dyn SM50Op {
    as_sm50_op_match!(op)
}
3133
/// Mutable counterpart of `as_sm50_op`: returns the `SM50Op` implementation
/// wrapped by `op` as a mutable trait object.
///
/// Panics (inside `as_sm50_op_match!`) for `Op` variants the macro does not
/// list.
fn as_sm50_op_mut(op: &mut Op) -> &mut dyn SM50Op {
    as_sm50_op_match!(op)
}
3137
encode_instr( instr_index: usize, instr: Option<&Box<Instr>>, sm: &ShaderModel50, labels: &HashMap<Label, usize>, ip: &mut usize, sched_instr: &mut [u32; 2], ) -> [u32; 2]3138 fn encode_instr(
3139 instr_index: usize,
3140 instr: Option<&Box<Instr>>,
3141 sm: &ShaderModel50,
3142 labels: &HashMap<Label, usize>,
3143 ip: &mut usize,
3144 sched_instr: &mut [u32; 2],
3145 ) -> [u32; 2] {
3146 let mut e = SM50Encoder {
3147 sm,
3148 ip: *ip,
3149 labels,
3150 inst: [0_u32; 2],
3151 sched: 0,
3152 };
3153
3154 if let Some(instr) = instr {
3155 as_sm50_op(&instr.op).encode(&mut e);
3156 e.set_pred(&instr.pred);
3157 e.set_instr_deps(&instr.deps);
3158 } else {
3159 let nop = OpNop { label: None };
3160 nop.encode(&mut e);
3161 e.set_pred(&true.into());
3162 e.set_instr_deps(&InstrDeps::new());
3163 }
3164
3165 *ip += 8;
3166
3167 BitMutView::new(sched_instr)
3168 .set_field(21 * instr_index..21 * (instr_index + 1), e.sched);
3169
3170 e.inst
3171 }
3172
encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec<u32>3173 fn encode_sm50_shader(sm: &ShaderModel50, s: &Shader<'_>) -> Vec<u32> {
3174 assert!(s.functions.len() == 1);
3175 let func = &s.functions[0];
3176
3177 let mut num_instrs = 0_usize;
3178 let mut labels = HashMap::new();
3179 for b in &func.blocks {
3180 // We ensure blocks will have groups of 3 instructions with a
3181 // schedule instruction before each groups. As we should never jump
3182 // to a schedule instruction, we account for that here.
3183 labels.insert(b.label, num_instrs + 8);
3184
3185 let block_num_instrs = b.instrs.len().next_multiple_of(3);
3186
3187 // Every 3 instructions, we have a new schedule instruction so we
3188 // need to account for that.
3189 num_instrs += (block_num_instrs + (block_num_instrs / 3)) * 8;
3190 }
3191
3192 let mut encoded = Vec::new();
3193 for b in &func.blocks {
3194 // A block is composed of groups of 3 instructions.
3195 let block_num_instrs = b.instrs.len().next_multiple_of(3);
3196
3197 let mut instrs_iter = b.instrs.iter();
3198
3199 for _ in 0..(block_num_instrs / 3) {
3200 let mut ip = ((encoded.len() / 2) + 1) * 8;
3201
3202 let mut sched_instr = [0x0; 2];
3203
3204 let instr0 = encode_instr(
3205 0,
3206 instrs_iter.next(),
3207 sm,
3208 &labels,
3209 &mut ip,
3210 &mut sched_instr,
3211 );
3212 let instr1 = encode_instr(
3213 1,
3214 instrs_iter.next(),
3215 sm,
3216 &labels,
3217 &mut ip,
3218 &mut sched_instr,
3219 );
3220 let instr2 = encode_instr(
3221 2,
3222 instrs_iter.next(),
3223 sm,
3224 &labels,
3225 &mut ip,
3226 &mut sched_instr,
3227 );
3228
3229 encoded.extend_from_slice(&sched_instr[..]);
3230 encoded.extend_from_slice(&instr0[..]);
3231 encoded.extend_from_slice(&instr1[..]);
3232 encoded.extend_from_slice(&instr2[..]);
3233 }
3234 }
3235
3236 encoded
3237 }
3238