1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::api::{GetDebugFlags, DEBUG};
5 use crate::ir::*;
6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
7
8 use std::collections::{HashMap, HashSet};
9
src_is_reg(src: &Src) -> bool10 fn src_is_reg(src: &Src) -> bool {
11 match src.src_ref {
12 SrcRef::Zero | SrcRef::True | SrcRef::False | SrcRef::SSA(_) => true,
13 SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
14 SrcRef::Reg(_) => panic!("Not in SSA form"),
15 }
16 }
17
src_as_lop_imm(src: &Src) -> Option<bool>18 fn src_as_lop_imm(src: &Src) -> Option<bool> {
19 let x = match src.src_ref {
20 SrcRef::Zero => false,
21 SrcRef::True => true,
22 SrcRef::False => false,
23 SrcRef::Imm32(i) => {
24 if i == 0 {
25 false
26 } else if i == !0 {
27 true
28 } else {
29 return None;
30 }
31 }
32 _ => return None,
33 };
34 Some(x ^ src.src_mod.is_bnot())
35 }
36
fold_lop_src(src: &Src, x: &mut u8)37 fn fold_lop_src(src: &Src, x: &mut u8) {
38 if let Some(i) = src_as_lop_imm(src) {
39 *x = if i { !0 } else { 0 };
40 }
41 if src.src_mod.is_bnot() {
42 *x = !*x;
43 }
44 }
45
copy_alu_src(b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType)46 fn copy_alu_src(b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType) {
47 let val = match src_type {
48 SrcType::GPR
49 | SrcType::ALU
50 | SrcType::F32
51 | SrcType::I32
52 | SrcType::B32 => b.alloc_ssa(RegFile::GPR, 1),
53 SrcType::F64 => b.alloc_ssa(RegFile::GPR, 2),
54 SrcType::Pred => b.alloc_ssa(RegFile::Pred, 1),
55 _ => panic!("Unknown source type"),
56 };
57
58 if DEBUG.annotate() {
59 b.push_instr(Instr::new_boxed(OpAnnotate {
60 annotation: "copy generated by legalizer".into(),
61 }));
62 }
63
64 if val.comps() == 1 {
65 b.copy_to(val.into(), src.src_ref.into());
66 } else {
67 match src.src_ref {
68 SrcRef::Imm32(u) => {
69 // Immediates go in the top bits
70 b.copy_to(val[0].into(), 0.into());
71 b.copy_to(val[1].into(), u.into());
72 }
73 SrcRef::CBuf(cb) => {
74 // CBufs load 8B
75 b.copy_to(val[0].into(), cb.into());
76 b.copy_to(val[1].into(), cb.offset(4).into());
77 }
78 SrcRef::SSA(vec) => {
79 assert!(vec.comps() == 2);
80 b.copy_to(val[0].into(), vec[0].into());
81 b.copy_to(val[1].into(), vec[1].into());
82 }
83 _ => panic!("Invalid 64-bit SrcRef"),
84 }
85 }
86
87 src.src_ref = val.into();
88 }
89
copy_alu_src_if_cbuf( b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType, )90 fn copy_alu_src_if_cbuf(
91 b: &mut impl SSABuilder,
92 src: &mut Src,
93 src_type: SrcType,
94 ) {
95 if matches!(src.src_ref, SrcRef::CBuf(_)) {
96 copy_alu_src(b, src, src_type);
97 }
98 }
99
copy_alu_src_if_not_reg( b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType, )100 fn copy_alu_src_if_not_reg(
101 b: &mut impl SSABuilder,
102 src: &mut Src,
103 src_type: SrcType,
104 ) {
105 if !src_is_reg(src) {
106 copy_alu_src(b, src, src_type);
107 }
108 }
109
src_is_imm(src: &Src) -> bool110 fn src_is_imm(src: &Src) -> bool {
111 matches!(src.src_ref, SrcRef::Imm32(_))
112 }
113
copy_alu_src_if_imm( b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType, )114 fn copy_alu_src_if_imm(
115 b: &mut impl SSABuilder,
116 src: &mut Src,
117 src_type: SrcType,
118 ) {
119 if src_is_imm(src) {
120 copy_alu_src(b, src, src_type);
121 }
122 }
123
copy_alu_src_if_both_not_reg( b: &mut impl SSABuilder, src1: &Src, src2: &mut Src, src_type: SrcType, )124 fn copy_alu_src_if_both_not_reg(
125 b: &mut impl SSABuilder,
126 src1: &Src,
127 src2: &mut Src,
128 src_type: SrcType,
129 ) {
130 if !src_is_reg(src1) && !src_is_reg(src2) {
131 copy_alu_src(b, src2, src_type);
132 }
133 }
134
swap_srcs_if_not_reg(x: &mut Src, y: &mut Src) -> bool135 fn swap_srcs_if_not_reg(x: &mut Src, y: &mut Src) -> bool {
136 if !src_is_reg(x) && src_is_reg(y) {
137 std::mem::swap(x, y);
138 true
139 } else {
140 false
141 }
142 }
143
copy_alu_src_if_i20_overflow( b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType, )144 fn copy_alu_src_if_i20_overflow(
145 b: &mut impl SSABuilder,
146 src: &mut Src,
147 src_type: SrcType,
148 ) {
149 if src.as_imm_not_i20().is_some() {
150 copy_alu_src(b, src, src_type);
151 }
152 }
153
copy_alu_src_if_f20_overflow( b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType, )154 fn copy_alu_src_if_f20_overflow(
155 b: &mut impl SSABuilder,
156 src: &mut Src,
157 src_type: SrcType,
158 ) {
159 if src.as_imm_not_f20().is_some() {
160 copy_alu_src(b, src, src_type);
161 }
162 }
163
copy_alu_src_if_fabs( b: &mut impl SSABuilder, src: &mut Src, src_type: SrcType, )164 fn copy_alu_src_if_fabs(
165 b: &mut impl SSABuilder,
166 src: &mut Src,
167 src_type: SrcType,
168 ) {
169 if src.src_mod.has_fabs() {
170 match src_type {
171 SrcType::F32 => {
172 let val = b.alloc_ssa(RegFile::GPR, 1);
173 b.push_op(OpFAdd {
174 dst: val.into(),
175 srcs: [Src::new_zero().fneg(), *src],
176 saturate: false,
177 rnd_mode: FRndMode::NearestEven,
178 ftz: false,
179 });
180 *src = val.into();
181 }
182 SrcType::F64 => {
183 let val = b.alloc_ssa(RegFile::GPR, 2);
184 b.push_op(OpDAdd {
185 dst: val.into(),
186 srcs: [Src::new_zero().fneg(), *src],
187 rnd_mode: FRndMode::NearestEven,
188 });
189 *src = val.into();
190 }
191 _ => panic!("Invalid ffabs srouce type"),
192 }
193 }
194 }
195
/// Legalizes the sources of a single instruction for the SM50 code path.
///
/// Each handled opcode gets its own arm that rewrites the instruction's
/// sources in place.  Sources that are not acceptable in their current form
/// are replaced by copies into fresh SSA values, with the copy instructions
/// emitted through `b`.  The recurring steps are:
///
/// * `swap_srcs_if_not_reg` to move a register operand into the first slot
///   of a two-source pair, adjusting the comparison/condition/signedness
///   where the operand order matters,
/// * `copy_alu_src_if_not_reg` to force a source into a register,
/// * `copy_alu_src_if_i20_overflow` / `copy_alu_src_if_f20_overflow` to
///   copy immediates flagged by `as_imm_not_i20` / `as_imm_not_f20`,
/// * `copy_alu_src_if_fabs` to materialize sources with an absolute-value
///   modifier.
///
/// `_bl` and `_ip` are accepted for signature parity with
/// `legalize_sm70_instr` and are not used here.
///
/// # Panics
///
/// For opcodes without an explicit arm, panics if any source is typed as an
/// ALU, predicate or barrier source, and asserts that `SSA` sources are SSA
/// values and `GPR` sources satisfy `src_is_reg`.
fn legalize_sm50_instr(
    b: &mut impl SSABuilder,
    _bl: &impl BlockLiveness,
    _ip: usize,
    instr: &mut Instr,
) {
    match &mut instr.op {
        Op::Shf(op) => {
            copy_alu_src_if_not_reg(b, &mut op.shift, SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.high, SrcType::ALU);
            copy_alu_src_if_not_reg(b, &mut op.low, SrcType::GPR);
            copy_alu_src_if_i20_overflow(b, &mut op.shift, SrcType::GPR);
        }
        Op::Shl(op) => {
            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
            copy_alu_src_if_i20_overflow(b, &mut op.shift, SrcType::ALU);
        }
        Op::Shr(op) => {
            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
            copy_alu_src_if_i20_overflow(b, &mut op.shift, SrcType::ALU);
        }
        Op::FAdd(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
        }
        Op::FMul(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::F32);
        }
        Op::FSet(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if swap_srcs_if_not_reg(src0, src1) {
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F32);
        }
        Op::FSetP(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if swap_srcs_if_not_reg(src0, src1) {
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F32);
        }
        Op::FSwzAdd(op) => {
            copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::GPR);
        }
        Op::ISetP(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if swap_srcs_if_not_reg(src0, src1) {
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_i20_overflow(b, src1, SrcType::ALU);
        }
        Op::Lop2(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::ALU);
        }
        Op::Rro(op) => {
            copy_alu_src_if_f20_overflow(b, &mut op.src, SrcType::F32);
        }
        Op::PSetP(_) => {}
        Op::MuFu(op) => {
            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
        }
        Op::DAdd(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
        }
        Op::DFma(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            // Absolute-value modifiers are materialized on all three
            // sources before anything else.
            copy_alu_src_if_fabs(b, src0, SrcType::F64);
            copy_alu_src_if_fabs(b, src1, SrcType::F64);
            copy_alu_src_if_fabs(b, src2, SrcType::F64);
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
            // With src1 in a register, src2 only has to stop being an
            // immediate; otherwise src2 itself must be a register.
            if src_is_reg(src1) {
                copy_alu_src_if_imm(b, src2, SrcType::F64);
            } else {
                copy_alu_src_if_not_reg(b, src2, SrcType::F64);
            }
        }
        Op::DMnMx(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
        }
        Op::DMul(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            copy_alu_src_if_fabs(b, src0, SrcType::F64);
            copy_alu_src_if_fabs(b, src1, SrcType::F64);
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
        }
        Op::DSetP(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if swap_srcs_if_not_reg(src0, src1) {
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F64);
        }
        Op::Sel(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Swapping the two selected values is compensated for by
            // inverting the condition.
            if swap_srcs_if_not_reg(src0, src1) {
                op.cond = op.cond.bnot();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_i20_overflow(b, src1, SrcType::ALU);
        }
        Op::Shfl(op) => {
            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
            copy_alu_src_if_cbuf(b, &mut op.lane, SrcType::ALU);
            copy_alu_src_if_cbuf(b, &mut op.c, SrcType::ALU);
        }
        Op::Vote(_) => {}
        Op::IAdd2(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::I32);
        }
        Op::I2F(op) => {
            copy_alu_src_if_i20_overflow(b, &mut op.src, SrcType::ALU);
        }
        Op::F2F(op) => {
            copy_alu_src_if_f20_overflow(b, &mut op.src, SrcType::ALU);
        }
        Op::I2I(op) => {
            copy_alu_src_if_i20_overflow(b, &mut op.src, SrcType::ALU);
        }
        Op::IMad(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            // NOTE(review): src0 and src2 are forced into registers up
            // front, so the swap and the src0/src2 copies below cannot
            // change anything afterwards -- confirm this is intended.
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_not_reg(b, src2, SrcType::ALU);
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_i20_overflow(b, src1, SrcType::ALU);
            if src_is_reg(src1) {
                copy_alu_src_if_imm(b, src2, SrcType::ALU);
            } else {
                copy_alu_src_if_not_reg(b, src2, SrcType::ALU);
            }
        }
        Op::IMul(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // The per-source signedness flags travel with their sources.
            if swap_srcs_if_not_reg(src0, src1) {
                op.signed.swap(0, 1);
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
        }
        Op::F2I(op) => {
            copy_alu_src_if_f20_overflow(b, &mut op.src, SrcType::ALU);
        }
        Op::IMnMx(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
        }
        Op::Ipa(op) => {
            copy_alu_src_if_not_reg(b, &mut op.offset, SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.inv_w, SrcType::GPR);
        }
        Op::PopC(_) => {}
        Op::BRev(op) => {
            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::ALU);
        }
        Op::Flo(op) => {
            copy_alu_src_if_i20_overflow(b, &mut op.src, SrcType::ALU);
        }
        Op::FMnMx(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F32);
        }
        Op::Prmt(op) => {
            copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::GPR);
            copy_alu_src_if_i20_overflow(b, &mut op.sel, SrcType::ALU);
        }
        Op::FFma(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            // Absolute-value modifiers are materialized on all three
            // sources before anything else.
            copy_alu_src_if_fabs(b, src0, SrcType::F32);
            copy_alu_src_if_fabs(b, src1, SrcType::F32);
            copy_alu_src_if_fabs(b, src2, SrcType::F32);
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
            copy_alu_src_if_f20_overflow(b, src1, SrcType::F32);
            // With src1 in a register, src2 only has to stop being an
            // immediate; otherwise src2 itself must be a register.
            if src_is_reg(src1) {
                copy_alu_src_if_imm(b, src2, SrcType::F32);
            } else {
                copy_alu_src_if_not_reg(b, src2, SrcType::F32);
            }
        }
        Op::Ldc(op) => {
            // TODO: cb must be a bound constant buffer
            copy_alu_src_if_not_reg(b, &mut op.offset, SrcType::GPR);
        }
        Op::Copy(_) => (), // Nothing to do
        Op::INeg(_) => (), // we unconditionally lower this
        Op::SuLd(op) => {
            copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.coord, SrcType::GPR);
        }
        Op::SuAtom(op) => {
            copy_alu_src_if_not_reg(b, &mut op.coord, SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
            copy_alu_src_if_not_reg(b, &mut op.data, SrcType::GPR);
        }
        Op::Out(op) => {
            copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
            copy_alu_src_if_i20_overflow(b, &mut op.stream, SrcType::ALU);
        }
        Op::Bfe(op) => {
            copy_alu_src_if_not_reg(b, &mut op.base, SrcType::ALU);
        }
        _ => {
            // Opcodes without a dedicated arm: nothing is rewritten, the
            // sources are only checked against their declared types.
            let src_types = instr.src_types();
            for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
                match src_types[i] {
                    SrcType::SSA => {
                        assert!(src.as_ssa().is_some());
                    }
                    SrcType::GPR => {
                        assert!(src_is_reg(src));
                    }
                    SrcType::ALU
                    | SrcType::F32
                    | SrcType::F64
                    | SrcType::I32
                    | SrcType::B32 => {
                        panic!(
                            "({}): ALU srcs must be legalized explicitly",
                            &mut instr.op
                        );
                    }
                    SrcType::Pred => {
                        panic!(
                            "({}): Predicates must be legalized explicitly",
                            &mut instr.op
                        );
                    }
                    SrcType::Bar => panic!("Barrier regs are Volta+"),
                }
            }
        }
    }
}
455
/// Legalizes the sources of a single instruction for the SM70+ code path.
///
/// Each handled opcode gets its own arm that rewrites the instruction's
/// sources in place.  Sources that are not acceptable in their current form
/// are replaced by copies into fresh SSA values, with the extra
/// instructions emitted through `b`.  The recurring steps are:
///
/// * swapping a register operand into the first slot of a source pair
///   (adjusting comparison, condition, source types or logic-op LUT where
///   the operand order matters),
/// * `copy_alu_src_if_not_reg` to force a source into a register,
/// * `copy_alu_src_if_both_not_reg` so that at most one of the last two
///   sources of a three-source op is a non-register.
///
/// `bl` and `ip` are only consulted by the `Break` and `BSSy` arms, which
/// ask whether the incoming barrier value is still live after instruction
/// index `ip`.
///
/// # Panics
///
/// For opcodes without an explicit arm, panics if any source is typed as an
/// ALU or predicate source, and asserts that `SSA` sources are SSA values
/// and `GPR` sources satisfy `src_is_reg`.  The `Break` and `BSSy` arms
/// panic if `bar_in` is not an SSA value.
fn legalize_sm70_instr(
    b: &mut impl SSABuilder,
    bl: &impl BlockLiveness,
    ip: usize,
    instr: &mut Instr,
) {
    match &mut instr.op {
        Op::FAdd(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
        }
        Op::FFma(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
            copy_alu_src_if_both_not_reg(b, src1, src2, SrcType::F32);
        }
        Op::FMnMx(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
        }
        Op::FMul(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
        }
        Op::FSet(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if !src_is_reg(src0) && src_is_reg(src1) {
                std::mem::swap(src0, src1);
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
        }
        Op::FSetP(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if !src_is_reg(src0) && src_is_reg(src1) {
                std::mem::swap(src0, src1);
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
        }
        Op::MuFu(_) => (), // Nothing to do
        Op::DAdd(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
        }
        Op::DFma(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
            copy_alu_src_if_both_not_reg(b, src1, src2, SrcType::F64);
        }
        Op::DMul(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
        }
        Op::DSetP(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if !src_is_reg(src0) && src_is_reg(src1) {
                std::mem::swap(src0, src1);
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::F64);
        }
        Op::BMsk(op) => {
            copy_alu_src_if_not_reg(b, &mut op.pos, SrcType::ALU);
        }
        Op::BRev(_) | Op::Flo(_) => (),
        Op::IAbs(_) | Op::INeg(_) => (),
        Op::IAdd3(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            // Two swaps around src1 push a non-register operand (if there
            // is at most one) into the middle slot.
            swap_srcs_if_not_reg(src0, src1);
            swap_srcs_if_not_reg(src2, src1);
            // When both src0 and src1 carry a modifier, src0 (modifier
            // included) is first evaluated by a separate add with zero and
            // replaced by the plain result.
            if !src0.src_mod.is_none() && !src1.src_mod.is_none() {
                let val = b.alloc_ssa(RegFile::GPR, 1);
                b.push_op(OpIAdd3 {
                    srcs: [Src::new_zero(), *src0, Src::new_zero()],
                    overflow: [Dst::None; 2],
                    dst: val.into(),
                });
                *src0 = val.into();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::I32);
            copy_alu_src_if_both_not_reg(b, src1, src2, SrcType::I32);
        }
        Op::IAdd3X(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            // Two swaps around src1 push a non-register operand (if there
            // is at most one) into the middle slot.
            swap_srcs_if_not_reg(src0, src1);
            swap_srcs_if_not_reg(src2, src1);
            // When both src0 and src1 carry a modifier, src0 (modifier
            // included) is first evaluated by a separate add with zero and
            // false carries, and replaced by the plain result.
            if !src0.src_mod.is_none() && !src1.src_mod.is_none() {
                let val = b.alloc_ssa(RegFile::GPR, 1);
                b.push_op(OpIAdd3X {
                    srcs: [Src::new_zero(), *src0, Src::new_zero()],
                    overflow: [Dst::None; 2],
                    dst: val.into(),
                    carry: [false.into(); 2],
                });
                *src0 = val.into();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::B32);
            copy_alu_src_if_both_not_reg(b, src1, src2, SrcType::B32);
        }
        Op::IDp4(op) => {
            let [ref mut src_type0, ref mut src_type1] = op.src_types;
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            // The per-source type tags travel with their sources.
            if swap_srcs_if_not_reg(src0, src1) {
                std::mem::swap(src_type0, src_type1);
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_not_reg(b, src2, SrcType::ALU);
        }
        Op::IMad(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_both_not_reg(b, src1, src2, SrcType::ALU);
        }
        Op::IMad64(op) => {
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_both_not_reg(b, src1, src2, SrcType::ALU);
        }
        Op::IMnMx(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            swap_srcs_if_not_reg(src0, src1);
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
        }
        Op::ISetP(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Exchanging the operands of a comparison requires flipping it.
            if !src_is_reg(src0) && src_is_reg(src1) {
                std::mem::swap(src0, src1);
                op.cmp_op = op.cmp_op.flip();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
        }
        Op::Lop3(op) => {
            // Fold constants and modifiers if we can
            //
            // The new LUT evaluates the old one on inputs that have been
            // run through fold_lop_src for each source.
            op.op = LogicOp3::new_lut(&|mut x, mut y, mut z| {
                fold_lop_src(&op.srcs[0], &mut x);
                fold_lop_src(&op.srcs[1], &mut y);
                fold_lop_src(&op.srcs[2], &mut z);
                op.op.eval(x, y, z)
            });
            // Now that the LUT accounts for them, drop every modifier and
            // replace constant sources by Zero.
            for src in &mut op.srcs {
                src.src_mod = SrcMod::None;
                if src_as_lop_imm(src).is_some() {
                    src.src_ref = SrcRef::Zero;
                }
            }

            // Swapping sources is compensated for by permuting the
            // corresponding LUT arguments.
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            if !src_is_reg(src0) && src_is_reg(src1) {
                std::mem::swap(src0, src1);
                op.op = LogicOp3::new_lut(&|x, y, z| op.op.eval(y, x, z))
            }
            if !src_is_reg(src2) && src_is_reg(src1) {
                std::mem::swap(src2, src1);
                op.op = LogicOp3::new_lut(&|x, y, z| op.op.eval(x, z, y))
            }

            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
            copy_alu_src_if_not_reg(b, src2, SrcType::ALU);
        }
        Op::PopC(_) => (),
        Op::Shf(op) => {
            copy_alu_src_if_not_reg(b, &mut op.low, SrcType::ALU);
            copy_alu_src_if_not_reg(b, &mut op.high, SrcType::ALU);
        }
        Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::Mov(_) | Op::FRnd(_) => (),
        Op::Prmt(op) => {
            copy_alu_src_if_not_reg(b, &mut op.srcs[0], SrcType::ALU);
            copy_alu_src_if_not_reg(b, &mut op.srcs[1], SrcType::ALU);
        }
        Op::Sel(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            // Swapping the two selected values is compensated for by
            // inverting the condition.
            if swap_srcs_if_not_reg(src0, src1) {
                op.cond = op.cond.bnot();
            }
            copy_alu_src_if_not_reg(b, src0, SrcType::ALU);
        }
        Op::PLop3(op) => {
            // Fold constants and modifiers if we can
            //
            // Same scheme as Lop3, applied to every LUT in `op.ops`.
            for lop in &mut op.ops {
                *lop = LogicOp3::new_lut(&|mut x, mut y, mut z| {
                    fold_lop_src(&op.srcs[0], &mut x);
                    fold_lop_src(&op.srcs[1], &mut y);
                    fold_lop_src(&op.srcs[2], &mut z);
                    lop.eval(x, y, z)
                });
            }
            // Drop every modifier and replace constant sources by True.
            for src in &mut op.srcs {
                src.src_mod = SrcMod::None;
                if src_as_lop_imm(src).is_some() {
                    src.src_ref = SrcRef::True;
                }
            }

            // Swapping sources is compensated for by permuting the
            // corresponding arguments of every LUT.
            let [ref mut src0, ref mut src1, ref mut src2] = op.srcs;
            if !src_is_reg(src0) && src_is_reg(src1) {
                std::mem::swap(src0, src1);
                for lop in &mut op.ops {
                    *lop = LogicOp3::new_lut(&|x, y, z| lop.eval(y, x, z));
                }
            }
            if !src_is_reg(src2) && src_is_reg(src1) {
                std::mem::swap(src2, src1);
                for lop in &mut op.ops {
                    *lop = LogicOp3::new_lut(&|x, y, z| lop.eval(x, z, y));
                }
            }

            copy_alu_src_if_not_reg(b, src0, SrcType::Pred);
            copy_alu_src_if_not_reg(b, src2, SrcType::Pred);
        }
        Op::FSwzAdd(op) => {
            let [ref mut src0, ref mut src1] = op.srcs;
            copy_alu_src_if_not_reg(b, src0, SrcType::F32);
            copy_alu_src_if_not_reg(b, src1, SrcType::F32);
        }
        Op::Shfl(op) => {
            copy_alu_src_if_not_reg(b, &mut op.src, SrcType::GPR);
            copy_alu_src_if_cbuf(b, &mut op.lane, SrcType::ALU);
            copy_alu_src_if_cbuf(b, &mut op.c, SrcType::ALU);
        }
        Op::Out(op) => {
            copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
            copy_alu_src_if_cbuf(b, &mut op.stream, SrcType::ALU);
        }
        Op::Break(op) => {
            let bar_in = op.bar_in.src_ref.as_ssa().unwrap();
            // If the op produces a barrier and the incoming barrier value
            // is still needed later, feed it a copy made by a round trip
            // through a GPR instead.
            // NOTE(review): presumably because the op would otherwise
            // clobber the incoming barrier -- confirm.
            if !op.bar_out.is_none() && bl.is_live_after_ip(&bar_in[0], ip) {
                let gpr = b.bmov_to_gpr(op.bar_in);
                let tmp = b.bmov_to_bar(gpr.into());
                op.bar_in = tmp.into();
            }
        }
        Op::BSSy(op) => {
            let bar_in = op.bar_in.src_ref.as_ssa().unwrap();
            // Same treatment as Break: hand the op a copy of the incoming
            // barrier when the original is still live afterwards.
            if !op.bar_out.is_none() && bl.is_live_after_ip(&bar_in[0], ip) {
                let gpr = b.bmov_to_gpr(op.bar_in);
                let tmp = b.bmov_to_bar(gpr.into());
                op.bar_in = tmp.into();
            }
        }
        Op::OutFinal(op) => {
            copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
        }
        Op::Ldc(_) => (), // Nothing to do
        Op::BSync(_) => (),
        Op::Vote(_) => (), // Nothing to do
        Op::Copy(_) => (), // Nothing to do
        _ => {
            // Opcodes without a dedicated arm: nothing is rewritten, the
            // sources are only checked against their declared types.
            let src_types = instr.src_types();
            for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
                match src_types[i] {
                    SrcType::SSA => {
                        assert!(src.as_ssa().is_some());
                    }
                    SrcType::GPR => {
                        assert!(src_is_reg(src));
                    }
                    SrcType::ALU
                    | SrcType::F32
                    | SrcType::F64
                    | SrcType::I32
                    | SrcType::B32 => {
                        panic!("ALU srcs must be legalized explicitly");
                    }
                    SrcType::Pred => {
                        panic!("Predicates must be legalized explicitly");
                    }
                    SrcType::Bar => (),
                }
            }
        }
    }
}
739
legalize_instr( b: &mut impl SSABuilder, bl: &impl BlockLiveness, ip: usize, instr: &mut Instr, )740 fn legalize_instr(
741 b: &mut impl SSABuilder,
742 bl: &impl BlockLiveness,
743 ip: usize,
744 instr: &mut Instr,
745 ) {
746 if b.sm() >= 70 {
747 legalize_sm70_instr(b, bl, ip, instr);
748 } else if b.sm() >= 50 {
749 legalize_sm50_instr(b, bl, ip, instr);
750 } else {
751 panic!("Unknown shader model SM{}", b.sm());
752 }
753
754 let src_types = instr.src_types();
755 for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
756 if let SrcRef::Imm32(u) = &mut src.src_ref {
757 *u = match src_types[i] {
758 SrcType::F32 | SrcType::F64 => match src.src_mod {
759 SrcMod::None => *u,
760 SrcMod::FAbs => *u & !(1_u32 << 31),
761 SrcMod::FNeg => *u ^ !(1_u32 << 31),
762 SrcMod::FNegAbs => *u | !(1_u32 << 31),
763 _ => panic!("Not a float source modifier"),
764 },
765 SrcType::I32 => match src.src_mod {
766 SrcMod::None => *u,
767 SrcMod::INeg => -(*u as i32) as u32,
768 _ => panic!("Not an integer source modifier"),
769 },
770 SrcType::B32 => match src.src_mod {
771 SrcMod::None => *u,
772 SrcMod::BNot => !*u,
773 _ => panic!("Not a bitwise source modifier"),
774 },
775 _ => {
776 assert!(src.src_mod.is_none());
777 *u
778 }
779 };
780 src.src_mod = SrcMod::None;
781 }
782 }
783
784 let mut vec_src_map: HashMap<SSARef, SSARef> = HashMap::new();
785 let mut vec_comps = HashSet::new();
786 for src in instr.srcs_mut() {
787 if let SrcRef::SSA(vec) = &src.src_ref {
788 if vec.comps() == 1 {
789 continue;
790 }
791
792 // If the same vector shows up twice in one instruction, that's
793 // okay. Just make it look the same as the previous source we
794 // fixed up.
795 if let Some(new_vec) = vec_src_map.get(vec) {
796 src.src_ref = (*new_vec).into();
797 continue;
798 }
799
800 let mut new_vec = *vec;
801 for c in 0..vec.comps() {
802 let ssa = vec[usize::from(c)];
803 // If the same SSA value shows up in multiple non-identical
804 // vector sources or as multiple components in the same
805 // source, we need to make a copy so it can get assigned to
806 // multiple different registers.
807 if vec_comps.get(&ssa).is_some() {
808 let copy = b.alloc_ssa(ssa.file(), 1)[0];
809 b.copy_to(copy.into(), ssa.into());
810 new_vec[usize::from(c)] = copy;
811 } else {
812 vec_comps.insert(ssa);
813 }
814 }
815
816 vec_src_map.insert(*vec, new_vec);
817 src.src_ref = new_vec.into();
818 }
819 }
820 }
821
822 impl Shader {
legalize(&mut self)823 pub fn legalize(&mut self) {
824 let sm = self.info.sm;
825 for f in &mut self.functions {
826 let live = SimpleLiveness::for_function(f);
827
828 for (bi, b) in f.blocks.iter_mut().enumerate() {
829 let bl = live.block_live(bi);
830
831 let mut instrs = Vec::new();
832 for (ip, mut instr) in b.instrs.drain(..).enumerate() {
833 let mut b = SSAInstrBuilder::new(sm, &mut f.ssa_alloc);
834 legalize_instr(&mut b, bl, ip, &mut instr);
835 b.push_instr(instr);
836 instrs.append(&mut b.as_vec());
837 }
838 b.instrs = instrs;
839 }
840 }
841 }
842 }
843