1 // Copyright © 2022 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 use crate::api::{GetDebugFlags, DEBUG};
5 use crate::ir::*;
6 use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
7
8 use std::collections::{HashMap, HashSet};
9
/// Builder type through which the legalization code in this file emits
/// instructions; an alias for [`SSAInstrBuilder`].
pub type LegalizeBuilder<'a> = SSAInstrBuilder<'a>;
11
src_is_upred_reg(src: &Src) -> bool12 pub fn src_is_upred_reg(src: &Src) -> bool {
13 match &src.src_ref {
14 SrcRef::True | SrcRef::False => false,
15 SrcRef::SSA(ssa) => {
16 assert!(ssa.comps() == 1);
17 match ssa[0].file() {
18 RegFile::Pred => false,
19 RegFile::UPred => true,
20 _ => panic!("Not a predicate source"),
21 }
22 }
23 SrcRef::Reg(_) => panic!("Not in SSA form"),
24 _ => panic!("Not a predicate source"),
25 }
26 }
27
src_is_reg(src: &Src, reg_file: RegFile) -> bool28 pub fn src_is_reg(src: &Src, reg_file: RegFile) -> bool {
29 match src.src_ref {
30 SrcRef::Zero | SrcRef::True | SrcRef::False => true,
31 SrcRef::SSA(ssa) => ssa.file() == Some(reg_file),
32 SrcRef::Imm32(_) | SrcRef::CBuf(_) => false,
33 SrcRef::Reg(_) => panic!("Not in SSA form"),
34 }
35 }
36
swap_srcs_if_not_reg( x: &mut Src, y: &mut Src, reg_file: RegFile, ) -> bool37 pub fn swap_srcs_if_not_reg(
38 x: &mut Src,
39 y: &mut Src,
40 reg_file: RegFile,
41 ) -> bool {
42 if !src_is_reg(x, reg_file) && src_is_reg(y, reg_file) {
43 std::mem::swap(x, y);
44 true
45 } else {
46 false
47 }
48 }
49
src_is_imm(src: &Src) -> bool50 fn src_is_imm(src: &Src) -> bool {
51 matches!(src.src_ref, SrcRef::Imm32(_))
52 }
53
54 pub trait LegalizeBuildHelpers: SSABuilder {
copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile)55 fn copy_ssa(&mut self, ssa: &mut SSAValue, reg_file: RegFile) {
56 let tmp = self.alloc_ssa(reg_file, 1)[0];
57 self.copy_to(tmp.into(), (*ssa).into());
58 *ssa = tmp;
59 }
60
copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile)61 fn copy_ssa_ref(&mut self, vec: &mut SSARef, reg_file: RegFile) {
62 for ssa in &mut vec[..] {
63 self.copy_ssa(ssa, reg_file);
64 }
65 }
66
copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue)67 fn copy_pred_ssa_if_uniform(&mut self, ssa: &mut SSAValue) {
68 match ssa.file() {
69 RegFile::Pred => (),
70 RegFile::UPred => self.copy_ssa(ssa, RegFile::Pred),
71 _ => panic!("Not a predicate value"),
72 }
73 }
74
copy_pred_if_upred(&mut self, pred: &mut Pred)75 fn copy_pred_if_upred(&mut self, pred: &mut Pred) {
76 match &mut pred.pred_ref {
77 PredRef::None => (),
78 PredRef::SSA(ssa) => {
79 self.copy_pred_ssa_if_uniform(ssa);
80 }
81 PredRef::Reg(_) => panic!("Not in SSA form"),
82 }
83 }
84
copy_src_if_upred(&mut self, src: &mut Src)85 fn copy_src_if_upred(&mut self, src: &mut Src) {
86 match &mut src.src_ref {
87 SrcRef::True | SrcRef::False => (),
88 SrcRef::SSA(ssa) => {
89 assert!(ssa.comps() == 1);
90 self.copy_pred_ssa_if_uniform(&mut ssa[0]);
91 }
92 SrcRef::Reg(_) => panic!("Not in SSA form"),
93 _ => panic!("Not a predicate source"),
94 }
95 }
96
copy_src_if_not_same_file(&mut self, src: &mut Src)97 fn copy_src_if_not_same_file(&mut self, src: &mut Src) {
98 let SrcRef::SSA(vec) = &mut src.src_ref else {
99 return;
100 };
101
102 if vec.comps() == 1 {
103 return;
104 }
105
106 let mut all_same = true;
107 let file = vec[0].file();
108 for i in 1..vec.comps() {
109 let c_file = vec[usize::from(i)].file();
110 if c_file != file {
111 debug_assert!(c_file.to_warp() == file.to_warp());
112 all_same = false;
113 }
114 }
115
116 if !all_same {
117 self.copy_ssa_ref(vec, file.to_warp());
118 }
119 }
120
copy_alu_src( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )121 fn copy_alu_src(
122 &mut self,
123 src: &mut Src,
124 reg_file: RegFile,
125 src_type: SrcType,
126 ) {
127 let val = match src_type {
128 SrcType::GPR
129 | SrcType::ALU
130 | SrcType::F32
131 | SrcType::F16
132 | SrcType::F16v2
133 | SrcType::I32
134 | SrcType::B32 => self.alloc_ssa(reg_file, 1),
135 SrcType::F64 => self.alloc_ssa(reg_file, 2),
136 SrcType::Pred => self.alloc_ssa(reg_file, 1),
137 _ => panic!("Unknown source type"),
138 };
139
140 if DEBUG.annotate() {
141 self.push_instr(Instr::new_boxed(OpAnnotate {
142 annotation: "copy generated by legalizer".into(),
143 }));
144 }
145
146 if val.comps() == 1 {
147 self.copy_to(val.into(), src.src_ref.into());
148 } else {
149 match src.src_ref {
150 SrcRef::Imm32(u) => {
151 // Immediates go in the top bits
152 self.copy_to(val[0].into(), 0.into());
153 self.copy_to(val[1].into(), u.into());
154 }
155 SrcRef::CBuf(cb) => {
156 // CBufs load 8B
157 self.copy_to(val[0].into(), cb.into());
158 self.copy_to(val[1].into(), cb.offset(4).into());
159 }
160 SrcRef::SSA(vec) => {
161 assert!(vec.comps() == 2);
162 self.copy_to(val[0].into(), vec[0].into());
163 self.copy_to(val[1].into(), vec[1].into());
164 }
165 _ => panic!("Invalid 64-bit SrcRef"),
166 }
167 }
168
169 src.src_ref = val.into();
170 }
171
copy_alu_src_if_not_reg( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )172 fn copy_alu_src_if_not_reg(
173 &mut self,
174 src: &mut Src,
175 reg_file: RegFile,
176 src_type: SrcType,
177 ) {
178 if !src_is_reg(src, reg_file) {
179 self.copy_alu_src(src, reg_file, src_type);
180 }
181 }
182
copy_alu_src_if_not_reg_or_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )183 fn copy_alu_src_if_not_reg_or_imm(
184 &mut self,
185 src: &mut Src,
186 reg_file: RegFile,
187 src_type: SrcType,
188 ) {
189 if !src_is_reg(src, reg_file)
190 && !matches!(&src.src_ref, SrcRef::Imm32(_))
191 {
192 self.copy_alu_src(src, reg_file, src_type);
193 }
194 }
195
copy_alu_src_if_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )196 fn copy_alu_src_if_imm(
197 &mut self,
198 src: &mut Src,
199 reg_file: RegFile,
200 src_type: SrcType,
201 ) {
202 if src_is_imm(src) {
203 self.copy_alu_src(src, reg_file, src_type);
204 }
205 }
206
copy_alu_src_if_ineg_imm( &mut self, src: &mut Src, reg_file: RegFile, src_type: SrcType, )207 fn copy_alu_src_if_ineg_imm(
208 &mut self,
209 src: &mut Src,
210 reg_file: RegFile,
211 src_type: SrcType,
212 ) {
213 assert!(src_type == SrcType::I32);
214 if src_is_imm(src) && src.src_mod.is_ineg() {
215 self.copy_alu_src(src, reg_file, src_type);
216 }
217 }
218
copy_alu_src_if_both_not_reg( &mut self, src1: &Src, src2: &mut Src, reg_file: RegFile, src_type: SrcType, )219 fn copy_alu_src_if_both_not_reg(
220 &mut self,
221 src1: &Src,
222 src2: &mut Src,
223 reg_file: RegFile,
224 src_type: SrcType,
225 ) {
226 if !src_is_reg(src1, reg_file) && !src_is_reg(src2, reg_file) {
227 self.copy_alu_src(src2, reg_file, src_type);
228 }
229 }
230
copy_alu_src_and_lower_fmod( &mut self, src: &mut Src, src_type: SrcType, )231 fn copy_alu_src_and_lower_fmod(
232 &mut self,
233 src: &mut Src,
234 src_type: SrcType,
235 ) {
236 match src_type {
237 SrcType::F16 | SrcType::F16v2 => {
238 let val = self.alloc_ssa(RegFile::GPR, 1);
239 self.push_op(OpHAdd2 {
240 dst: val.into(),
241 srcs: [Src::new_zero().fneg(), *src],
242 saturate: false,
243 ftz: false,
244 f32: false,
245 });
246 *src = val.into();
247 }
248 SrcType::F32 => {
249 let val = self.alloc_ssa(RegFile::GPR, 1);
250 self.push_op(OpFAdd {
251 dst: val.into(),
252 srcs: [Src::new_zero().fneg(), *src],
253 saturate: false,
254 rnd_mode: FRndMode::NearestEven,
255 ftz: false,
256 });
257 *src = val.into();
258 }
259 SrcType::F64 => {
260 let val = self.alloc_ssa(RegFile::GPR, 2);
261 self.push_op(OpDAdd {
262 dst: val.into(),
263 srcs: [Src::new_zero().fneg(), *src],
264 rnd_mode: FRndMode::NearestEven,
265 });
266 *src = val.into();
267 }
268 _ => panic!("Invalid ffabs srouce type"),
269 }
270 }
271
copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef)272 fn copy_ssa_ref_if_uniform(&mut self, ssa_ref: &mut SSARef) {
273 for ssa in &mut ssa_ref[..] {
274 if ssa.is_uniform() {
275 let warp = self.alloc_ssa(ssa.file().to_warp(), 1)[0];
276 self.copy_to(warp.into(), (*ssa).into());
277 *ssa = warp;
278 }
279 }
280 }
281 }
282
283 impl LegalizeBuildHelpers for LegalizeBuilder<'_> {}
284
legalize_instr( sm: &dyn ShaderModel, b: &mut LegalizeBuilder, bl: &impl BlockLiveness, block_uniform: bool, pinned: &HashSet<SSARef>, ip: usize, instr: &mut Instr, )285 fn legalize_instr(
286 sm: &dyn ShaderModel,
287 b: &mut LegalizeBuilder,
288 bl: &impl BlockLiveness,
289 block_uniform: bool,
290 pinned: &HashSet<SSARef>,
291 ip: usize,
292 instr: &mut Instr,
293 ) {
294 // Handle a few no-op cases up-front
295 match &instr.op {
296 Op::Annotate(_) => {
297 // OpAnnotate does nothing. There's nothing to legalize.
298 return;
299 }
300 Op::Undef(_)
301 | Op::PhiSrcs(_)
302 | Op::PhiDsts(_)
303 | Op::Pin(_)
304 | Op::Unpin(_)
305 | Op::RegOut(_) => {
306 // These are implemented by RA and can take pretty much anything
307 // you can throw at them.
308 debug_assert!(instr.pred.is_true());
309 return;
310 }
311 Op::Copy(_) => {
312 // OpCopy is implemented in a lowering pass and can handle anything
313 return;
314 }
315 Op::SrcBar(_) => {
316 // This is turned into a nop by calc_instr_deps
317 return;
318 }
319 Op::Swap(_) | Op::ParCopy(_) => {
320 // These are generated by RA and should not exist yet
321 panic!("Unsupported instruction");
322 }
323 _ => (),
324 }
325
326 if !instr.is_uniform() {
327 b.copy_pred_if_upred(&mut instr.pred);
328 }
329
330 let src_types = instr.src_types();
331 for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
332 *src = src.fold_imm(src_types[i]);
333 b.copy_src_if_not_same_file(src);
334
335 if !block_uniform {
336 // In non-uniform control-flow, we can't collect uniform vectors so
337 // we need to insert copies to warp regs which we can collect.
338 match &mut src.src_ref {
339 SrcRef::SSA(vec) => {
340 if vec.is_uniform()
341 && vec.comps() > 1
342 && !pinned.contains(vec)
343 {
344 b.copy_ssa_ref(vec, vec.file().unwrap().to_warp());
345 }
346 }
347 SrcRef::CBuf(CBufRef {
348 buf: CBuf::BindlessSSA(handle),
349 ..
350 }) => assert!(pinned.contains(handle)),
351 _ => (),
352 }
353 }
354 }
355
356 // OpBreak and OpBSsy impose additional RA constraints
357 match &mut instr.op {
358 Op::Break(OpBreak {
359 bar_in, bar_out, ..
360 })
361 | Op::BSSy(OpBSSy {
362 bar_in, bar_out, ..
363 }) => {
364 let bar_in_ssa = bar_in.src_ref.as_ssa().unwrap();
365 if !bar_out.is_none() && bl.is_live_after_ip(&bar_in_ssa[0], ip) {
366 let gpr = b.bmov_to_gpr(*bar_in);
367 let tmp = b.bmov_to_bar(gpr.into());
368 *bar_in = tmp.into();
369 }
370 }
371 _ => (),
372 }
373
374 sm.legalize_op(b, &mut instr.op);
375
376 let mut vec_src_map: HashMap<SSARef, SSARef> = HashMap::new();
377 let mut vec_comps = HashSet::new();
378 for src in instr.srcs_mut() {
379 if let SrcRef::SSA(vec) = &src.src_ref {
380 if vec.comps() == 1 {
381 continue;
382 }
383
384 // If the same vector shows up twice in one instruction, that's
385 // okay. Just make it look the same as the previous source we
386 // fixed up.
387 if let Some(new_vec) = vec_src_map.get(vec) {
388 src.src_ref = (*new_vec).into();
389 continue;
390 }
391
392 let mut new_vec = *vec;
393 for c in 0..vec.comps() {
394 let ssa = vec[usize::from(c)];
395 // If the same SSA value shows up in multiple non-identical
396 // vector sources or as multiple components in the same
397 // source, we need to make a copy so it can get assigned to
398 // multiple different registers.
399 if vec_comps.get(&ssa).is_some() {
400 let copy = b.alloc_ssa(ssa.file(), 1)[0];
401 b.copy_to(copy.into(), ssa.into());
402 new_vec[usize::from(c)] = copy;
403 } else {
404 vec_comps.insert(ssa);
405 }
406 }
407
408 vec_src_map.insert(*vec, new_vec);
409 src.src_ref = new_vec.into();
410 }
411 }
412 }
413
414 impl Shader<'_> {
legalize(&mut self)415 pub fn legalize(&mut self) {
416 let sm = self.sm;
417 for f in &mut self.functions {
418 let live = SimpleLiveness::for_function(f);
419 let mut pinned = HashSet::new();
420
421 for (bi, b) in f.blocks.iter_mut().enumerate() {
422 let bl = live.block_live(bi);
423 let bu = b.uniform;
424
425 let mut instrs = Vec::new();
426 for (ip, mut instr) in b.instrs.drain(..).enumerate() {
427 if let Op::Pin(pin) = &instr.op {
428 if let Dst::SSA(ssa) = &pin.dst {
429 pinned.insert(*ssa);
430 }
431 }
432
433 let mut b = SSAInstrBuilder::new(sm, &mut f.ssa_alloc);
434 legalize_instr(sm, &mut b, bl, bu, &pinned, ip, &mut instr);
435 b.push_instr(instr);
436 instrs.append(&mut b.as_vec());
437 }
438 b.instrs = instrs;
439 }
440 }
441 }
442 }
443