// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::ir::*;

use std::collections::HashMap;

// A recorded copy: the source an SSA value is known to be a copy of, plus
// the source type it was written with so we can tell when modifiers are
// safe to fold.
struct CopyEntry {
    src_type: SrcType,
    src: Src,
}

struct CopyPropPass {
    ssa_map: HashMap<SSAValue, CopyEntry>,
}

impl CopyPropPass {
    pub fn new() -> CopyPropPass {
        CopyPropPass {
            ssa_map: HashMap::new(),
        }
    }

    fn add_copy(&mut self, dst: SSAValue, src_type: SrcType, src: Src) {
        assert!(src.src_ref.get_reg().is_none());
        self.ssa_map.insert(dst, CopyEntry { src_type, src });
    }

    // Records a 64-bit copy as two 32-bit copies, one per half of dst.
    // Source modifiers on a 64-bit value only affect the high 32 bits, so
    // the low half is always recorded as a plain ALU copy.
    fn add_fp64_copy(&mut self, dst: &SSARef, src: Src) {
        assert!(dst.comps() == 2);
        match src.src_ref {
            SrcRef::Zero | SrcRef::Imm32(_) => {
                self.add_copy(dst[0], SrcType::ALU, Src::new_zero());
                self.add_copy(dst[1], SrcType::F64, src);
            }
            SrcRef::CBuf(cb) => {
                let lo32 = Src::from(SrcRef::CBuf(cb));
                let hi32 = Src {
                    src_ref: SrcRef::CBuf(cb.offset(4)),
                    src_mod: src.src_mod,
                };
                self.add_copy(dst[0], SrcType::ALU, lo32);
                self.add_copy(dst[1], SrcType::F64, hi32);
            }
            SrcRef::SSA(ssa) => {
                assert!(ssa.comps() == 2);
                let lo32 = Src::from(ssa[0]);
                let hi32 = Src {
                    src_ref: ssa[1].into(),
                    src_mod: src.src_mod,
                };
                self.add_copy(dst[0], SrcType::ALU, lo32);
                self.add_copy(dst[1], SrcType::F64, hi32);
            }
            _ => (),
        }
    }

    fn get_copy(&self, dst: &SSAValue) -> Option<&CopyEntry> {
        self.ssa_map.get(dst)
    }

    // Rewrites a predicate in place, chasing copies until it either bottoms
    // out in a non-copy or folds to True/False (encoded as PredRef::None
    // with the appropriate inversion).
    fn prop_to_pred(&self, pred: &mut Pred) {
        loop {
            let src_ssa = match &pred.pred_ref {
                PredRef::SSA(ssa) => ssa,
                _ => return,
            };

            let entry = match self.get_copy(src_ssa) {
                Some(e) => e,
                None => return,
            };

            match entry.src.src_ref {
                SrcRef::True => {
                    pred.pred_ref = PredRef::None;
                }
                SrcRef::False => {
                    pred.pred_ref = PredRef::None;
                    pred.pred_inv = !pred.pred_inv;
                }
                SrcRef::SSA(ssa) => {
                    assert!(ssa.comps() == 1);
                    pred.pred_ref = PredRef::SSA(ssa[0]);
                }
                _ => return,
            }

            match entry.src.src_mod {
                SrcMod::None => (),
                SrcMod::BNot => {
                    pred.pred_inv = !pred.pred_inv;
                }
                _ => panic!("Invalid predicate modifier"),
            }
        }
    }

    // Rewrites each component of an SSA ref which is a plain (unmodified)
    // copy of another SSA value.  Returns true if anything changed.
    fn prop_to_ssa_ref(&self, src_ssa: &mut SSARef) -> bool {
        let mut progress = false;

        for c in 0..src_ssa.comps() {
            let c_ssa = &mut src_ssa[usize::from(c)];
            let entry = match self.get_copy(c_ssa) {
                Some(e) => e,
                None => continue,
            };

            if entry.src.src_mod.is_none() {
                if let SrcRef::SSA(entry_ssa) = entry.src.src_ref {
                    assert!(entry_ssa.comps() == 1);
                    *c_ssa = entry_ssa[0];
                    progress = true;
                }
            }
        }

        progress
    }

    fn prop_to_ssa_src(&self, src: &mut Src) {
        assert!(src.src_mod.is_none());
        if let SrcRef::SSA(src_ssa) = &mut src.src_ref {
            loop {
                if !self.prop_to_ssa_ref(src_ssa) {
                    break;
                }
            }
        }
    }

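    // Propagates copies into a GPR source.  Components are first chased
    // individually via prop_to_ssa_ref(); if every component is then known
    // to be zero, the whole vector source collapses to SrcRef::Zero.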
    fn prop_to_gpr_src(&self, src: &mut Src) {
        loop {
            let src_ssa = match &mut src.src_ref {
                SrcRef::SSA(ssa) => {
                    // First, try to propagate SSA components
                    if self.prop_to_ssa_ref(ssa) {
                        continue;
                    }
                    ssa
                }
                _ => return,
            };

            for c in 0..usize::from(src_ssa.comps()) {
                let entry = match self.get_copy(&src_ssa[c]) {
                    Some(e) => e,
                    None => return,
                };

                match entry.src.src_ref {
                    SrcRef::Zero | SrcRef::Imm32(0) => (),
                    _ => return,
                }
            }

            // If we got here, all the components are zero
            src.src_ref = SrcRef::Zero;
        }
    }

    fn prop_to_scalar_src(&self, src_type: SrcType, src: &mut Src) {
        loop {
            let src_ssa = match &src.src_ref {
                SrcRef::SSA(ssa) => ssa,
                _ => return,
            };

            assert!(src_ssa.comps() == 1);
            let entry = match self.get_copy(&src_ssa[0]) {
                Some(e) => e,
                None => return,
            };

            // If there are modifiers, the source types have to match
            if !entry.src.src_mod.is_none() && entry.src_type != src_type {
                return;
            }

            src.src_ref = entry.src.src_ref;
            src.src_mod = entry.src.src_mod.modify(src.src_mod);
        }
    }

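    // Propagates copies into a 64-bit float source.  The two 32-bit halves
    // are chased independently while they remain SSA; once both halves are
    // non-SSA copies, matching lo/hi pairs are fused back into a single
    // Zero, Imm32, or CBuf source.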
    fn prop_to_f64_src(&self, src: &mut Src) {
        loop {
            let src_ssa = match &mut src.src_ref {
                SrcRef::SSA(ssa) => ssa,
                _ => return,
            };

            assert!(src_ssa.comps() == 2);

            // First, try to propagate the two halves individually.  Source
            // modifiers only apply to the high 32 bits so we have to reject
            // any copies with source modifiers in the low bits and apply
            // source modifiers as needed when propagating the high bits.
            let lo_entry_or_none = self.get_copy(&src_ssa[0]);
            if let Some(lo_entry) = lo_entry_or_none {
                if lo_entry.src.src_mod.is_none() {
                    if let SrcRef::SSA(lo_entry_ssa) = lo_entry.src.src_ref {
                        src_ssa[0] = lo_entry_ssa[0];
                        continue;
                    }
                }
            }

            let hi_entry_or_none = self.get_copy(&src_ssa[1]);
            if let Some(hi_entry) = hi_entry_or_none {
                if hi_entry.src.src_mod.is_none()
                    || hi_entry.src_type == SrcType::F64
                {
                    if let SrcRef::SSA(hi_entry_ssa) = hi_entry.src.src_ref {
                        src_ssa[1] = hi_entry_ssa[0];
                        src.src_mod = hi_entry.src.src_mod.modify(src.src_mod);
                        continue;
                    }
                }
            }

            let Some(lo_entry) = lo_entry_or_none else {
                return;
            };

            let Some(hi_entry) = hi_entry_or_none else {
                return;
            };

            if !lo_entry.src.src_mod.is_none() {
                return;
            }

            if !hi_entry.src.src_mod.is_none()
                && hi_entry.src_type != SrcType::F64
            {
                return;
            }

            let new_src_ref = match hi_entry.src.src_ref {
                SrcRef::Zero => match lo_entry.src.src_ref {
                    SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Zero,
                    _ => return,
                },
                SrcRef::Imm32(i) => {
                    // 32-bit immediates for f64 sources are the top 32 bits
                    // with zero in the lower 32.
                    match lo_entry.src.src_ref {
                        SrcRef::Zero | SrcRef::Imm32(0) => SrcRef::Imm32(i),
                        _ => return,
                    }
                }
                SrcRef::CBuf(hi_cb) => match lo_entry.src.src_ref {
                    SrcRef::CBuf(lo_cb) => {
                        if hi_cb.buf != lo_cb.buf {
                            return;
                        }
                        if lo_cb.offset % 8 != 0 {
                            return;
                        }
                        if hi_cb.offset != lo_cb.offset + 4 {
                            return;
                        }
                        SrcRef::CBuf(lo_cb)
                    }
                    _ => return,
                },
                // SrcRef::SSA is already handled above
                _ => return,
            };

            src.src_ref = new_src_ref;
            src.src_mod = hi_entry.src.src_mod.modify(src.src_mod);
        }
    }

    fn prop_to_src(&self, src_type: SrcType, src: &mut Src) {
        match src_type {
            SrcType::SSA => {
                self.prop_to_ssa_src(src);
            }
            SrcType::GPR => {
                self.prop_to_gpr_src(src);
            }
            SrcType::ALU
            | SrcType::F32
            | SrcType::I32
            | SrcType::B32
            | SrcType::Pred => {
                self.prop_to_scalar_src(src_type, src);
            }
            SrcType::F64 => {
                self.prop_to_f64_src(src);
            }
            SrcType::Bar => (),
        }
    }

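    // Records any copies produced by instr.  Besides plain copies, this
    // recognizes instructions that reduce to copies: adds with -0.0,
    // LOP3/PLOP3 with constant or single-source LUTs, integer negation
    // (recorded as a copy with an ineg modifier), and byte permutes that
    // select one whole source or fold to an immediate.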
    fn try_add_instr(&mut self, instr: &Instr) {
        match &instr.op {
            Op::FAdd(add) => {
                let dst = add.dst.as_ssa().unwrap();
                assert!(dst.comps() == 1);
                let dst = dst[0];

                if !add.saturate {
                    if add.srcs[0].is_fneg_zero(SrcType::F32) {
                        self.add_copy(dst, SrcType::F32, add.srcs[1]);
                    } else if add.srcs[1].is_fneg_zero(SrcType::F32) {
                        self.add_copy(dst, SrcType::F32, add.srcs[0]);
                    }
                }
            }
            Op::DAdd(add) => {
                let dst = add.dst.as_ssa().unwrap();
                if add.srcs[0].is_fneg_zero(SrcType::F64) {
                    self.add_fp64_copy(dst, add.srcs[1]);
                } else if add.srcs[1].is_fneg_zero(SrcType::F64) {
                    self.add_fp64_copy(dst, add.srcs[0]);
                }
            }
            Op::Lop3(lop) => {
                let dst = lop.dst.as_ssa().unwrap();
                assert!(dst.comps() == 1);
                let dst = dst[0];

                let op = lop.op;
                if op.lut == 0 {
                    self.add_copy(dst, SrcType::ALU, SrcRef::Zero.into());
                } else if op.lut == !0 {
                    self.add_copy(
                        dst,
                        SrcType::ALU,
                        SrcRef::Imm32(u32::MAX).into(),
                    );
                } else {
                    for s in 0..3 {
                        if op.lut == LogicOp3::SRC_MASKS[s] {
                            self.add_copy(dst, SrcType::ALU, lop.srcs[s]);
                        }
                    }
                }
            }
            Op::PLop3(lop) => {
                for i in 0..2 {
                    let dst = match lop.dsts[i] {
                        Dst::SSA(vec) => {
                            assert!(vec.comps() == 1);
                            vec[0]
                        }
                        _ => continue,
                    };

                    let op = lop.ops[i];
                    if op.lut == 0 {
                        self.add_copy(dst, SrcType::Pred, SrcRef::False.into());
                    } else if op.lut == !0 {
                        self.add_copy(dst, SrcType::Pred, SrcRef::True.into());
                    } else {
                        for s in 0..3 {
                            if op.lut == LogicOp3::SRC_MASKS[s] {
                                self.add_copy(dst, SrcType::Pred, lop.srcs[s]);
                            } else if op.lut == !LogicOp3::SRC_MASKS[s] {
                                self.add_copy(
                                    dst,
                                    SrcType::Pred,
                                    lop.srcs[s].bnot(),
                                );
                            }
                        }
                    }
                }
            }
            Op::INeg(neg) => {
                let dst = neg.dst.as_ssa().unwrap();
                assert!(dst.comps() == 1);
                self.add_copy(dst[0], SrcType::I32, neg.src.ineg());
            }
            Op::Prmt(prmt) => {
                let dst = prmt.dst.as_ssa().unwrap();
                assert!(dst.comps() == 1);
                if prmt.mode != PrmtMode::Index {
                    return;
                }
                let SrcRef::Imm32(sel) = prmt.sel.src_ref else {
                    return;
                };

                if sel == 0x3210 {
                    self.add_copy(dst[0], SrcType::GPR, prmt.srcs[0]);
                } else if sel == 0x7654 {
                    self.add_copy(dst[0], SrcType::GPR, prmt.srcs[1]);
                } else {
                    let mut is_imm = true;
                    let mut imm = 0_u32;
                    for d in 0..4 {
                        let s = ((sel >> (d * 4)) & 0x7) as usize;
                        let sign = (sel >> (d * 4)) & 0x8 != 0;
                        if let Some(u) = prmt.srcs[s / 4].as_u32() {
                            // Select byte (s % 4) of the chosen source.  A
                            // plain s * 8 would shift the u32 by up to 56
                            // bits for bytes taken from srcs[1].
                            let mut sb = (u >> ((s % 4) * 8)) as u8;
                            if sign {
                                // Sign-extend: replicate the byte's top bit
                                sb = ((sb as i8) >> 7) as u8;
                            }
                            imm |= (sb as u32) << (d * 8);
                        } else {
                            is_imm = false;
                            break;
                        }
                    }
                    if is_imm {
                        self.add_copy(dst[0], SrcType::GPR, imm.into());
                    }
                }
            }
            Op::Copy(copy) => {
                let dst = copy.dst.as_ssa().unwrap();
                assert!(dst.comps() == 1);
                self.add_copy(dst[0], SrcType::GPR, copy.src);
            }
            Op::ParCopy(pcopy) => {
                for (dst, src) in pcopy.dsts_srcs.iter() {
                    let dst = dst.as_ssa().unwrap();
                    assert!(dst.comps() == 1);
                    self.add_copy(dst[0], SrcType::GPR, *src);
                }
            }
            _ => (),
        }
    }

    // A single forward walk over the function: record the copies each
    // instruction produces, then rewrite its predicate and sources in place.
    pub fn run(&mut self, f: &mut Function) {
        for b in &mut f.blocks {
            for instr in &mut b.instrs {
                self.try_add_instr(instr);

                self.prop_to_pred(&mut instr.pred);

                let src_types = instr.src_types();
                for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
                    self.prop_to_src(src_types[i], src);
                }
            }
        }
    }
}

impl Shader {
    pub fn opt_copy_prop(&mut self) {
        for f in &mut self.functions {
            CopyPropPass::new().run(f);
        }
    }
}