// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::api::{GetDebugFlags, DEBUG};
use crate::ir::*;
use crate::liveness::{BlockLiveness, Liveness, SimpleLiveness};
use crate::union_find::UnionFind;

use compiler::bitset::BitSet;
use std::cmp::{max, min, Ordering};
use std::collections::{HashMap, HashSet};

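/// An insertion-ordered set of SSA values killed by an instruction.
///
/// The HashSet gives O(1) duplicate checks while the Vec preserves a stable
/// iteration order so that register allocation stays deterministic.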
struct KillSet {
    set: HashSet<SSAValue>,
    vec: Vec<SSAValue>,
}

impl KillSet {
    pub fn new() -> KillSet {
        KillSet {
            set: HashSet::new(),
            vec: Vec::new(),
        }
    }

    pub fn len(&self) -> usize {
        self.vec.len()
    }

    pub fn clear(&mut self) {
        self.set.clear();
        self.vec.clear();
    }

    pub fn insert(&mut self, ssa: SSAValue) {
        if self.set.insert(ssa) {
            self.vec.push(ssa);
        }
    }

    pub fn iter(&self) -> std::slice::Iter<'_, SSAValue> {
        self.vec.iter()
    }

    pub fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }
}

// These two helpers are carefully paired for the purposes of RA.
// src_ssa_ref() returns whatever SSARef is present in the source, if any.
// src_set_reg() overwrites that SSARef with a RegRef.
#[inline]
fn src_ssa_ref(src: &Src) -> Option<&SSARef> {
    match &src.src_ref {
        SrcRef::SSA(ssa) => Some(ssa),
        SrcRef::CBuf(CBufRef {
            buf: CBuf::BindlessSSA(ssa),
            ..
        }) => Some(ssa),
        _ => None,
    }
}

#[inline]
fn src_set_reg(src: &mut Src, reg: RegRef) {
    match &mut src.src_ref {
        SrcRef::SSA(_) => {
            src.src_ref = reg.into();
        }
        SrcRef::CBuf(cb) => {
            debug_assert!(matches!(&cb.buf, CBuf::BindlessSSA(_)));
            debug_assert!(reg.file() == RegFile::UGPR && reg.comps() == 2);
            cb.buf = CBuf::BindlessUGPR(reg);
        }
        _ => (),
    }
}

enum SSAUse {
    FixedReg(u32),
    Vec(SSARef),
}

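/// A per-block map from each SSA value to the uses that constrain it,
/// recording whether a use wants a fixed register (OpRegOut) or is part of a
/// vector source.  alloc_scalar() consults this to bias scalar assignments
/// toward registers that will satisfy a later vector or fixed-register use.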
struct SSAUseMap {
    ssa_map: HashMap<SSAValue, Vec<(usize, SSAUse)>>,
}

impl SSAUseMap {
    fn add_fixed_reg_use(&mut self, ip: usize, ssa: SSAValue, reg: u32) {
        let v = self.ssa_map.entry(ssa).or_default();
        v.push((ip, SSAUse::FixedReg(reg)));
    }

    fn add_vec_use(&mut self, ip: usize, vec: SSARef) {
        if vec.comps() == 1 {
            return;
        }

        for ssa in vec.iter() {
            let v = self.ssa_map.entry(*ssa).or_default();
            v.push((ip, SSAUse::Vec(vec)));
        }
    }

    fn find_vec_use_after(&self, ssa: SSAValue, ip: usize) -> Option<&SSAUse> {
        if let Some(v) = self.ssa_map.get(&ssa) {
            let p = v.partition_point(|(uip, _)| *uip <= ip);
            if p == v.len() {
                None
            } else {
                let (_, u) = &v[p];
                Some(u)
            }
        } else {
            None
        }
    }

    pub fn add_block(&mut self, b: &BasicBlock) {
        for (ip, instr) in b.instrs.iter().enumerate() {
            match &instr.op {
                Op::RegOut(op) => {
                    for (i, src) in op.srcs.iter().enumerate() {
                        let out_reg = u32::try_from(i).unwrap();
                        if let Some(ssa) = src_ssa_ref(src) {
                            assert!(ssa.comps() == 1);
                            self.add_fixed_reg_use(ip, ssa[0], out_reg);
                        }
                    }
                }
                _ => {
                    // We don't care about predicates because they're scalar
                    for src in instr.srcs() {
                        if let Some(ssa) = src_ssa_ref(src) {
                            self.add_vec_use(ip, *ssa);
                        }
                    }
                }
            }
        }
    }

    pub fn for_block(b: &BasicBlock) -> SSAUseMap {
        let mut am = SSAUseMap {
            ssa_map: HashMap::new(),
        };
        am.add_block(b);
        am
    }
}

/// Tracks the most recent register assigned to a given phi web
///
/// During register assignment, we then try to assign this register
/// to the next SSAValue in the same web.
///
/// This heuristic is inspired by the "Aggressive pre-coalescing" described in
/// section 4 of Colombet et al. 2011.
///
/// Q. Colombet, B. Boissinot, P. Brisk, S. Hack and F. Rastello,
/// "Graph-coloring and treescan register allocation using repairing," 2011
/// Proceedings of the 14th International Conference on Compilers,
/// Architectures and Synthesis for Embedded Systems (CASES), Taipei,
/// Taiwan, 2011, pp. 45-54, doi: 10.1145/2038698.2038708.
struct PhiWebs {
    uf: UnionFind<SSAValue>,
    assignments: HashMap<SSAValue, u32>,
}

impl PhiWebs {
    pub fn new(f: &Function) -> Self {
        let mut uf = UnionFind::new();

        // Populate uf with phi equivalence classes
        //
        // Note that we intentionally don't pay attention to move instructions
        // below.  The assumption is that any move instructions at this point
        // were inserted by CSSA conversion and including them would hurt the
        // coalescing.
        for b_idx in 0..f.blocks.len() {
            let Some(phi_dsts) = f.blocks[b_idx].phi_dsts() else {
                continue;
            };
            let dsts: HashMap<u32, &SSARef> = phi_dsts
                .dsts
                .iter()
                .map(|(idx, dst)| {
                    let ssa_ref = dst.as_ssa().expect("Expected ssa form");
                    (*idx, ssa_ref)
                })
                .collect();

            for pred_idx in f.blocks.pred_indices(b_idx) {
                let phi_srcs =
                    f.blocks[*pred_idx].phi_srcs().expect("Missing phi_srcs");
                for (src_idx, src) in phi_srcs.srcs.iter() {
                    let a = src.as_ssa().expect("Expected ssa form");
                    let b = dsts[src_idx];

                    assert_eq!(a.comps(), 1);
                    assert_eq!(b.comps(), 1);

                    uf.union(a[0], b[0]);
                }
            }
        }

        PhiWebs {
            uf,
            assignments: HashMap::new(),
        }
    }

    pub fn get(&mut self, ssa: SSAValue) -> Option<u32> {
        let phi_web_id = self.uf.find(ssa);
        self.assignments.get(&phi_web_id).copied()
    }

    pub fn set(&mut self, ssa: SSAValue, reg: u32) {
        let phi_web_id = self.uf.find(ssa);
        self.assignments.insert(phi_web_id, reg);
    }
}

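/// Identifies a value that is live into a block: either a plain SSA value or
/// the destination of a phi, identified by its phi index.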
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
enum LiveRef {
    SSA(SSAValue),
    Phi(u32),
}

#[derive(Clone, Copy, Eq, Hash, PartialEq)]
struct LiveValue {
    pub live_ref: LiveRef,
    pub reg_ref: RegRef,
}

// We need a stable ordering of live values so that RA is deterministic
impl Ord for LiveValue {
    fn cmp(&self, other: &Self) -> Ordering {
        let s_file = u8::from(self.reg_ref.file());
        let o_file = u8::from(other.reg_ref.file());
        match s_file.cmp(&o_file) {
            Ordering::Equal => {
                let s_idx = self.reg_ref.base_idx();
                let o_idx = other.reg_ref.base_idx();
                s_idx.cmp(&o_idx)
            }
            ord => ord,
        }
    }
}

impl PartialOrd for LiveValue {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

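/// Scalar register allocator for a single register file.
///
/// Tracks which registers are in use, which are pinned (may not be evicted
/// while the current instruction is being processed), and the bidirectional
/// mapping between SSA values and register indices.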
#[derive(Clone)]
struct RegAllocator {
    file: RegFile,
    num_regs: u32,
    used: BitSet,
    pinned: BitSet,
    reg_ssa: Vec<SSAValue>,
    ssa_reg: HashMap<SSAValue, u32>,
}

impl RegAllocator {
    pub fn new(file: RegFile, num_regs: u32) -> Self {
        Self {
            file: file,
            num_regs: num_regs,
            used: BitSet::new(),
            pinned: BitSet::new(),
            reg_ssa: Vec::new(),
            ssa_reg: HashMap::new(),
        }
    }

    fn file(&self) -> RegFile {
        self.file
    }

    pub fn num_regs_used(&self) -> u32 {
        self.ssa_reg.len().try_into().unwrap()
    }

    pub fn reg_is_used(&self, reg: u32) -> bool {
        self.used.get(reg.try_into().unwrap())
    }

    pub fn reg_is_pinned(&self, reg: u32) -> bool {
        self.pinned.get(reg.try_into().unwrap())
    }

    pub fn try_get_reg(&self, ssa: SSAValue) -> Option<u32> {
        self.ssa_reg.get(&ssa).cloned()
    }

    pub fn try_get_ssa(&self, reg: u32) -> Option<SSAValue> {
        if self.reg_is_used(reg) {
            Some(self.reg_ssa[usize::try_from(reg).unwrap()])
        } else {
            None
        }
    }

    pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
        let Some(reg) = self.try_get_reg(vec[0]) else {
            return None;
        };

        let align = u32::from(vec.comps()).next_power_of_two();
        if reg % align != 0 {
            return None;
        }

        for c in 1..vec.comps() {
            let ssa = vec[usize::from(c)];
            if self.try_get_reg(ssa) != Some(reg + u32::from(c)) {
                return None;
            }
        }
        Some(reg)
    }

    pub fn free_ssa(&mut self, ssa: SSAValue) -> u32 {
        assert!(ssa.file() == self.file);
        let reg = self.ssa_reg.remove(&ssa).unwrap();
        assert!(self.reg_is_used(reg));
        let reg_usize = usize::try_from(reg).unwrap();
        assert!(self.reg_ssa[reg_usize] == ssa);
        self.used.remove(reg_usize);
        self.pinned.remove(reg_usize);
        reg
    }

    pub fn assign_reg(&mut self, ssa: SSAValue, reg: u32) {
        assert!(ssa.file() == self.file);
        assert!(reg < self.num_regs);
        assert!(!self.reg_is_used(reg));

        let reg_usize = usize::try_from(reg).unwrap();
        if reg_usize >= self.reg_ssa.len() {
            self.reg_ssa.resize(reg_usize + 1, SSAValue::NONE);
        }
        self.reg_ssa[reg_usize] = ssa;
        let old = self.ssa_reg.insert(ssa, reg);
        assert!(old.is_none());
        self.used.insert(reg_usize);
    }

    pub fn pin_reg(&mut self, reg: u32) {
        assert!(self.reg_is_used(reg));
        self.pinned.insert(reg.try_into().unwrap());
    }

    fn reg_range_is_unset(set: &BitSet, reg: u32, comps: u8) -> bool {
        for c in 0..u32::from(comps) {
            if set.get((reg + c).try_into().unwrap()) {
                return false;
            }
        }
        true
    }

    fn try_find_unset_reg_range(
        &self,
        set: &BitSet,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        assert!(comps > 0 && u32::from(comps) <= self.num_regs);

        let mut next_reg = start_reg;
        loop {
            let reg: u32 = set
                .next_unset(usize::try_from(next_reg).unwrap())
                .try_into()
                .unwrap();

            // Ensure we're properly aligned
            let reg = reg.next_multiple_of(align);

            // Ensure we're in-bounds. This also serves as a check to ensure
            // that u8::try_from(reg + i) will succeed.
            if reg > self.num_regs - u32::from(comps) {
                return None;
            }

            if Self::reg_range_is_unset(set, reg, comps) {
                return Some(reg);
            }

            next_reg = reg + align;
        }
    }

    pub fn try_find_unused_reg_range(
        &self,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        self.try_find_unset_reg_range(&self.used, start_reg, align, comps)
    }

    pub fn alloc_scalar(
        &mut self,
        ip: usize,
        sum: &SSAUseMap,
        phi_webs: &mut PhiWebs,
        ssa: SSAValue,
    ) -> u32 {
        // Bias register assignment using the phi coalescing
        if let Some(reg) = phi_webs.get(ssa) {
            if !self.reg_is_used(reg) {
                self.assign_reg(ssa, reg);
                return reg;
            }
        }

        // Otherwise, use SSAUseMap heuristics
        if let Some(u) = sum.find_vec_use_after(ssa, ip) {
            match u {
                SSAUse::FixedReg(reg) => {
                    if !self.reg_is_used(*reg) {
                        self.assign_reg(ssa, *reg);
                        return *reg;
                    }
                }
                SSAUse::Vec(vec) => {
                    let mut comp = u8::MAX;
                    for c in 0..vec.comps() {
                        if vec[usize::from(c)] == ssa {
                            comp = c;
                            break;
                        }
                    }
                    assert!(comp < vec.comps());

                    let align = u32::from(vec.comps()).next_power_of_two();
                    for c in 0..vec.comps() {
                        if c == comp {
                            continue;
                        }

                        let other = vec[usize::from(c)];
                        let Some(other_reg) = self.try_get_reg(other) else {
                            continue;
                        };

                        let vec_reg = other_reg & !(align - 1);
                        if other_reg != vec_reg + u32::from(c) {
                            continue;
                        }

                        let reg = vec_reg + u32::from(comp);
                        if reg < self.num_regs && !self.reg_is_used(reg) {
                            self.assign_reg(ssa, reg);
                            return reg;
                        }
                    }

                    // We weren't able to pair it with an already allocated
                    // register but maybe we can at least find an aligned one.
                    if let Some(reg) =
                        self.try_find_unused_reg_range(0, align, 1)
                    {
                        self.assign_reg(ssa, reg);
                        return reg;
                    }
                }
            }
        }

        let reg = self
            .try_find_unused_reg_range(0, 1, 1)
            .expect("Failed to find free register");
        self.assign_reg(ssa, reg);
        reg
    }
}

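/// A transaction layered on top of a RegAllocator for satisfying the vector
/// register constraints of a single instruction.
///
/// Vector sources and destinations must occupy contiguous, aligned register
/// ranges, so this wrapper can shuffle or evict already-assigned scalars,
/// accumulating the required moves in a parallel copy which finish() flushes
/// into the copy emitted ahead of the instruction being processed.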
struct VecRegAllocator<'a> {
    ra: &'a mut RegAllocator,
    pcopy: OpParCopy,
    pinned: BitSet,
    evicted: HashMap<SSAValue, u32>,
}

impl<'a> VecRegAllocator<'a> {
    fn new(ra: &'a mut RegAllocator) -> Self {
        let pinned = ra.pinned.clone();
        VecRegAllocator {
            ra,
            pcopy: OpParCopy::new(),
            pinned,
            evicted: HashMap::new(),
        }
    }

    fn file(&self) -> RegFile {
        self.ra.file()
    }

    fn pin_reg(&mut self, reg: u32) {
        self.pinned.insert(reg.try_into().unwrap());
    }

    fn pin_reg_range(&mut self, reg: u32, comps: u8) {
        for c in 0..u32::from(comps) {
            self.pin_reg(reg + c);
        }
    }

    fn reg_is_pinned(&self, reg: u32) -> bool {
        self.pinned.get(reg.try_into().unwrap())
    }

    fn reg_range_is_unpinned(&self, reg: u32, comps: u8) -> bool {
        RegAllocator::reg_range_is_unset(&self.pinned, reg, comps)
    }

    fn assign_pin_reg(&mut self, ssa: SSAValue, reg: u32) -> RegRef {
        self.pin_reg(reg);
        self.ra.assign_reg(ssa, reg);
        RegRef::new(self.file(), reg, 1)
    }

    pub fn assign_pin_vec_reg(&mut self, vec: SSARef, reg: u32) -> RegRef {
        for c in 0..vec.comps() {
            let ssa = vec[usize::from(c)];
            self.assign_pin_reg(ssa, reg + u32::from(c));
        }
        RegRef::new(self.file(), reg, vec.comps())
    }

    fn try_find_unpinned_reg_range(
        &self,
        start_reg: u32,
        align: u32,
        comps: u8,
    ) -> Option<u32> {
        self.ra
            .try_find_unset_reg_range(&self.pinned, start_reg, align, comps)
    }

    pub fn evict_ssa(&mut self, ssa: SSAValue, old_reg: u32) {
        assert!(ssa.file() == self.file());
        assert!(!self.reg_is_pinned(old_reg));
        self.evicted.insert(ssa, old_reg);
    }

    pub fn evict_reg_if_used(&mut self, reg: u32) {
        assert!(!self.reg_is_pinned(reg));

        if let Some(ssa) = self.ra.try_get_ssa(reg) {
            self.ra.free_ssa(ssa);
            self.evict_ssa(ssa, reg);
        }
    }

    fn move_ssa_to_reg(&mut self, ssa: SSAValue, new_reg: u32) {
        if let Some(old_reg) = self.ra.try_get_reg(ssa) {
            assert!(self.evicted.get(&ssa).is_none());
            assert!(!self.reg_is_pinned(old_reg));

            if new_reg == old_reg {
                self.pin_reg(new_reg);
            } else {
                self.ra.free_ssa(ssa);
                self.evict_reg_if_used(new_reg);

                self.pcopy.push(
                    RegRef::new(self.file(), new_reg, 1).into(),
                    RegRef::new(self.file(), old_reg, 1).into(),
                );

                self.assign_pin_reg(ssa, new_reg);
            }
        } else if let Some(old_reg) = self.evicted.remove(&ssa) {
            self.evict_reg_if_used(new_reg);

            self.pcopy.push(
                RegRef::new(self.file(), new_reg, 1).into(),
                RegRef::new(self.file(), old_reg, 1).into(),
            );

            self.assign_pin_reg(ssa, new_reg);
        } else {
            panic!("Unknown SSA value");
        }
    }

    fn finish(mut self, pcopy: &mut OpParCopy) {
        pcopy.dsts_srcs.append(&mut self.pcopy.dsts_srcs);

        if !self.evicted.is_empty() {
            // Sort so we get determinism, even if the hash map order changes
            // from one run to another or due to rust compiler updates.
            let mut evicted: Vec<_> = self.evicted.drain().collect();
            evicted.sort_by_key(|(_, reg)| *reg);

            for (ssa, old_reg) in evicted {
                let mut next_reg = 0;
                let new_reg = loop {
                    let reg = self
                        .ra
                        .try_find_unused_reg_range(next_reg, 1, 1)
                        .expect("Failed to find free register");
                    if !self.reg_is_pinned(reg) {
                        break reg;
                    }
                    next_reg = reg + 1;
                };

                pcopy.push(
                    RegRef::new(self.file(), new_reg, 1).into(),
                    RegRef::new(self.file(), old_reg, 1).into(),
                );
                self.assign_pin_reg(ssa, new_reg);
            }
        }
    }

    pub fn try_get_vec_reg(&self, vec: &SSARef) -> Option<u32> {
        self.ra.try_get_vec_reg(vec)
    }

    pub fn collect_vector(&mut self, vec: &SSARef) -> RegRef {
        if let Some(reg) = self.try_get_vec_reg(vec) {
            self.pin_reg_range(reg, vec.comps());
            return RegRef::new(self.file(), reg, vec.comps());
        }

        let comps = vec.comps();
        let align = u32::from(comps).next_power_of_two();

        let reg = self
            .ra
            .try_find_unused_reg_range(0, align, comps)
            .or_else(|| {
                for c in 0..comps {
                    let ssa = vec[usize::from(c)];
                    let Some(comp_reg) = self.ra.try_get_reg(ssa) else {
                        continue;
                    };

                    let vec_reg = comp_reg & !(align - 1);
                    if comp_reg != vec_reg + u32::from(c) {
                        continue;
                    }

                    if vec_reg + u32::from(comps) > self.ra.num_regs {
                        continue;
                    }

                    if self.reg_range_is_unpinned(vec_reg, comps) {
                        return Some(vec_reg);
                    }
                }
                None
            })
            .or_else(|| self.try_find_unpinned_reg_range(0, align, comps))
            .expect("Failed to find an unpinned register range");

        for c in 0..comps {
            let ssa = vec[usize::from(c)];
            self.move_ssa_to_reg(ssa, reg + u32::from(c));
        }

        RegRef::new(self.file(), reg, comps)
    }

    pub fn alloc_vector(&mut self, vec: SSARef) -> RegRef {
        let comps = vec.comps();
        let align = u32::from(comps).next_power_of_two();

        if let Some(reg) = self.ra.try_find_unused_reg_range(0, align, comps) {
            return self.assign_pin_vec_reg(vec, reg);
        }

        let reg = self
            .try_find_unpinned_reg_range(0, align, comps)
            .expect("Failed to find an unpinned register range");

        for c in 0..comps {
            self.evict_reg_if_used(reg + u32::from(c));
        }
        self.assign_pin_vec_reg(vec, reg)
    }

    pub fn free_killed(&mut self, killed: &KillSet) {
        for ssa in killed.iter() {
            if ssa.file() == self.file() {
                self.ra.free_ssa(*ssa);
            }
        }
    }
}

impl Drop for VecRegAllocator<'_> {
    fn drop(&mut self) {
        assert!(self.evicted.is_empty());
    }
}

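/// Rewrites every SSA source of `instr` that lives in `ra`'s register file
/// with its assigned register, collecting vector sources (and the predicate)
/// into contiguous ranges as needed.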
fn instr_remap_srcs_file(instr: &mut Instr, ra: &mut VecRegAllocator) {
    // Collect vector sources first since those may silently pin some of our
    // scalar sources.
    for src in instr.srcs_mut() {
        if let Some(ssa) = src_ssa_ref(src) {
            if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
                let reg = ra.collect_vector(ssa);
                src_set_reg(src, reg);
            }
        }
    }

    if let PredRef::SSA(pred) = instr.pred.pred_ref {
        if pred.file() == ra.file() {
            instr.pred.pred_ref = ra.collect_vector(&pred.into()).into();
        }
    }

    for src in instr.srcs_mut() {
        if let Some(ssa) = src_ssa_ref(src) {
            if ssa.file().unwrap() == ra.file() && ssa.comps() == 1 {
                let reg = ra.collect_vector(ssa);
                src_set_reg(src, reg);
            }
        }
    }
}

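/// Allocates a register for every scalar SSA destination of `instr` in `ra`'s
/// register file and rewrites the destination accordingly.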
fn instr_alloc_scalar_dsts_file(
    instr: &mut Instr,
    ip: usize,
    sum: &SSAUseMap,
    phi_webs: &mut PhiWebs,
    ra: &mut RegAllocator,
) {
    for dst in instr.dsts_mut() {
        if let Dst::SSA(ssa) = dst {
            if ssa.file().unwrap() == ra.file() {
                assert!(ssa.comps() == 1);
                let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa[0]);
                *dst = RegRef::new(ra.file(), reg, 1).into();
            }
        }
    }
}

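/// Assigns registers for a single instruction within one register file:
/// sources are remapped to their existing assignments and destinations get
/// fresh registers, with vector destinations handled by one of the three
/// strategies described below.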
fn instr_assign_regs_file(
    instr: &mut Instr,
    ip: usize,
    sum: &SSAUseMap,
    phi_webs: &mut PhiWebs,
    killed: &KillSet,
    pcopy: &mut OpParCopy,
    ra: &mut RegAllocator,
) {
    struct VecDst {
        dst_idx: usize,
        comps: u8,
        killed: Option<SSARef>,
        reg: u32,
    }

    let mut vec_dsts = Vec::new();
    let mut vec_dst_comps = 0;
    for (i, dst) in instr.dsts().iter().enumerate() {
        if let Dst::SSA(ssa) = dst {
            if ssa.file().unwrap() == ra.file() && ssa.comps() > 1 {
                vec_dsts.push(VecDst {
                    dst_idx: i,
                    comps: ssa.comps(),
                    killed: None,
                    reg: u32::MAX,
                });
                vec_dst_comps += ssa.comps();
            }
        }
    }

    // No vector destinations is the easy case
    if vec_dst_comps == 0 {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);
        vra.free_killed(killed);
        vra.finish(pcopy);
        instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
        return;
    }

    // Predicates can't be vectors. This lets us ignore instr.pred in our
    // analysis for the cases below. Only the easy case above needs to care
    // about them.
    assert!(!ra.file().is_predicate());

    let mut avail = killed.set.clone();
    let mut killed_vecs = Vec::new();
    for src in instr.srcs() {
        if let Some(vec) = src_ssa_ref(src) {
            if vec.comps() > 1 {
                let mut vec_killed = true;
                for ssa in vec.iter() {
                    if ssa.file() != ra.file() || !avail.contains(ssa) {
                        vec_killed = false;
                        break;
                    }
                }
                if vec_killed {
                    for ssa in vec.iter() {
                        avail.remove(ssa);
                    }
                    killed_vecs.push(*vec);
                }
            }
        }
    }

    vec_dsts.sort_by_key(|v| v.comps);
    killed_vecs.sort_by_key(|v| v.comps());

    let mut next_dst_reg = 0;
    let mut vec_dsts_map_to_killed_srcs = true;
    let mut could_trivially_allocate = true;
    for vec_dst in vec_dsts.iter_mut().rev() {
        while let Some(src) = killed_vecs.pop() {
            if src.comps() >= vec_dst.comps {
                vec_dst.killed = Some(src);
                break;
            }
        }
        if vec_dst.killed.is_none() {
            vec_dsts_map_to_killed_srcs = false;
        }

        let align = u32::from(vec_dst.comps).next_power_of_two();
        if let Some(reg) =
            ra.try_find_unused_reg_range(next_dst_reg, align, vec_dst.comps)
        {
            vec_dst.reg = reg;
            next_dst_reg = reg + u32::from(vec_dst.comps);
        } else {
            could_trivially_allocate = false;
        }
    }

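    // Three strategies for vector destinations, in decreasing order of
    // preference:
    //
    //  1. Every vector destination pairs with a killed vector source, so we
    //     can re-use the source's registers for the destination.
    //  2. Unused register ranges exist for every vector destination, so each
    //     can be allocated without disturbing anything already assigned.
    //  3. Otherwise, fall back to alloc_vector(), which may evict and shuffle
    //     other values to make room.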
    if vec_dsts_map_to_killed_srcs {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);

        for vec_dst in &mut vec_dsts {
            let src_vec = vec_dst.killed.as_ref().unwrap();
            vec_dst.reg = vra.try_get_vec_reg(src_vec).unwrap();
        }

        vra.free_killed(killed);

        for vec_dst in vec_dsts {
            let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
            *dst = vra
                .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
                .into();
        }

        vra.finish(pcopy);

        instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
    } else if could_trivially_allocate {
        let mut vra = VecRegAllocator::new(ra);
        for vec_dst in vec_dsts {
            let dst = &mut instr.dsts_mut()[vec_dst.dst_idx];
            *dst = vra
                .assign_pin_vec_reg(*dst.as_ssa().unwrap(), vec_dst.reg)
                .into();
        }

        instr_remap_srcs_file(instr, &mut vra);
        vra.free_killed(killed);
        vra.finish(pcopy);
        instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
    } else {
        let mut vra = VecRegAllocator::new(ra);
        instr_remap_srcs_file(instr, &mut vra);

        // Allocate vector destinations first so we have the most freedom.
        // Scalar destinations can fill in holes.
        for dst in instr.dsts_mut() {
            if let Dst::SSA(ssa) = dst {
                if ssa.file().unwrap() == vra.file() && ssa.comps() > 1 {
                    *dst = vra.alloc_vector(*ssa).into();
                }
            }
        }

        vra.free_killed(killed);
        vra.finish(pcopy);

        instr_alloc_scalar_dsts_file(instr, ip, sum, phi_webs, ra);
    }
}

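// Convenience helpers that dispatch to the allocator for the SSA value's own
// register file.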
impl PerRegFile<RegAllocator> {
    pub fn assign_reg(&mut self, ssa: SSAValue, reg: RegRef) {
        assert!(reg.file() == ssa.file());
        assert!(reg.comps() == 1);
        self[ssa.file()].assign_reg(ssa, reg.base_idx());
    }

    pub fn free_killed(&mut self, killed: &KillSet) {
        for ssa in killed.iter() {
            self[ssa.file()].free_ssa(*ssa);
        }
    }
}

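/// Per-basic-block register assignment state.
///
/// `ra` holds the allocators for every register file, `live_in` records the
/// values (and their registers) this block expects from its predecessors, and
/// `phi_out` maps phi indices to the sources this block provides, which
/// second_pass() uses to stitch blocks together with parallel copies.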
struct AssignRegsBlock {
    ra: PerRegFile<RegAllocator>,
    pcopy_tmp_gprs: u8,
    live_in: Vec<LiveValue>,
    phi_out: HashMap<u32, SrcRef>,
}

impl AssignRegsBlock {
    fn new(num_regs: &PerRegFile<u32>, pcopy_tmp_gprs: u8) -> AssignRegsBlock {
        AssignRegsBlock {
            ra: PerRegFile::new_with(|file| {
                RegAllocator::new(file, num_regs[file])
            }),
            pcopy_tmp_gprs: pcopy_tmp_gprs,
            live_in: Vec::new(),
            phi_out: HashMap::new(),
        }
    }

    fn get_scalar(&self, ssa: SSAValue) -> RegRef {
        let ra = &self.ra[ssa.file()];
        let reg = ra.try_get_reg(ssa).expect("Unknown SSA value");
        RegRef::new(ssa.file(), reg, 1)
    }

    fn alloc_scalar(
        &mut self,
        ip: usize,
        sum: &SSAUseMap,
        phi_webs: &mut PhiWebs,
        ssa: SSAValue,
    ) -> RegRef {
        let ra = &mut self.ra[ssa.file()];
        let reg = ra.alloc_scalar(ip, sum, phi_webs, ssa);
        RegRef::new(ssa.file(), reg, 1)
    }

    fn pin_vector(&mut self, reg: RegRef) {
        let ra = &mut self.ra[reg.file()];
        for c in 0..reg.comps() {
            ra.pin_reg(reg.comp(c).base_idx());
        }
    }

    fn try_coalesce(&mut self, ssa: SSAValue, src: &Src) -> bool {
        debug_assert!(src.src_mod.is_none());
        let SrcRef::Reg(src_reg) = src.src_ref else {
            return false;
        };
        debug_assert!(src_reg.comps() == 1);

        if src_reg.file() != ssa.file() {
            return false;
        }

        let ra = &mut self.ra[src_reg.file()];
        if ra.reg_is_used(src_reg.base_idx()) {
            return false;
        }

        ra.assign_reg(ssa, src_reg.base_idx());
        true
    }

    fn pcopy_tmp(&self) -> Option<RegRef> {
        if self.pcopy_tmp_gprs > 0 {
            Some(RegRef::new(
                RegFile::GPR,
                self.ra[RegFile::GPR].num_regs,
                self.pcopy_tmp_gprs,
            ))
        } else {
            None
        }
    }

    fn assign_regs_instr(
        &mut self,
        mut instr: Box<Instr>,
        ip: usize,
        sum: &SSAUseMap,
        phi_webs: &mut PhiWebs,
        srcs_killed: &KillSet,
        dsts_killed: &KillSet,
        pcopy: &mut OpParCopy,
    ) -> Option<Box<Instr>> {
        match &mut instr.op {
            Op::Undef(undef) => {
                if let Dst::SSA(ssa) = undef.dst {
                    assert!(ssa.comps() == 1);
                    self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
                }
                assert!(srcs_killed.is_empty());
                self.ra.free_killed(dsts_killed);
                None
            }
            Op::PhiSrcs(phi) => {
                for (id, src) in phi.srcs.iter() {
                    assert!(src.src_mod.is_none());
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        self.phi_out.insert(*id, reg.into());
                    } else {
                        self.phi_out.insert(*id, src.src_ref);
                    }
                }
                assert!(dsts_killed.is_empty());
                None
            }
            Op::PhiDsts(phi) => {
                assert!(instr.pred.is_true());

                for (id, dst) in phi.dsts.iter() {
                    if let Dst::SSA(ssa) = dst {
                        assert!(ssa.comps() == 1);
                        let reg = self.alloc_scalar(ip, sum, phi_webs, ssa[0]);
                        self.live_in.push(LiveValue {
                            live_ref: LiveRef::Phi(*id),
                            reg_ref: reg,
                        });
                    }
                }
                assert!(srcs_killed.is_empty());
                self.ra.free_killed(dsts_killed);

                None
            }
            Op::Break(op) => {
                for src in op.srcs_as_mut_slice() {
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                if let Dst::SSA(ssa) = &op.bar_out {
                    let reg = *op.bar_in.src_ref.as_reg().unwrap();
                    self.ra.assign_reg(ssa[0], reg);
                    op.bar_out = reg.into();
                }

                self.ra.free_killed(dsts_killed);

                Some(instr)
            }
            Op::BSSy(op) => {
                for src in op.srcs_as_mut_slice() {
                    if let Some(ssa) = src_ssa_ref(src) {
                        assert!(ssa.comps() == 1);
                        let reg = self.get_scalar(ssa[0]);
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                if let Dst::SSA(ssa) = &op.bar_out {
                    let reg = *op.bar_in.src_ref.as_reg().unwrap();
                    self.ra.assign_reg(ssa[0], reg);
                    op.bar_out = reg.into();
                }

                self.ra.free_killed(dsts_killed);

                Some(instr)
            }
            Op::Copy(copy) => {
                if let Some(ssa) = src_ssa_ref(&copy.src) {
                    // This may be a CBuf::BindlessSSA source so we need to
                    // support vectors because cbuf handles are vec2s. However,
                    // since we only have a single scalar destination, we can
                    // just allocate and free killed up-front.
                    let ra = &mut self.ra[ssa.file().unwrap()];
                    let mut vra = VecRegAllocator::new(ra);
                    let reg = vra.collect_vector(ssa);
                    vra.free_killed(srcs_killed);
                    vra.finish(pcopy);
                    src_set_reg(&mut copy.src, reg);
                }

                let mut del_copy = false;
                if let Dst::SSA(dst_vec) = &mut copy.dst {
                    debug_assert!(dst_vec.comps() == 1);
                    let dst_ssa = &dst_vec[0];

                    if self.try_coalesce(*dst_ssa, &copy.src) {
                        del_copy = true;
                    } else {
                        copy.dst = self
                            .alloc_scalar(ip, sum, phi_webs, *dst_ssa)
                            .into();
                    }
                }

                self.ra.free_killed(dsts_killed);

                if del_copy {
                    None
                } else {
                    Some(instr)
                }
            }
            Op::Pin(OpPin { src, dst }) | Op::Unpin(OpUnpin { src, dst }) => {
                assert!(instr.pred.is_true());

                // These basically act as a vector version of OpCopy except
                // that they only work on SSA values and we pin the
                // destination if it's OpPin.
                let src_vec = src.as_ssa().unwrap();
                let dst_vec = dst.as_ssa().unwrap();
                assert!(src_vec.comps() == dst_vec.comps());

                if srcs_killed.len() == src_vec.comps().into()
                    && src_vec.file() == dst_vec.file()
                {
                    let ra = &mut self.ra[src_vec.file().unwrap()];
                    let mut vra = VecRegAllocator::new(ra);
                    let reg = vra.collect_vector(src_vec);
                    vra.finish(pcopy);
                    for c in 0..src_vec.comps() {
                        let c_reg = ra.free_ssa(src_vec[usize::from(c)]);
                        debug_assert!(c_reg == reg.comp(c).base_idx());
                        ra.assign_reg(dst_vec[usize::from(c)], c_reg);
                    }

                    if matches!(&instr.op, Op::Pin(_)) {
                        self.pin_vector(reg);
                    }
                    self.ra.free_killed(dsts_killed);

                    None
                } else {
                    // Otherwise, turn into a parallel copy
                    //
                    // We can always allocate the destination first in this
                    // case.
                    assert!(dst_vec.comps() > 1 || srcs_killed.is_empty());

                    let dst_ra = &mut self.ra[dst_vec.file().unwrap()];
                    let mut vra = VecRegAllocator::new(dst_ra);
                    let dst_reg = vra.alloc_vector(*dst_vec);
                    vra.finish(pcopy);

                    let mut pin_copy = OpParCopy::new();
                    for c in 0..dst_reg.comps() {
                        let src_reg = self.get_scalar(src_vec[usize::from(c)]);
                        pin_copy.push(dst_reg.comp(c).into(), src_reg.into());
                    }

                    if matches!(&instr.op, Op::Pin(_)) {
                        self.pin_vector(dst_reg);
                    }
                    self.ra.free_killed(srcs_killed);
                    self.ra.free_killed(dsts_killed);

                    Some(Instr::new_boxed(pin_copy))
                }
            }
            Op::ParCopy(pcopy) => {
                for (_, src) in pcopy.dsts_srcs.iter_mut() {
                    if let Some(src_vec) = src_ssa_ref(src) {
                        debug_assert!(src_vec.comps() == 1);
                        let reg = self.get_scalar(src_vec[0]).into();
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);

                // Try to coalesce destinations into sources, if possible
                pcopy.dsts_srcs.retain(|dst, src| match dst {
                    Dst::None => false,
                    Dst::SSA(dst_vec) => {
                        debug_assert!(dst_vec.comps() == 1);
                        !self.try_coalesce(dst_vec[0], src)
                    }
                    Dst::Reg(_) => true,
                });

                for (dst, _) in pcopy.dsts_srcs.iter_mut() {
                    if let Dst::SSA(dst_vec) = dst {
                        debug_assert!(dst_vec.comps() == 1);
                        *dst = self
                            .alloc_scalar(ip, sum, phi_webs, dst_vec[0])
                            .into();
                    }
                }

                self.ra.free_killed(dsts_killed);

                pcopy.tmp = self.pcopy_tmp();
                if pcopy.is_empty() {
                    None
                } else {
                    Some(instr)
                }
            }
            Op::RegOut(out) => {
                for src in out.srcs.iter_mut() {
                    if let Some(src_vec) = src_ssa_ref(src) {
                        debug_assert!(src_vec.comps() == 1);
                        let reg = self.get_scalar(src_vec[0]).into();
                        src_set_reg(src, reg);
                    }
                }

                self.ra.free_killed(srcs_killed);
                assert!(dsts_killed.is_empty());

                // This should be the last instruction and its sources should
                // be the last free GPRs.
                debug_assert!(self.ra[RegFile::GPR].num_regs_used() == 0);

                for (i, src) in out.srcs.iter().enumerate() {
                    let reg = u32::try_from(i).unwrap();
                    let dst = RegRef::new(RegFile::GPR, reg, 1);
                    pcopy.push(dst.into(), *src);
                }

                None
            }
            _ => {
                for file in self.ra.values_mut() {
                    instr_assign_regs_file(
                        &mut instr,
                        ip,
                        sum,
                        phi_webs,
                        srcs_killed,
                        pcopy,
                        file,
                    );
                }
                self.ra.free_killed(dsts_killed);
                Some(instr)
            }
        }
    }

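    /// Walks the block's instructions in order, building per-instruction kill
    /// sets from liveness, assigning registers, and emitting any parallel
    /// copies needed to shuffle values into place.  The live-in set is seeded
    /// from `pred_ra`, the allocator state of one already-processed
    /// predecessor, when there is one.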
    fn first_pass<BL: BlockLiveness>(
        &mut self,
        b: &mut BasicBlock,
        bl: &BL,
        pred_ra: Option<&PerRegFile<RegAllocator>>,
        phi_webs: &mut PhiWebs,
    ) {
        // Populate live-in from the register file we're handed. We'll add
        // more live-in when we process the OpPhiDsts, if any.
        if let Some(pred_ra) = pred_ra {
            for (raf, pred_raf) in self.ra.values_mut().zip(pred_ra.values()) {
                for (ssa, reg) in &pred_raf.ssa_reg {
                    if bl.is_live_in(ssa) {
                        raf.assign_reg(*ssa, *reg);
                        if pred_raf.reg_is_pinned(*reg) {
                            raf.pin_reg(*reg);
                        }
                        self.live_in.push(LiveValue {
                            live_ref: LiveRef::SSA(*ssa),
                            reg_ref: RegRef::new(raf.file(), *reg, 1),
                        });
                    }
                }
            }
        }

        let sum = SSAUseMap::for_block(b);

        let mut instrs = Vec::new();
        let mut srcs_killed = KillSet::new();
        let mut dsts_killed = KillSet::new();

        for (ip, instr) in b.instrs.drain(..).enumerate() {
            // Build up the kill set
            srcs_killed.clear();
            if let PredRef::SSA(ssa) = &instr.pred.pred_ref {
                if !bl.is_live_after_ip(ssa, ip) {
                    srcs_killed.insert(*ssa);
                }
            }
            for src in instr.srcs() {
                for ssa in src.iter_ssa() {
                    if !bl.is_live_after_ip(ssa, ip) {
                        srcs_killed.insert(*ssa);
                    }
                }
            }

            dsts_killed.clear();
            for dst in instr.dsts() {
                if let Dst::SSA(vec) = dst {
                    for ssa in vec.iter() {
                        if !bl.is_live_after_ip(ssa, ip) {
                            dsts_killed.insert(*ssa);
                        }
                    }
                }
            }

            let mut pcopy = OpParCopy::new();
            pcopy.tmp = self.pcopy_tmp();

            let instr = self.assign_regs_instr(
                instr,
                ip,
                &sum,
                phi_webs,
                &srcs_killed,
                &dsts_killed,
                &mut pcopy,
            );

            if !pcopy.is_empty() {
                if DEBUG.annotate() {
                    instrs.push(Instr::new_boxed(OpAnnotate {
                        annotation: "generated by assign_regs".into(),
                    }));
                }
                if !b.uniform {
                    for dst in pcopy.dsts_as_slice() {
                        if let Dst::Reg(reg) = dst {
                            assert!(!reg.is_uniform());
                        }
                    }
                }
                instrs.push(Instr::new_boxed(pcopy));
            }

            if let Some(instr) = instr {
                instrs.push(instr);
            }
        }

        // Update phi_webs with the registers assigned in this block
        for ra in self.ra.values() {
            for (ssa, reg) in &ra.ssa_reg {
                phi_webs.set(*ssa, *reg);
            }
        }

        // Sort live-in to maintain determinism
        self.live_in.sort();

        b.instrs = instrs;
    }

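    /// Emits the parallel copy that moves this block's outgoing values into
    /// the registers the successor `target` expects for its live-in set,
    /// placing it before the final branch if there is one.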
    fn second_pass(&self, target: &AssignRegsBlock, b: &mut BasicBlock) {
        let mut pcopy = OpParCopy::new();
        pcopy.tmp = self.pcopy_tmp();

        for lv in &target.live_in {
            let src = match lv.live_ref {
                LiveRef::SSA(ssa) => SrcRef::from(self.get_scalar(ssa)),
                LiveRef::Phi(phi) => *self.phi_out.get(&phi).unwrap(),
            };
            let dst = lv.reg_ref;
            if let SrcRef::Reg(src_reg) = src {
                if dst == src_reg {
                    continue;
                }
            }
            pcopy.push(dst.into(), src.into());
        }

        if DEBUG.annotate() {
            b.instrs.push(Instr::new_boxed(OpAnnotate {
                annotation: "generated by assign_regs".into(),
            }));
        }
        if b.branch().is_some() {
            b.instrs.insert(b.instrs.len() - 1, Instr::new_boxed(pcopy));
        } else {
            b.instrs.push(Instr::new_boxed(pcopy));
        }
    }
}

impl Shader<'_> {
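    /// Top-level register assignment pass: converts the function to CSSA,
    /// spills register files that exceed their limits, then assigns registers
    /// block-by-block with first_pass() and stitches blocks together with
    /// second_pass().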
    pub fn assign_regs(&mut self) {
        assert!(self.functions.len() == 1);
        let f = &mut self.functions[0];

        // Convert to CSSA before we spill or assign registers
        f.to_cssa();

        let mut live = SimpleLiveness::for_function(f);
        let mut max_live = live.calc_max_live(f);

        // We want at least one temporary GPR reserved for parallel copies.
        let mut tmp_gprs = 1_u8;

        let spill_files =
            [RegFile::UPred, RegFile::Pred, RegFile::UGPR, RegFile::Bar];
        for file in spill_files {
            let num_regs = self.sm.num_regs(file);
            if max_live[file] > num_regs {
                f.spill_values(file, num_regs);

                // Re-calculate liveness after we spill
                live = SimpleLiveness::for_function(f);
                max_live = live.calc_max_live(f);

                if file == RegFile::Bar {
                    tmp_gprs = max(tmp_gprs, 2);
                }
            }
        }

        // An instruction can have at most 4 vector sources/destinations. In
        // order to ensure we always succeed at allocation, regardless of
        // arbitrary choices, we need at least 16 GPRs.
        let mut gpr_limit = max(max_live[RegFile::GPR], 16);
        let mut total_gprs = gpr_limit + u32::from(tmp_gprs);

        let mut max_gprs = if DEBUG.spill() {
            // We need at least 16 registers to satisfy RA constraints for
            // texture ops and another 2 for parallel copy lowering
            18
        } else {
            self.sm.num_regs(RegFile::GPR)
        };

        if let ShaderStageInfo::Compute(cs_info) = &self.info.stage {
            max_gprs = min(
                max_gprs,
                gpr_limit_from_local_size(&cs_info.local_size)
                    - self.sm.hw_reserved_gprs(),
            );
        }

        if total_gprs > max_gprs {
            // If we're spilling GPRs, we need to reserve 2 GPRs for OpParCopy
            // lowering because it needs to be able to lower Mem copies, which
            // require a temporary.
            tmp_gprs = max(tmp_gprs, 2);
            total_gprs = max_gprs;
            gpr_limit = total_gprs - u32::from(tmp_gprs);

            f.spill_values(RegFile::GPR, gpr_limit);

            // Re-calculate liveness one last time
            live = SimpleLiveness::for_function(f);
        }

        self.info.num_gprs = total_gprs.try_into().unwrap();

        let limit = PerRegFile::new_with(|file| {
            if file == RegFile::GPR {
                gpr_limit
            } else {
                self.sm.num_regs(file)
            }
        });

        let mut phi_webs = PhiWebs::new(f);

        let mut blocks: Vec<AssignRegsBlock> = Vec::new();
        for b_idx in 0..f.blocks.len() {
            let pred = f.blocks.pred_indices(b_idx);
            let pred_ra = if pred.is_empty() {
                None
            } else {
                // Start from the first predecessor's allocation state.
                Some(&blocks[pred[0]].ra)
            };

            let bl = live.block_live(b_idx);

            let mut arb = AssignRegsBlock::new(&limit, tmp_gprs);
            arb.first_pass(&mut f.blocks[b_idx], bl, pred_ra, &mut phi_webs);

            assert!(blocks.len() == b_idx);
            blocks.push(arb);
        }

        for b_idx in 0..f.blocks.len() {
            let arb = &blocks[b_idx];
            for sb_idx in f.blocks.succ_indices(b_idx).to_vec() {
                arb.second_pass(&blocks[sb_idx], &mut f.blocks[b_idx]);
            }
        }
    }
}