1 /*
2 * Copyright © 2023 Valve Corporation.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "ir3.h"
7 #include "util/ralloc.h"
8
9 /* RA cannot handle phis of shared registers where there are extra physical
10 * sources, or the sources have extra physical destinations, because these edges
11 * are critical edges that we cannot resolve copies along. Here's a contrived
12 * example:
13 *
14 * loop {
15 * if non-uniform {
16 * if uniform {
17 * x_1 = ...;
18 * continue;
19 * }
20 * x_2 = ...;
21 * } else {
22 * break;
23 * }
24 * // continue block
25 * x_3 = phi(x_1, x_2)
26 * }
27 *
28 * Assuming x_1 and x_2 are uniform, x_3 will also be uniform, because all
29 * threads that stay in the loop take the same branch to the continue block,
30 * however execution may fall through from the assignment to x_2 to the
31 * break statement because the outer if is non-uniform, and then it will fall
32 * through again to the continue block. In cases like this we have to demote the
33 * phi to normal registers and insert movs around it (which will probably be
34 * coalesced).
35 */
36
37 static void
lower_phi(void * ctx,struct ir3_instruction * phi)38 lower_phi(void *ctx, struct ir3_instruction *phi)
39 {
40 struct ir3_block *block = phi->block;
41 for (unsigned i = 0; i < block->predecessors_count; i++) {
42 struct ir3_block *pred = block->predecessors[i];
43 if (phi->srcs[i]->def) {
44 struct ir3_instruction *pred_mov =
45 ir3_instr_create_at(ir3_before_terminator(pred), OPC_MOV, 1, 1);
46 pred_mov->uses = _mesa_pointer_set_create(ctx);
47 __ssa_dst(pred_mov)->flags |= (phi->srcs[i]->flags & IR3_REG_HALF);
48 unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
49 (phi->srcs[i]->flags & IR3_REG_HALF);
50 ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
51 phi->srcs[i]->def;
52 pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
53 (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
54
55 _mesa_set_remove_key(phi->srcs[i]->def->instr->uses, phi);
56 _mesa_set_add(phi->srcs[i]->def->instr->uses, pred_mov);
57 phi->srcs[i]->def = pred_mov->dsts[0];
58 }
59 phi->srcs[i]->flags &= ~IR3_REG_SHARED;
60 }
61
62 phi->dsts[0]->flags &= ~IR3_REG_SHARED;
63
64 struct ir3_builder build = ir3_builder_at(ir3_after_phis(block));
65 struct ir3_instruction *shared_mov = ir3_MOV(
66 &build, phi, (phi->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
67 shared_mov->uses = _mesa_pointer_set_create(ctx);
68 shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
69
70 foreach_ssa_use (use, phi) {
71 for (unsigned i = 0; i < use->srcs_count; i++) {
72 if (use->srcs[i]->def == phi->dsts[0])
73 use->srcs[i]->def = shared_mov->dsts[0];
74 }
75 }
76 }
77
78 bool
ir3_lower_shared_phis(struct ir3 * ir)79 ir3_lower_shared_phis(struct ir3 *ir)
80 {
81 void *mem_ctx = ralloc_context(NULL);
82 bool progress = false;
83
84 ir3_find_ssa_uses(ir, mem_ctx, false);
85
86 foreach_block (block, &ir->block_list) {
87 bool pred_physical_edge = false;
88 for (unsigned i = 0; i < block->predecessors_count; i++) {
89 unsigned successors_count =
90 block->predecessors[i]->successors[1] ? 2 : 1;
91 if (block->predecessors[i]->physical_successors_count > successors_count) {
92 pred_physical_edge = true;
93 break;
94 }
95 }
96
97 if (!pred_physical_edge &&
98 block->physical_predecessors_count == block->predecessors_count)
99 continue;
100
101 foreach_instr_safe (phi, &block->instr_list) {
102 if (phi->opc != OPC_META_PHI)
103 break;
104
105 if (!(phi->dsts[0]->flags & IR3_REG_SHARED))
106 continue;
107
108 lower_phi(mem_ctx, phi);
109 progress = true;
110 }
111 }
112
113 ralloc_free(mem_ctx);
114 return progress;
115 }
116
117