1 /*
2 * Copyright © 2024 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "aco_ir.h"
8
9 #include <functional>
10
11 namespace aco {
12
13 namespace {
14
15 /**
16 * This is a limited SSA repair pass which inserts the phis necessary for the definition of a
17 * linear temporary to dominate it's uses in the linear CFG. The definition must still dominate it's
18 * uses in the logical CFG. If a path in which the temporary is defined is not taken, the value used
19 * is undefined.
20 *
21 * aco::lower_phis() must be run after to lower the logical phis created by this pass.
22 */
23
24 struct repair_state {
25 Program* program;
26 Block* block;
27
28 std::vector<bool> block_needs_repair; /* uses in this block might need repair */
29 std::vector<bool> dom_needs_repair; /* whether a block dominates a block which needs repair */
30
31 std::vector<bool> has_def_block;
32 std::unique_ptr<uint32_t[]> def_blocks;
33 std::unordered_map<uint64_t, Temp> renames;
34
35 std::vector<aco_ptr<Instruction>> new_phis;
36
37 std::vector<bool> visit_block;
38 std::vector<Temp> temps;
39
is_temp_defined_at_predaco::__anonf38860610111::repair_state40 bool is_temp_defined_at_pred(uint32_t block_idx, uint32_t def_block) const
41 {
42 return block_idx >= def_block && visit_block[block_idx] && temps[block_idx].id();
43 }
44 };
45
46 Temp
create_phis(repair_state * state,Temp tmp,uint32_t use_block,uint32_t def_block)47 create_phis(repair_state* state, Temp tmp, uint32_t use_block, uint32_t def_block)
48 {
49 Program* program = state->program;
50
51 assert(program->blocks[def_block].logical_idom != -1);
52 assert(program->blocks[use_block].logical_idom != -1);
53 assert(use_block > def_block);
54
55 std::fill(state->visit_block.begin() + def_block, state->visit_block.begin() + use_block + 1,
56 false);
57
58 for (int32_t i = use_block; i >= (int)def_block; i--) {
59 bool block_needs_tmp = (uint32_t)i == use_block;
60 for (uint32_t succ : program->blocks[i].logical_succs)
61 block_needs_tmp |= succ > (uint32_t)i && state->visit_block[succ];
62 state->visit_block[i] = block_needs_tmp;
63
64 if (block_needs_tmp && (uint32_t)i != def_block) {
65 uint64_t k = i | ((uint64_t)tmp.id() << 32);
66 auto it = state->renames.find(k);
67 if (it != state->renames.end())
68 state->temps[i] = it->second;
69 else
70 state->temps[i] = Temp(0, tmp.regClass());
71 }
72 }
73
74 state->temps[def_block] = tmp;
75 for (uint32_t i = def_block + 1; i <= use_block; i++) {
76 if (!state->visit_block[i] || state->temps[i].id())
77 continue;
78
79 Block& block = program->blocks[i];
80
81 bool undef = true;
82 for (unsigned pred : block.logical_preds)
83 undef &= pred < i && !state->is_temp_defined_at_pred(pred, def_block);
84 if (undef) {
85 state->temps[i] = Temp(0, tmp.regClass());
86 continue;
87 }
88
89 /* If a logical dominator has a temporary, we don't need to create a phi and can just use
90 * that temporary instead. For linear temporaries, we also need to check if it dominates in
91 * the linear CFG, because logical dominators do not necessarily dominate a block in the
92 * linear CFG (for example, because of continue_or_break or empty exec skips). */
93 uint32_t dom = block.index;
94 bool skip_phis = false;
95 do {
96 dom = program->blocks[dom].logical_idom;
97 if (state->is_temp_defined_at_pred(dom, def_block) &&
98 dominates_linear(program->blocks[dom], block)) {
99 skip_phis = true;
100 break;
101 }
102 } while (dom != def_block);
103 if (skip_phis) {
104 state->temps[i] = state->temps[dom];
105 continue;
106 }
107
108 /* This pass doesn't support creating loop header phis */
109 assert(!(block.kind & block_kind_loop_header));
110
111 Temp def = program->allocateTmp(tmp.regClass());
112 aco_ptr<Instruction> phi{
113 create_instruction(aco_opcode::p_phi, Format::PSEUDO, block.logical_preds.size(), 1)};
114 for (unsigned j = 0; j < block.logical_preds.size(); j++) {
115 uint32_t pred = block.logical_preds[j];
116 assert(state->is_temp_defined_at_pred(pred, def_block));
117 phi->operands[j] = Operand(state->temps[pred]);
118 }
119 phi->definitions[0] = Definition(def);
120
121 if (&block == state->block)
122 state->new_phis.emplace_back(std::move(phi));
123 else
124 block.instructions.emplace(block.instructions.begin(), std::move(phi));
125
126 uint64_t k = i | ((uint64_t)tmp.id() << 32);
127 state->renames.emplace(k, def);
128 state->temps[i] = def;
129 }
130
131 return state->temps[use_block];
132 }
133
134 template <bool LoopHeader>
135 bool
repair_block(repair_state * state,Block & block)136 repair_block(repair_state* state, Block& block)
137 {
138 bool needs_repair = state->block_needs_repair[block.index];
139 bool dom_needs_repair = state->dom_needs_repair[block.index];
140 bool progress = false;
141
142 state->block = █
143 for (aco_ptr<Instruction>& instr : block.instructions) {
144 if (dom_needs_repair) {
145 for (Definition def : instr->definitions) {
146 if (def.isTemp()) {
147 state->def_blocks[def.tempId()] = block.index;
148 state->has_def_block[def.tempId()] = true;
149 }
150 }
151 }
152
153 bool phi = is_phi(instr) || instr->opcode == aco_opcode::p_boolean_phi;
154
155 /* Skip the section below if we don't need to repair. If we don't need to update def_blocks
156 * either, then we can just break. We always need to process phis because their actual uses
157 * are in the predecessors, which might need repair. */
158 if (!phi && !needs_repair) {
159 if (!dom_needs_repair)
160 break;
161 continue;
162 }
163
164 unsigned start = 0;
165 unsigned num_operands = instr->operands.size();
166 if (phi && (block.kind & block_kind_loop_header)) {
167 if (LoopHeader)
168 start++;
169 else
170 num_operands = 1;
171 } else if (LoopHeader) {
172 break;
173 }
174
175 for (unsigned i = start; i < num_operands; i++) {
176 Operand& op = instr->operands[i];
177 if (!op.isTemp() || !op.getTemp().is_linear() || !state->has_def_block[op.tempId()])
178 continue;
179
180 uint32_t def_block = state->def_blocks[op.tempId()];
181 uint32_t use_block = block.index;
182 if (instr->opcode == aco_opcode::p_boolean_phi || instr->opcode == aco_opcode::p_phi) {
183 use_block = block.logical_preds[i];
184 if (!state->block_needs_repair[use_block])
185 continue;
186 } else if (instr->opcode == aco_opcode::p_linear_phi) {
187 use_block = block.linear_preds[i];
188 if (!state->block_needs_repair[use_block])
189 continue;
190 }
191
192 if (!dominates_linear(state->program->blocks[def_block],
193 state->program->blocks[use_block])) {
194 assert(dominates_logical(state->program->blocks[def_block],
195 state->program->blocks[use_block]));
196 op.setTemp(create_phis(state, op.getTemp(), use_block, def_block));
197 progress = true;
198 }
199 }
200 }
201
202 /* These are inserted later to not invalidate any iterators. */
203 block.instructions.insert(block.instructions.begin(),
204 std::move_iterator(state->new_phis.begin()),
205 std::move_iterator(state->new_phis.end()));
206 state->new_phis.clear();
207
208 return progress;
209 }
210
211 } /* end namespace */
212
213 bool
repair_ssa(Program * program)214 repair_ssa(Program* program)
215 {
216 repair_state state;
217 state.program = program;
218
219 state.block_needs_repair.resize(program->blocks.size());
220 state.dom_needs_repair.resize(program->blocks.size());
221
222 state.has_def_block.resize(program->peekAllocationId());
223 state.def_blocks.reset(new uint32_t[program->peekAllocationId()]);
224
225 state.visit_block.resize(program->blocks.size());
226 state.temps.resize(program->blocks.size());
227
228 for (Block& block : program->blocks) {
229 if (block.logical_idom == -1)
230 continue;
231
232 if (state.block_needs_repair[block.logical_idom] ||
233 !dominates_linear(program->blocks[block.logical_idom], block)) {
234 state.block_needs_repair[block.index] = true;
235
236 /* Set dom_needs_repair=true for logical dominators. */
237 uint32_t parent = block.logical_idom;
238 while (!state.dom_needs_repair[parent]) {
239 state.dom_needs_repair[parent] = true;
240 parent = program->blocks[parent].logical_idom;
241 }
242 }
243 }
244
245 std::vector<unsigned> loop_header_indices;
246
247 bool progress = false;
248 for (Block& block : program->blocks) {
249 if (block.kind & block_kind_loop_header)
250 loop_header_indices.push_back(block.index);
251
252 progress |= repair_block<false>(&state, block);
253
254 if (block.kind & block_kind_loop_exit) {
255 unsigned header = loop_header_indices.back();
256 loop_header_indices.pop_back();
257
258 progress |= repair_block<true>(&state, program->blocks[header]);
259 }
260 }
261
262 return progress;
263 }
264
265 } // namespace aco
266