1 /*
2 * Copyright (C) 2021 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /* This pass lowers array accesses to SSA.
25 *
26 * After this pass, instructions writing arrays implicitly read the contents of
27 * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
28 * operation, and store to instr->dsts[0].
29 *
 * This makes arrays appear like "normal" SSA values, even if the false
 * dependencies mean that they always stay in CSSA form (i.e. they can be
 * translated out of SSA with no copies). While hopefully they shouldn't induce copies in
33 * most cases, we can't make that guarantee while also splitting spilling from
34 * RA and guaranteeing a certain number of registers are used, so we have to
35 * insert the phi nodes to be able to know when copying should happen.
36 *
37 * The implementation is based on the idea in "Simple and Efficient Construction
38 * of Static Single Assignment Form" of scanning backwards to find the
39 * definition. However, since we're not doing this on-the-fly we can simplify
40 * things a little by doing a pre-pass to get the last definition of each array
41 * in each block. Then we optimize trivial phis in a separate pass, "on the fly"
42 * so that we don't have to rewrite (and keep track of) users.
43 */
44
45 #include <stdlib.h>
46 #include "ir3.h"
47
/* Per-(block, array) state tracked while rewriting one shader's arrays to
 * SSA form.
 */
struct array_state {
   /* Definition of the array live at the start of the block; computed
    * lazily by read_value_beginning() and may be a phi. NULL is legitimate
    * for blocks where the array has no reaching definition. */
   struct ir3_register *live_in_definition;
   /* Definition of the array live at the end of the block (last write in
    * the block, or the live-in definition if the block doesn't write it). */
   struct ir3_register *live_out_definition;
   /* live_in_definition has been computed (guards recursion; the computed
    * value itself may be NULL). */
   bool constructed;
   /* NOTE(review): not referenced anywhere in this file as shown —
    * presumably reserved/leftover; verify before removing. */
   bool optimized;
};
54
/* Pass-wide context: a flat [block][array] matrix of array_state, indexed
 * via get_state().
 */
struct array_ctx {
   /* block_count * array_count entries, calloc'd in ir3_array_to_ssa(). */
   struct array_state *states;
   struct ir3 *ir;
   /* Max array id + 1; the row stride of the states matrix. */
   unsigned array_count;
};
60
61 static struct array_state *
get_state(struct array_ctx * ctx,struct ir3_block * block,unsigned id)62 get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
63 {
64 return &ctx->states[ctx->array_count * block->index + id];
65 }
66
67 static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
68 struct ir3_block *block,
69 struct ir3_array *arr);
70
71 static struct ir3_register *
read_value_end(struct array_ctx * ctx,struct ir3_block * block,struct ir3_array * arr)72 read_value_end(struct array_ctx *ctx, struct ir3_block *block,
73 struct ir3_array *arr)
74 {
75 struct array_state *state = get_state(ctx, block, arr->id);
76 if (state->live_out_definition)
77 return state->live_out_definition;
78
79 state->live_out_definition = read_value_beginning(ctx, block, arr);
80 return state->live_out_definition;
81 }
82
/* Roughly equivalent to readValueRecursive from the paper: returns the
 * definition of 'arr' live at the start of 'block', creating a phi at the
 * top of the block when there are multiple predecessors. Returns NULL when
 * the array has no reaching definition (undef).
 */
static struct ir3_register *
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
                     struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);

   if (state->constructed)
      return state->live_in_definition;

   /* Entry block (no predecessors): nothing reaches here, value is undef. */
   if (block->predecessors_count == 0) {
      state->constructed = true;
      return NULL;
   }

   /* Single predecessor: just forward its live-out definition. */
   if (block->predecessors_count == 1) {
      state->live_in_definition =
         read_value_end(ctx, block->predecessors[0], arr);
      state->constructed = true;
      return state->live_in_definition;
   }

   /* Multiple predecessors: build a phi merging the live-out value from
    * each edge.
    */
   unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   /* ir3_instr_create() appends; move the phi to the start of the block
    * where phis are expected to live. */
   list_del(&phi->node);
   list_add(&phi->node, &block->instr_list);

   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= flags;
   dst->array.id = arr->id;
   dst->size = arr->length;

   /* Record the phi as the live-in definition *before* recursing into the
    * predecessors, so that a cyclic CFG (loop back-edge reading this same
    * block) resolves to the phi instead of recursing forever.
    */
   state->live_in_definition = phi->dsts[0];
   state->constructed = true;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_register *src =
         read_value_end(ctx, block->predecessors[i], arr);
      struct ir3_register *src_reg;
      if (src) {
         src_reg = __ssa_src(phi, src->instr, flags);
      } else {
         /* Value is undefined along this edge. */
         src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
      }
      src_reg->array.id = arr->id;
      src_reg->size = arr->length;
   }
   return phi->dsts[0];
}
133
/* Optimize away a trivial phi (one whose sources are all the same
 * definition, ignoring self-references), per the paper's removeTrivialPhi().
 * Instead of rewriting users, the replacement is cached in phi->data and
 * users resolve it later via lookup_value().
 *
 * Returns the register to use in place of the phi's destination (the phi's
 * own dst when the phi is not trivial).
 */
static struct ir3_register *
remove_trivial_phi(struct ir3_instruction *phi)
{
   /* Break cycles: while this phi is being processed, recursive queries
    * resolve to the phi's own dst. */
   if (phi->data)
      return phi->data;

   phi->data = phi->dsts[0];

   struct ir3_register *unique_def = NULL;
   bool unique = true;
   for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
      struct ir3_register *src = phi->srcs[i];

      /* If there are any undef sources, then the remaining sources may not
       * dominate the phi node, even if they are all equal. So we need to
       * bail out in this case.
       *
       * This seems to be a bug in the original paper.
       */
      if (!src->def) {
         unique = false;
         break;
      }

      struct ir3_instruction *src_instr = src->def->instr;

      /* phi sources which point to the phi itself don't count for
       * figuring out if the phi is trivial
       */
      if (src_instr == phi)
         continue;

      if (src_instr->opc == OPC_META_PHI) {
         /* Resolve phi sources first so we compare against their final,
          * post-elimination definitions. */
         src->def = remove_trivial_phi(src->def->instr);
      }

      if (unique_def) {
         if (unique_def != src->def) {
            unique = false;
            break;
         }
      } else {
         unique_def = src->def;
      }
   }

   if (unique) {
      phi->data = unique_def;
      return unique_def;
   } else {
      return phi->dsts[0];
   }
}
188
189 static struct ir3_register *
lookup_value(struct ir3_register * reg)190 lookup_value(struct ir3_register *reg)
191 {
192 if (reg->instr->opc == OPC_META_PHI)
193 return reg->instr->data;
194 return reg;
195 }
196
197 static struct ir3_register *
lookup_live_in(struct array_ctx * ctx,struct ir3_block * block,unsigned id)198 lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
199 {
200 struct array_state *state = get_state(ctx, block, id);
201 if (state->live_in_definition)
202 return lookup_value(state->live_in_definition);
203
204 return NULL;
205 }
206
207 bool
ir3_array_to_ssa(struct ir3 * ir)208 ir3_array_to_ssa(struct ir3 *ir)
209 {
210 struct array_ctx ctx = {};
211
212 foreach_array (array, &ir->array_list) {
213 ctx.array_count = MAX2(ctx.array_count, array->id + 1);
214 }
215
216 if (ctx.array_count == 0)
217 return false;
218
219 unsigned i = 0;
220 foreach_block (block, &ir->block_list) {
221 block->index = i++;
222 }
223
224 ctx.ir = ir;
225 ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));
226
227 foreach_block (block, &ir->block_list) {
228 foreach_instr (instr, &block->instr_list) {
229 foreach_dst (dst, instr) {
230 if (dst->flags & IR3_REG_ARRAY) {
231 struct array_state *state =
232 get_state(&ctx, block, dst->array.id);
233 state->live_out_definition = dst;
234 }
235 }
236 }
237 }
238
239 foreach_block (block, &ir->block_list) {
240 foreach_instr (instr, &block->instr_list) {
241 if (instr->opc == OPC_META_PHI)
242 continue;
243
244 foreach_dst (reg, instr) {
245 if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
246 struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
247
248 /* Construct any phi nodes necessary to read this value */
249 read_value_beginning(&ctx, block, arr);
250 }
251 }
252 foreach_src (reg, instr) {
253 if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
254 struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
255
256 /* Construct any phi nodes necessary to read this value */
257 read_value_beginning(&ctx, block, arr);
258 }
259 }
260 }
261 }
262
263 foreach_block (block, &ir->block_list) {
264 foreach_instr_safe (instr, &block->instr_list) {
265 if (instr->opc == OPC_META_PHI)
266 remove_trivial_phi(instr);
267 else
268 break;
269 }
270 }
271
272 foreach_block (block, &ir->block_list) {
273 foreach_instr_safe (instr, &block->instr_list) {
274 if (instr->opc == OPC_META_PHI) {
275 if (!(instr->flags & IR3_REG_ARRAY))
276 continue;
277 if (instr->data != instr->dsts[0]) {
278 list_del(&instr->node);
279 continue;
280 }
281 for (unsigned i = 0; i < instr->srcs_count; i++) {
282 instr->srcs[i] = lookup_value(instr->srcs[i]);
283 }
284 } else {
285 foreach_dst (reg, instr) {
286 if ((reg->flags & IR3_REG_ARRAY)) {
287 if (!reg->tied) {
288 struct ir3_register *def =
289 lookup_live_in(&ctx, block, reg->array.id);
290 if (def)
291 ir3_reg_set_last_array(instr, reg, def);
292 }
293 reg->flags |= IR3_REG_SSA;
294 }
295 }
296 foreach_src (reg, instr) {
297 if ((reg->flags & IR3_REG_ARRAY)) {
298 /* It is assumed that before calling
299 * ir3_array_to_ssa(), reg->def was set to the
300 * previous writer of the array within the current
301 * block or NULL if none.
302 */
303 if (!reg->def) {
304 reg->def = lookup_live_in(&ctx, block, reg->array.id);
305 }
306 reg->flags |= IR3_REG_SSA;
307 }
308 }
309 }
310 }
311 }
312
313 free(ctx.states);
314 return true;
315 }
316