1 /*
2 * Copyright © 2012 Intel Corporation
3 * Copyright © 2016 Broadcom
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24
25 #define MAX_INSTRUCTION (1 << 30)
26
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
30
31 struct partial_update_state {
32 struct qinst *insts[4];
33 uint8_t channels;
34 };
35
36 static int
vir_reg_to_var(struct qreg reg)37 vir_reg_to_var(struct qreg reg)
38 {
39 if (reg.file == QFILE_TEMP)
40 return reg.index;
41
42 return -1;
43 }
44
45 static void
vir_setup_use(struct v3d_compile * c,struct qblock * block,int ip,struct qreg src)46 vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
47 struct qreg src)
48 {
49 int var = vir_reg_to_var(src);
50 if (var == -1)
51 return;
52
53 c->temp_start[var] = MIN2(c->temp_start[var], ip);
54 c->temp_end[var] = MAX2(c->temp_end[var], ip);
55
56 /* The use[] bitset marks when the block makes
57 * use of a variable without having completely
58 * defined that variable within the block.
59 */
60 if (!BITSET_TEST(block->def, var))
61 BITSET_SET(block->use, var);
62 }
63
64 static struct partial_update_state *
get_partial_update_state(struct hash_table * partial_update_ht,struct qinst * inst)65 get_partial_update_state(struct hash_table *partial_update_ht,
66 struct qinst *inst)
67 {
68 struct hash_entry *entry =
69 _mesa_hash_table_search(partial_update_ht,
70 &inst->dst.index);
71 if (entry)
72 return entry->data;
73
74 struct partial_update_state *state =
75 rzalloc(partial_update_ht, struct partial_update_state);
76
77 _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
78
79 return state;
80 }
81
82 static void
vir_setup_def(struct v3d_compile * c,struct qblock * block,int ip,struct hash_table * partial_update_ht,struct qinst * inst)83 vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
84 struct hash_table *partial_update_ht, struct qinst *inst)
85 {
86 if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
87 return;
88
89 /* The def[] bitset marks when an initialization in a
90 * block completely screens off previous updates of
91 * that variable.
92 */
93 int var = vir_reg_to_var(inst->dst);
94 if (var == -1)
95 return;
96
97 c->temp_start[var] = MIN2(c->temp_start[var], ip);
98 c->temp_end[var] = MAX2(c->temp_end[var], ip);
99
100 /* Mark the block as having a (partial) def of the var. */
101 BITSET_SET(block->defout, var);
102
103 /* If we've already tracked this as a def that screens off previous
104 * uses, or already used it within the block, there's nothing to do.
105 */
106 if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
107 return;
108
109 /* Easy, common case: unconditional full register update.*/
110 if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
111 inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
112 inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
113 inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
114 BITSET_SET(block->def, var);
115 return;
116 }
117
118 /* Finally, look at the condition code and packing and mark it as a
119 * def. We need to make sure that we understand sequences
120 * instructions like:
121 *
122 * mov.zs t0, t1
123 * mov.zc t0, t2
124 *
125 * or:
126 *
127 * mmov t0.8a, t1
128 * mmov t0.8b, t2
129 * mmov t0.8c, t3
130 * mmov t0.8d, t4
131 *
132 * as defining the temp within the block, because otherwise dst's live
133 * range will get extended up the control flow to the top of the
134 * program.
135 */
136 struct partial_update_state *state =
137 get_partial_update_state(partial_update_ht, inst);
138 uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
139
140 if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
141 inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
142 state->channels |= mask;
143 } else {
144 for (int i = 0; i < 4; i++) {
145 if (!(mask & (1 << i)))
146 continue;
147
148 /* XXXif (state->insts[i] &&
149 state->insts[i]->cond ==
150 qpu_cond_complement(inst->cond))
151 state->channels |= 1 << i;
152 else
153 */
154 state->insts[i] = inst;
155 }
156 }
157
158 if (state->channels == 0xf)
159 BITSET_SET(block->def, var);
160 }
161
162 static void
sf_state_clear(struct hash_table * partial_update_ht)163 sf_state_clear(struct hash_table *partial_update_ht)
164 {
165 hash_table_foreach(partial_update_ht, entry) {
166 struct partial_update_state *state = entry->data;
167
168 for (int i = 0; i < 4; i++) {
169 if (state->insts[i] &&
170 (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
171 state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
172 state->insts[i] = NULL;
173 }
174 }
175 }
176
177 /* Sets up the def/use arrays for when variables are used-before-defined or
178 * defined-before-used in the block.
179 *
180 * Also initializes the temp_start/temp_end to cover just the instruction IPs
181 * where the variable is used, which will be extended later in
182 * vir_compute_start_end().
183 */
184 static void
vir_setup_def_use(struct v3d_compile * c)185 vir_setup_def_use(struct v3d_compile *c)
186 {
187 struct hash_table *partial_update_ht =
188 _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);
189 int ip = 0;
190
191 vir_for_each_block(block, c) {
192 block->start_ip = ip;
193
194 _mesa_hash_table_clear(partial_update_ht, NULL);
195
196 vir_for_each_inst(inst, block) {
197 for (int i = 0; i < vir_get_nsrc(inst); i++)
198 vir_setup_use(c, block, ip, inst->src[i]);
199
200 vir_setup_def(c, block, ip, partial_update_ht, inst);
201
202 if (false /* XXX inst->uf */)
203 sf_state_clear(partial_update_ht);
204
205 /* Payload registers: r0/1/2 contain W, centroid W,
206 * and Z at program start. Register allocation will
207 * force their nodes to R0/1/2.
208 */
209 if (inst->src[0].file == QFILE_REG) {
210 switch (inst->src[0].index) {
211 case 0:
212 case 1:
213 case 2:
214 c->temp_start[inst->dst.index] = 0;
215 break;
216 }
217 }
218
219 ip++;
220 }
221 block->end_ip = ip;
222 }
223
224 _mesa_hash_table_destroy(partial_update_ht, NULL);
225 }
226
227 static bool
vir_live_variables_dataflow(struct v3d_compile * c,int bitset_words)228 vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
229 {
230 bool cont = false;
231
232 vir_for_each_block_rev(block, c) {
233 /* Update live_out: Any successor using the variable
234 * on entrance needs us to have the variable live on
235 * exit.
236 */
237 vir_for_each_successor(succ, block) {
238 for (int i = 0; i < bitset_words; i++) {
239 BITSET_WORD new_live_out = (succ->live_in[i] &
240 ~block->live_out[i]);
241 if (new_live_out) {
242 block->live_out[i] |= new_live_out;
243 cont = true;
244 }
245 }
246 }
247
248 /* Update live_in */
249 for (int i = 0; i < bitset_words; i++) {
250 BITSET_WORD new_live_in = (block->use[i] |
251 (block->live_out[i] &
252 ~block->def[i]));
253 if (new_live_in & ~block->live_in[i]) {
254 block->live_in[i] |= new_live_in;
255 cont = true;
256 }
257 }
258 }
259
260 return cont;
261 }
262
263 static bool
vir_live_variables_defin_defout_dataflow(struct v3d_compile * c,int bitset_words)264 vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
265 {
266 bool cont = false;
267
268 vir_for_each_block_rev(block, c) {
269 /* Propagate defin/defout down the successors to produce the
270 * union of blocks with a reachable (partial) definition of
271 * the var.
272 *
273 * This keeps a conditional first write to a reg from
274 * extending its lifetime back to the start of the program.
275 */
276 vir_for_each_successor(succ, block) {
277 for (int i = 0; i < bitset_words; i++) {
278 BITSET_WORD new_def = (block->defout[i] &
279 ~succ->defin[i]);
280 succ->defin[i] |= new_def;
281 succ->defout[i] |= new_def;
282 cont |= new_def;
283 }
284 }
285 }
286
287 return cont;
288 }
289
290 /**
291 * Extend the start/end ranges for each variable to account for the
292 * new information calculated from control flow.
293 */
294 static void
vir_compute_start_end(struct v3d_compile * c,int num_vars)295 vir_compute_start_end(struct v3d_compile *c, int num_vars)
296 {
297 vir_for_each_block(block, c) {
298 for (int i = 0; i < num_vars; i++) {
299 if (BITSET_TEST(block->live_in, i) &&
300 BITSET_TEST(block->defin, i)) {
301 c->temp_start[i] = MIN2(c->temp_start[i],
302 block->start_ip);
303 c->temp_end[i] = MAX2(c->temp_end[i],
304 block->start_ip);
305 }
306
307 if (BITSET_TEST(block->live_out, i) &&
308 BITSET_TEST(block->defout, i)) {
309 c->temp_start[i] = MIN2(c->temp_start[i],
310 block->end_ip);
311 c->temp_end[i] = MAX2(c->temp_end[i],
312 block->end_ip);
313 }
314 }
315 }
316 }
317
318 void
vir_calculate_live_intervals(struct v3d_compile * c)319 vir_calculate_live_intervals(struct v3d_compile *c)
320 {
321 int bitset_words = BITSET_WORDS(c->num_temps);
322
323 /* We may be called more than once if we've rearranged the program to
324 * try to get register allocation to succeed.
325 */
326 if (c->temp_start) {
327 ralloc_free(c->temp_start);
328 ralloc_free(c->temp_end);
329
330 vir_for_each_block(block, c) {
331 ralloc_free(block->def);
332 ralloc_free(block->use);
333 ralloc_free(block->live_in);
334 ralloc_free(block->live_out);
335 }
336 }
337
338 c->temp_start = rzalloc_array(c, int, c->num_temps);
339 c->temp_end = rzalloc_array(c, int, c->num_temps);
340
341 for (int i = 0; i < c->num_temps; i++) {
342 c->temp_start[i] = MAX_INSTRUCTION;
343 c->temp_end[i] = -1;
344 }
345
346 vir_for_each_block(block, c) {
347 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
348 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
349 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
350 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
351 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
352 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
353 }
354
355 vir_setup_def_use(c);
356
357 while (vir_live_variables_dataflow(c, bitset_words))
358 ;
359
360 while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
361 ;
362
363 vir_compute_start_end(c, c->num_temps);
364
365 c->live_intervals_valid = true;
366 }
367