• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2016 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #define MAX_INSTRUCTION (1 << 30)
26 
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
30 
31 struct partial_update_state {
32         struct qinst *insts[4];
33         uint8_t channels;
34 };
35 
36 static int
vir_reg_to_var(struct qreg reg)37 vir_reg_to_var(struct qreg reg)
38 {
39         if (reg.file == QFILE_TEMP)
40                 return reg.index;
41 
42         return -1;
43 }
44 
45 static void
vir_setup_use(struct v3d_compile * c,struct qblock * block,int ip,struct qreg src)46 vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
47               struct qreg src)
48 {
49         int var = vir_reg_to_var(src);
50         if (var == -1)
51                 return;
52 
53         c->temp_start[var] = MIN2(c->temp_start[var], ip);
54         c->temp_end[var] = MAX2(c->temp_end[var], ip);
55 
56         /* The use[] bitset marks when the block makes
57          * use of a variable without having completely
58          * defined that variable within the block.
59          */
60         if (!BITSET_TEST(block->def, var))
61                 BITSET_SET(block->use, var);
62 }
63 
64 static struct partial_update_state *
get_partial_update_state(struct hash_table * partial_update_ht,struct qinst * inst)65 get_partial_update_state(struct hash_table *partial_update_ht,
66                          struct qinst *inst)
67 {
68         struct hash_entry *entry =
69                 _mesa_hash_table_search(partial_update_ht,
70                                         &inst->dst.index);
71         if (entry)
72                 return entry->data;
73 
74         struct partial_update_state *state =
75                 rzalloc(partial_update_ht, struct partial_update_state);
76 
77         _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
78 
79         return state;
80 }
81 
82 static void
vir_setup_def(struct v3d_compile * c,struct qblock * block,int ip,struct hash_table * partial_update_ht,struct qinst * inst)83 vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
84               struct hash_table *partial_update_ht, struct qinst *inst)
85 {
86         if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
87                 return;
88 
89         /* The def[] bitset marks when an initialization in a
90          * block completely screens off previous updates of
91          * that variable.
92          */
93         int var = vir_reg_to_var(inst->dst);
94         if (var == -1)
95                 return;
96 
97         c->temp_start[var] = MIN2(c->temp_start[var], ip);
98         c->temp_end[var] = MAX2(c->temp_end[var], ip);
99 
100         /* Mark the block as having a (partial) def of the var. */
101         BITSET_SET(block->defout, var);
102 
103         /* If we've already tracked this as a def that screens off previous
104          * uses, or already used it within the block, there's nothing to do.
105          */
106         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
107                 return;
108 
109         /* Easy, common case: unconditional full register update.*/
110         if ((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
111              inst->qpu.flags.mc == V3D_QPU_COND_NONE) &&
112             inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
113             inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
114                 BITSET_SET(block->def, var);
115                 return;
116         }
117 
118         /* Finally, look at the condition code and packing and mark it as a
119          * def.  We need to make sure that we understand sequences
120          * instructions like:
121          *
122          *     mov.zs t0, t1
123          *     mov.zc t0, t2
124          *
125          * or:
126          *
127          *     mmov t0.8a, t1
128          *     mmov t0.8b, t2
129          *     mmov t0.8c, t3
130          *     mmov t0.8d, t4
131          *
132          * as defining the temp within the block, because otherwise dst's live
133          * range will get extended up the control flow to the top of the
134          * program.
135          */
136         struct partial_update_state *state =
137                 get_partial_update_state(partial_update_ht, inst);
138         uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
139 
140         if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
141             inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
142                 state->channels |= mask;
143         } else {
144                 for (int i = 0; i < 4; i++) {
145                         if (!(mask & (1 << i)))
146                                 continue;
147 
148                         /* XXXif (state->insts[i] &&
149                             state->insts[i]->cond ==
150                             qpu_cond_complement(inst->cond))
151                                 state->channels |= 1 << i;
152                         else
153                         */
154                                 state->insts[i] = inst;
155                 }
156         }
157 
158         if (state->channels == 0xf)
159                 BITSET_SET(block->def, var);
160 }
161 
162 static void
sf_state_clear(struct hash_table * partial_update_ht)163 sf_state_clear(struct hash_table *partial_update_ht)
164 {
165         hash_table_foreach(partial_update_ht, entry) {
166                 struct partial_update_state *state = entry->data;
167 
168                 for (int i = 0; i < 4; i++) {
169                         if (state->insts[i] &&
170                             (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
171                              state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
172                                 state->insts[i] = NULL;
173                 }
174         }
175 }
176 
177 /* Sets up the def/use arrays for when variables are used-before-defined or
178  * defined-before-used in the block.
179  *
180  * Also initializes the temp_start/temp_end to cover just the instruction IPs
181  * where the variable is used, which will be extended later in
182  * vir_compute_start_end().
183  */
184 static void
vir_setup_def_use(struct v3d_compile * c)185 vir_setup_def_use(struct v3d_compile *c)
186 {
187         struct hash_table *partial_update_ht =
188                 _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);
189         int ip = 0;
190 
191         vir_for_each_block(block, c) {
192                 block->start_ip = ip;
193 
194                 _mesa_hash_table_clear(partial_update_ht, NULL);
195 
196                 vir_for_each_inst(inst, block) {
197                         for (int i = 0; i < vir_get_nsrc(inst); i++)
198                                 vir_setup_use(c, block, ip, inst->src[i]);
199 
200                         vir_setup_def(c, block, ip, partial_update_ht, inst);
201 
202                         if (false /* XXX inst->uf */)
203                                 sf_state_clear(partial_update_ht);
204 
205                         /* Payload registers: r0/1/2 contain W, centroid W,
206                          * and Z at program start.  Register allocation will
207                          * force their nodes to R0/1/2.
208                          */
209                         if (inst->src[0].file == QFILE_REG) {
210                                 switch (inst->src[0].index) {
211                                 case 0:
212                                 case 1:
213                                 case 2:
214                                         c->temp_start[inst->dst.index] = 0;
215                                         break;
216                                 }
217                         }
218 
219                         ip++;
220                 }
221                 block->end_ip = ip;
222         }
223 
224         _mesa_hash_table_destroy(partial_update_ht, NULL);
225 }
226 
227 static bool
vir_live_variables_dataflow(struct v3d_compile * c,int bitset_words)228 vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
229 {
230         bool cont = false;
231 
232         vir_for_each_block_rev(block, c) {
233                 /* Update live_out: Any successor using the variable
234                  * on entrance needs us to have the variable live on
235                  * exit.
236                  */
237                 vir_for_each_successor(succ, block) {
238                         for (int i = 0; i < bitset_words; i++) {
239                                 BITSET_WORD new_live_out = (succ->live_in[i] &
240                                                             ~block->live_out[i]);
241                                 if (new_live_out) {
242                                         block->live_out[i] |= new_live_out;
243                                         cont = true;
244                                 }
245                         }
246                 }
247 
248                 /* Update live_in */
249                 for (int i = 0; i < bitset_words; i++) {
250                         BITSET_WORD new_live_in = (block->use[i] |
251                                                    (block->live_out[i] &
252                                                     ~block->def[i]));
253                         if (new_live_in & ~block->live_in[i]) {
254                                 block->live_in[i] |= new_live_in;
255                                 cont = true;
256                         }
257                 }
258         }
259 
260         return cont;
261 }
262 
263 static bool
vir_live_variables_defin_defout_dataflow(struct v3d_compile * c,int bitset_words)264 vir_live_variables_defin_defout_dataflow(struct v3d_compile *c, int bitset_words)
265 {
266         bool cont = false;
267 
268         vir_for_each_block_rev(block, c) {
269                 /* Propagate defin/defout down the successors to produce the
270                  * union of blocks with a reachable (partial) definition of
271                  * the var.
272                  *
273                  * This keeps a conditional first write to a reg from
274                  * extending its lifetime back to the start of the program.
275                  */
276                 vir_for_each_successor(succ, block) {
277                         for (int i = 0; i < bitset_words; i++) {
278                                 BITSET_WORD new_def = (block->defout[i] &
279                                                        ~succ->defin[i]);
280                                 succ->defin[i] |= new_def;
281                                 succ->defout[i] |= new_def;
282                                 cont |= new_def;
283                         }
284                 }
285         }
286 
287         return cont;
288 }
289 
290 /**
291  * Extend the start/end ranges for each variable to account for the
292  * new information calculated from control flow.
293  */
294 static void
vir_compute_start_end(struct v3d_compile * c,int num_vars)295 vir_compute_start_end(struct v3d_compile *c, int num_vars)
296 {
297         vir_for_each_block(block, c) {
298                 for (int i = 0; i < num_vars; i++) {
299                         if (BITSET_TEST(block->live_in, i) &&
300                             BITSET_TEST(block->defin, i)) {
301                                 c->temp_start[i] = MIN2(c->temp_start[i],
302                                                         block->start_ip);
303                                 c->temp_end[i] = MAX2(c->temp_end[i],
304                                                       block->start_ip);
305                         }
306 
307                         if (BITSET_TEST(block->live_out, i) &&
308                             BITSET_TEST(block->defout, i)) {
309                                 c->temp_start[i] = MIN2(c->temp_start[i],
310                                                         block->end_ip);
311                                 c->temp_end[i] = MAX2(c->temp_end[i],
312                                                       block->end_ip);
313                         }
314                 }
315         }
316 }
317 
318 void
vir_calculate_live_intervals(struct v3d_compile * c)319 vir_calculate_live_intervals(struct v3d_compile *c)
320 {
321         int bitset_words = BITSET_WORDS(c->num_temps);
322 
323         /* We may be called more than once if we've rearranged the program to
324          * try to get register allocation to succeed.
325          */
326         if (c->temp_start) {
327                 ralloc_free(c->temp_start);
328                 ralloc_free(c->temp_end);
329 
330                 vir_for_each_block(block, c) {
331                         ralloc_free(block->def);
332                         ralloc_free(block->use);
333                         ralloc_free(block->live_in);
334                         ralloc_free(block->live_out);
335                 }
336         }
337 
338         c->temp_start = rzalloc_array(c, int, c->num_temps);
339         c->temp_end = rzalloc_array(c, int, c->num_temps);
340 
341         for (int i = 0; i < c->num_temps; i++) {
342                 c->temp_start[i] = MAX_INSTRUCTION;
343                 c->temp_end[i] = -1;
344         }
345 
346         vir_for_each_block(block, c) {
347                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
348                 block->defin = rzalloc_array(c, BITSET_WORD, bitset_words);
349                 block->defout = rzalloc_array(c, BITSET_WORD, bitset_words);
350                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
351                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
352                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
353         }
354 
355         vir_setup_def_use(c);
356 
357         while (vir_live_variables_dataflow(c, bitset_words))
358                 ;
359 
360         while (vir_live_variables_defin_defout_dataflow(c, bitset_words))
361                 ;
362 
363         vir_compute_start_end(c, c->num_temps);
364 
365         c->live_intervals_valid = true;
366 }
367