1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include "util/ralloc.h"
26 #include "util/register_allocate.h"
27 #include "util/u_debug.h"
28
29 #include "ppir.h"
30 #include "lima_context.h"
31
32 #define PPIR_FULL_REG_NUM 6
33
34 #define PPIR_VEC1_REG_NUM (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
35 #define PPIR_VEC2_REG_NUM (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
36 #define PPIR_VEC3_REG_NUM (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
37 #define PPIR_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
38 #define PPIR_HEAD_VEC1_REG_NUM PPIR_FULL_REG_NUM /* x */
39 #define PPIR_HEAD_VEC2_REG_NUM PPIR_FULL_REG_NUM /* xy */
40 #define PPIR_HEAD_VEC3_REG_NUM PPIR_FULL_REG_NUM /* xyz */
41 #define PPIR_HEAD_VEC4_REG_NUM PPIR_FULL_REG_NUM /* xyzw */
42
43 #define PPIR_VEC1_REG_BASE 0
44 #define PPIR_VEC2_REG_BASE (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
45 #define PPIR_VEC3_REG_BASE (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
46 #define PPIR_VEC4_REG_BASE (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
47 #define PPIR_HEAD_VEC1_REG_BASE (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
48 #define PPIR_HEAD_VEC2_REG_BASE (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
49 #define PPIR_HEAD_VEC3_REG_BASE (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
50 #define PPIR_HEAD_VEC4_REG_BASE (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
51 #define PPIR_REG_COUNT (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
52
53 enum ppir_ra_reg_class {
54 ppir_ra_reg_class_vec1,
55 ppir_ra_reg_class_vec2,
56 ppir_ra_reg_class_vec3,
57 ppir_ra_reg_class_vec4,
58
59 /* 4 reg class for load/store instr regs:
60 * load/store instr has no swizzle field, so the (virtual) register
61 * must be allocated at the beginning of a (physical) register,
62 */
63 ppir_ra_reg_class_head_vec1,
64 ppir_ra_reg_class_head_vec2,
65 ppir_ra_reg_class_head_vec3,
66 ppir_ra_reg_class_head_vec4,
67
68 ppir_ra_reg_class_num,
69 };
70
71 static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
72 [ppir_ra_reg_class_vec1] = PPIR_VEC1_REG_BASE,
73 [ppir_ra_reg_class_vec2] = PPIR_VEC2_REG_BASE,
74 [ppir_ra_reg_class_vec3] = PPIR_VEC3_REG_BASE,
75 [ppir_ra_reg_class_vec4] = PPIR_VEC4_REG_BASE,
76 [ppir_ra_reg_class_head_vec1] = PPIR_HEAD_VEC1_REG_BASE,
77 [ppir_ra_reg_class_head_vec2] = PPIR_HEAD_VEC2_REG_BASE,
78 [ppir_ra_reg_class_head_vec3] = PPIR_HEAD_VEC3_REG_BASE,
79 [ppir_ra_reg_class_head_vec4] = PPIR_HEAD_VEC4_REG_BASE,
80 [ppir_ra_reg_class_num] = PPIR_REG_COUNT,
81 };
82
83 static unsigned int *
84 ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
85 (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
86 (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
87 (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
88 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
89 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
90 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
91 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
92 (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
93 };
94
ppir_regalloc_init(void * mem_ctx)95 struct ra_regs *ppir_regalloc_init(void *mem_ctx)
96 {
97 struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
98 if (!ret)
99 return NULL;
100
101 /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
102 static const int class_reg_num[ppir_ra_reg_class_num] = {
103 4, 3, 2, 1, 1, 1, 1, 1,
104 };
105 /* base reg (x, y, z, w) confliction with other regs */
106 for (int h = 0; h < 4; h++) {
107 int base_reg_mask = 1 << h;
108 for (int i = 1; i < ppir_ra_reg_class_num; i++) {
109 int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
110 for (int j = 0; j < class_reg_num[i]; j++) {
111 if (base_reg_mask & (class_reg_base_mask << j)) {
112 for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
113 ra_add_reg_conflict(ret, k * 4 + h,
114 ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
115 }
116 }
117 }
118 }
119 }
120 /* build all other confliction by the base reg confliction */
121 for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
122 ra_make_reg_conflicts_transitive(ret, i);
123
124 for (int i = 0; i < ppir_ra_reg_class_num; i++)
125 ra_alloc_reg_class(ret);
126
127 int reg_index = 0;
128 for (int i = 0; i < ppir_ra_reg_class_num; i++) {
129 while (reg_index < ppir_ra_reg_base[i + 1])
130 ra_class_add_reg(ret, i, reg_index++);
131 }
132
133 ra_set_finalize(ret, ppir_ra_reg_q_values);
134 return ret;
135 }
136
ppir_regalloc_update_reglist_ssa(ppir_compiler * comp)137 static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
138 {
139 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
140 list_for_each_entry(ppir_node, node, &block->node_list, list) {
141 if (node->is_end)
142 continue;
143
144 if (!node->instr || node->op == ppir_op_const)
145 continue;
146
147 ppir_dest *dest = ppir_node_get_dest(node);
148 if (dest) {
149 ppir_reg *reg = NULL;
150
151 if (dest->type == ppir_target_ssa) {
152 reg = &dest->ssa;
153 list_addtail(®->list, &comp->reg_list);
154 }
155 }
156 }
157 }
158 }
159
get_phy_reg_index(int reg)160 static int get_phy_reg_index(int reg)
161 {
162 int i;
163
164 for (i = 0; i < ppir_ra_reg_class_num; i++) {
165 if (reg < ppir_ra_reg_base[i + 1]) {
166 reg -= ppir_ra_reg_base[i];
167 break;
168 }
169 }
170
171 if (i < ppir_ra_reg_class_head_vec1)
172 return reg / (4 - i) * 4 + reg % (4 - i);
173 else
174 return reg * 4;
175 }
176
ppir_regalloc_print_result(ppir_compiler * comp)177 static void ppir_regalloc_print_result(ppir_compiler *comp)
178 {
179 printf("======ppir regalloc result======\n");
180 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
181 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
182 printf("%03d:", instr->index);
183 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
184 ppir_node *node = instr->slots[i];
185 if (!node)
186 continue;
187
188 printf(" (%d|", node->index);
189
190 ppir_dest *dest = ppir_node_get_dest(node);
191 if (dest)
192 printf("%d", ppir_target_get_dest_reg_index(dest));
193
194 printf("|");
195
196 for (int i = 0; i < ppir_node_get_src_num(node); i++) {
197 if (i)
198 printf(" ");
199 printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i)));
200 }
201
202 printf(")");
203 }
204 printf("\n");
205 }
206 }
207 printf("--------------------------\n");
208 }
209
create_new_instr_after(ppir_block * block,ppir_instr * ref,ppir_node * node)210 static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
211 ppir_node *node)
212 {
213 ppir_instr *newinstr = ppir_instr_create(block);
214 if (unlikely(!newinstr))
215 return false;
216
217 list_del(&newinstr->list);
218 list_add(&newinstr->list, &ref->list);
219
220 if (!ppir_instr_insert_node(newinstr, node))
221 return false;
222
223 list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
224 instr->seq++;
225 }
226 newinstr->seq = ref->seq+1;
227 newinstr->scheduled = true;
228 return true;
229 }
230
create_new_instr_before(ppir_block * block,ppir_instr * ref,ppir_node * node)231 static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
232 ppir_node *node)
233 {
234 ppir_instr *newinstr = ppir_instr_create(block);
235 if (unlikely(!newinstr))
236 return false;
237
238 list_del(&newinstr->list);
239 list_addtail(&newinstr->list, &ref->list);
240
241 if (!ppir_instr_insert_node(newinstr, node))
242 return false;
243
244 list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
245 instr->seq++;
246 }
247 newinstr->seq = ref->seq-1;
248 newinstr->scheduled = true;
249 return true;
250 }
251
ppir_update_spilled_src(ppir_compiler * comp,ppir_block * block,ppir_node * node,ppir_src * src,ppir_node ** fill_node)252 static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
253 ppir_node *node, ppir_src *src,
254 ppir_node **fill_node)
255 {
256 /* nodes might have multiple references to the same value.
257 * avoid creating unnecessary loads for the same fill by
258 * saving the node resulting from the temporary load */
259 if (*fill_node)
260 goto update_src;
261
262 int num_components = src->reg->num_components;
263
264 /* alloc new node to load value */
265 ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
266 if (!load_node)
267 return false;
268 list_addtail(&load_node->list, &node->list);
269 comp->num_fills++;
270
271 ppir_load_node *load = ppir_node_to_load(load_node);
272
273 load->index = -comp->prog->stack_size; /* index sizes are negative */
274 load->num_components = num_components;
275
276 ppir_dest *ld_dest = &load->dest;
277 ld_dest->type = ppir_target_pipeline;
278 ld_dest->pipeline = ppir_pipeline_reg_uniform;
279 ld_dest->write_mask = u_bit_consecutive(0, num_components);
280
281 /* If the uniform slot is empty, we can insert the load_temp
282 * there and use it directly. Exceptionally, if the node is in the
283 * varying or texld slot, this doesn't work. */
284 if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
285 node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
286 node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
287 ppir_node_target_assign(src, load_node);
288 *fill_node = load_node;
289 return ppir_instr_insert_node(node->instr, load_node);
290 }
291
292 /* Uniform slot was taken, so fall back to a new instruction with a mov */
293 if (!create_new_instr_before(block, node->instr, load_node))
294 return false;
295
296 /* Create move node */
297 ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
298 if (unlikely(!move_node))
299 return false;
300 list_addtail(&move_node->list, &node->list);
301
302 ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
303
304 move_alu->num_src = 1;
305 move_alu->src->type = ppir_target_pipeline;
306 move_alu->src->pipeline = ppir_pipeline_reg_uniform;
307 for (int i = 0; i < 4; i++)
308 move_alu->src->swizzle[i] = i;
309
310 ppir_dest *alu_dest = &move_alu->dest;
311 alu_dest->type = ppir_target_ssa;
312 alu_dest->ssa.num_components = num_components;
313 alu_dest->ssa.spilled = true;
314 alu_dest->write_mask = u_bit_consecutive(0, num_components);
315
316 list_addtail(&alu_dest->ssa.list, &comp->reg_list);
317
318 if (!ppir_instr_insert_node(load_node->instr, move_node))
319 return false;
320
321 /* insert the new node as predecessor */
322 ppir_node_foreach_pred_safe(node, dep) {
323 ppir_node *pred = dep->pred;
324 ppir_node_remove_dep(dep);
325 ppir_node_add_dep(load_node, pred, ppir_dep_src);
326 }
327 ppir_node_add_dep(node, move_node, ppir_dep_src);
328 ppir_node_add_dep(move_node, load_node, ppir_dep_src);
329
330 *fill_node = move_node;
331
332 update_src:
333 /* switch node src to use the fill node dest */
334 ppir_node_target_assign(src, *fill_node);
335
336 return true;
337 }
338
ppir_update_spilled_dest_load(ppir_compiler * comp,ppir_block * block,ppir_node * node)339 static bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block,
340 ppir_node *node)
341 {
342 ppir_dest *dest = ppir_node_get_dest(node);
343 assert(dest != NULL);
344 assert(dest->type == ppir_target_register);
345 ppir_reg *reg = dest->reg;
346 int num_components = reg->num_components;
347
348 /* alloc new node to load value */
349 ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
350 if (!load_node)
351 return NULL;
352 list_addtail(&load_node->list, &node->list);
353 comp->num_fills++;
354
355 ppir_load_node *load = ppir_node_to_load(load_node);
356
357 load->index = -comp->prog->stack_size; /* index sizes are negative */
358 load->num_components = num_components;
359
360 load->dest.type = ppir_target_pipeline;
361 load->dest.pipeline = ppir_pipeline_reg_uniform;
362 load->dest.write_mask = u_bit_consecutive(0, num_components);
363
364 /* New instruction is needed since we're updating a dest register
365 * and we can't write to the uniform pipeline reg */
366 if (!create_new_instr_before(block, node->instr, load_node))
367 return false;
368
369 /* Create move node */
370 ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
371 if (unlikely(!move_node))
372 return false;
373 list_addtail(&move_node->list, &node->list);
374
375 ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
376
377 move_alu->num_src = 1;
378 move_alu->src->type = ppir_target_pipeline;
379 move_alu->src->pipeline = ppir_pipeline_reg_uniform;
380 for (int i = 0; i < 4; i++)
381 move_alu->src->swizzle[i] = i;
382
383 move_alu->dest.type = ppir_target_register;
384 move_alu->dest.reg = reg;
385 move_alu->dest.write_mask = u_bit_consecutive(0, num_components);
386
387 if (!ppir_instr_insert_node(load_node->instr, move_node))
388 return false;
389
390 ppir_node_foreach_pred_safe(node, dep) {
391 ppir_node *pred = dep->pred;
392 ppir_node_remove_dep(dep);
393 ppir_node_add_dep(load_node, pred, ppir_dep_src);
394 }
395 ppir_node_add_dep(node, move_node, ppir_dep_src);
396 ppir_node_add_dep(move_node, load_node, ppir_dep_src);
397
398 return true;
399 }
400
ppir_update_spilled_dest(ppir_compiler * comp,ppir_block * block,ppir_node * node)401 static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
402 ppir_node *node)
403 {
404 ppir_dest *dest = ppir_node_get_dest(node);
405 assert(dest != NULL);
406 ppir_reg *reg = ppir_dest_get_reg(dest);
407
408 /* alloc new node to store value */
409 ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
410 if (!store_node)
411 return false;
412 list_addtail(&store_node->list, &node->list);
413 comp->num_spills++;
414
415 ppir_store_node *store = ppir_node_to_store(store_node);
416
417 store->index = -comp->prog->stack_size; /* index sizes are negative */
418
419 ppir_node_target_assign(&store->src, node);
420 store->num_components = reg->num_components;
421
422 /* insert the new node as successor */
423 ppir_node_foreach_succ_safe(node, dep) {
424 ppir_node *succ = dep->succ;
425 ppir_node_remove_dep(dep);
426 ppir_node_add_dep(succ, store_node, ppir_dep_src);
427 }
428 ppir_node_add_dep(store_node, node, ppir_dep_src);
429
430 /* If the store temp slot is empty, we can insert the store_temp
431 * there and use it directly. Exceptionally, if the node is in the
432 * combine slot, this doesn't work. */
433 if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
434 node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE)
435 return ppir_instr_insert_node(node->instr, store_node);
436
437 /* Not possible to merge store, so fall back to a new instruction */
438 return create_new_instr_after(block, node->instr, store_node);
439 }
440
ppir_regalloc_spill_reg(ppir_compiler * comp,ppir_reg * chosen)441 static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
442 {
443 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
444 list_for_each_entry(ppir_node, node, &block->node_list, list) {
445
446 ppir_dest *dest = ppir_node_get_dest(node);
447 if (dest && ppir_dest_get_reg(dest) == chosen) {
448 /* If dest is a register, it might be updating only some its
449 * components, so need to load the existing value first */
450 if (dest->type == ppir_target_register) {
451 if (!ppir_update_spilled_dest_load(comp, block, node))
452 return false;
453 }
454 if (!ppir_update_spilled_dest(comp, block, node))
455 return false;
456 }
457
458 ppir_node *fill_node = NULL;
459 /* nodes might have multiple references to the same value.
460 * avoid creating unnecessary loads for the same fill by
461 * saving the node resulting from the temporary load */
462 for (int i = 0; i < ppir_node_get_src_num(node); i++) {
463 ppir_src *src = ppir_node_get_src(node, i);
464 ppir_reg *reg = ppir_src_get_reg(src);
465 if (reg == chosen) {
466 if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
467 return false;
468 }
469 }
470 }
471 }
472
473 return true;
474 }
475
ppir_regalloc_choose_spill_node(ppir_compiler * comp,struct ra_graph * g)476 static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
477 struct ra_graph *g)
478 {
479 float spill_costs[list_length(&comp->reg_list)];
480 /* experimentally determined, it seems to be worth scaling cost of
481 * regs in instructions that have used uniform/store_temp slots,
482 * but not too much as to offset the num_components base cost. */
483 const float slot_scale = 1.1f;
484
485 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
486 if (reg->spilled) {
487 /* not considered for spilling */
488 spill_costs[reg->regalloc_index] = 0.0f;
489 continue;
490 }
491
492 /* It is beneficial to spill registers with higher component number,
493 * so increase the cost of spilling registers with few components */
494 float spill_cost = 4.0f / (float)reg->num_components;
495 spill_costs[reg->regalloc_index] = spill_cost;
496 }
497
498 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
499 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
500 if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) {
501 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
502 ppir_node *node = instr->slots[i];
503 if (!node)
504 continue;
505 for (int j = 0; j < ppir_node_get_src_num(node); j++) {
506 ppir_src *src = ppir_node_get_src(node, j);
507 if (!src)
508 continue;
509 ppir_reg *reg = ppir_src_get_reg(src);
510 if (!reg)
511 continue;
512
513 spill_costs[reg->regalloc_index] *= slot_scale;
514 }
515 }
516 }
517 if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) {
518 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
519 ppir_node *node = instr->slots[i];
520 if (!node)
521 continue;
522 ppir_dest *dest = ppir_node_get_dest(node);
523 if (!dest)
524 continue;
525 ppir_reg *reg = ppir_dest_get_reg(dest);
526 if (!reg)
527 continue;
528
529 spill_costs[reg->regalloc_index] *= slot_scale;
530 }
531 }
532 }
533 }
534
535 for (int i = 0; i < list_length(&comp->reg_list); i++)
536 ra_set_node_spill_cost(g, i, spill_costs[i]);
537
538 int r = ra_get_best_spill_node(g);
539 if (r == -1)
540 return NULL;
541
542 ppir_reg *chosen = NULL;
543 int i = 0;
544 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
545 if (i++ == r) {
546 chosen = reg;
547 break;
548 }
549 }
550 assert(chosen);
551 chosen->spilled = true;
552 chosen->is_head = true; /* store_temp unable to do swizzle */
553
554 return chosen;
555 }
556
ppir_regalloc_reset_liveness_info(ppir_compiler * comp)557 static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
558 {
559 int idx = 0;
560
561 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
562 reg->regalloc_index = idx++;
563 }
564
565 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
566
567 if (block->live_in)
568 ralloc_free(block->live_in);
569 block->live_in = rzalloc_array(comp,
570 struct ppir_liveness, list_length(&comp->reg_list));
571
572 if (block->live_in_set)
573 _mesa_set_destroy(block->live_in_set, NULL);
574 block->live_in_set = _mesa_set_create(comp,
575 _mesa_hash_pointer,
576 _mesa_key_pointer_equal);
577
578 if (block->live_out)
579 ralloc_free(block->live_out);
580 block->live_out = rzalloc_array(comp,
581 struct ppir_liveness, list_length(&comp->reg_list));
582
583 if (block->live_out_set)
584 _mesa_set_destroy(block->live_out_set, NULL);
585 block->live_out_set = _mesa_set_create(comp,
586 _mesa_hash_pointer,
587 _mesa_key_pointer_equal);
588
589 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
590
591 if (instr->live_in)
592 ralloc_free(instr->live_in);
593 instr->live_in = rzalloc_array(comp,
594 struct ppir_liveness, list_length(&comp->reg_list));
595
596 if (instr->live_in_set)
597 _mesa_set_destroy(instr->live_in_set, NULL);
598 instr->live_in_set = _mesa_set_create(comp,
599 _mesa_hash_pointer,
600 _mesa_key_pointer_equal);
601
602 if (instr->live_internal)
603 ralloc_free(instr->live_internal);
604 instr->live_internal = rzalloc_array(comp,
605 struct ppir_liveness, list_length(&comp->reg_list));
606
607 if (instr->live_internal_set)
608 _mesa_set_destroy(instr->live_internal_set, NULL);
609 instr->live_internal_set = _mesa_set_create(comp,
610 _mesa_hash_pointer,
611 _mesa_key_pointer_equal);
612
613 if (instr->live_out)
614 ralloc_free(instr->live_out);
615 instr->live_out = rzalloc_array(comp,
616 struct ppir_liveness, list_length(&comp->reg_list));
617
618 if (instr->live_out_set)
619 _mesa_set_destroy(instr->live_out_set, NULL);
620 instr->live_out_set = _mesa_set_create(comp,
621 _mesa_hash_pointer,
622 _mesa_key_pointer_equal);
623 }
624 }
625 }
626
ppir_all_interference(ppir_compiler * comp,struct ra_graph * g,struct set * liveness)627 static void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
628 struct set *liveness)
629 {
630 set_foreach(liveness, entry1) {
631 set_foreach(liveness, entry2) {
632 const struct ppir_liveness *r1 = entry1->key;
633 const struct ppir_liveness *r2 = entry2->key;
634 ra_add_node_interference(g, r1->reg->regalloc_index,
635 r2->reg->regalloc_index);
636 }
637 _mesa_set_remove(liveness, entry1);
638 }
639 }
640
641 int lima_ppir_force_spilling = 0;
642
ppir_regalloc_prog_try(ppir_compiler * comp,bool * spilled)643 static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
644 {
645 ppir_regalloc_reset_liveness_info(comp);
646
647 struct ra_graph *g = ra_alloc_interference_graph(
648 comp->ra, list_length(&comp->reg_list));
649
650 int n = 0;
651 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
652 int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
653 if (reg->is_head)
654 c += 4;
655 ra_set_node_class(g, n++, c);
656 }
657
658 ppir_liveness_analysis(comp);
659
660 list_for_each_entry(ppir_block, block, &comp->block_list, list) {
661 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
662 set_foreach(instr->live_internal_set, entry) {
663 _mesa_set_add(instr->live_in_set, entry->key);
664 _mesa_set_add(instr->live_out_set, entry->key);
665 }
666 ppir_all_interference(comp, g, instr->live_in_set);
667 ppir_all_interference(comp, g, instr->live_out_set);
668 }
669 }
670
671 *spilled = false;
672 bool ok = ra_allocate(g);
673 if (!ok || (comp->force_spilling-- > 0)) {
674 ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
675 if (chosen) {
676 /* stack_size will be used to assemble the frame reg in lima_draw.
677 * It is also be used in the spilling code, as negative indices
678 * starting from -1, to create stack addresses. */
679 comp->prog->stack_size++;
680 if (!ppir_regalloc_spill_reg(comp, chosen))
681 goto err_out;
682 /* Ask the outer loop to call back in. */
683 *spilled = true;
684
685 ppir_debug("spilled register %d/%d, num_components: %d\n",
686 chosen->regalloc_index, list_length(&comp->reg_list),
687 chosen->num_components);
688 goto err_out;
689 }
690
691 ppir_error("regalloc fail\n");
692 goto err_out;
693 }
694
695 n = 0;
696 list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
697 int reg_index = ra_get_node_reg(g, n++);
698 reg->index = get_phy_reg_index(reg_index);
699 }
700
701 ralloc_free(g);
702
703 if (lima_debug & LIMA_DEBUG_PP)
704 ppir_regalloc_print_result(comp);
705
706 return true;
707
708 err_out:
709 ralloc_free(g);
710 return false;
711 }
712
ppir_regalloc_prog(ppir_compiler * comp)713 bool ppir_regalloc_prog(ppir_compiler *comp)
714 {
715 bool spilled = false;
716 comp->prog->stack_size = 0;
717
718 /* Set from an environment variable to force spilling
719 * for debugging purposes, see lima_screen.c */
720 comp->force_spilling = lima_ppir_force_spilling;
721
722 ppir_regalloc_update_reglist_ssa(comp);
723
724 /* No registers? Probably shader consists of discard instruction */
725 if (list_is_empty(&comp->reg_list))
726 return true;
727
728 /* this will most likely succeed in the first
729 * try, except for very complicated shaders */
730 while (!ppir_regalloc_prog_try(comp, &spilled))
731 if (!spilled)
732 return false;
733
734 return true;
735 }
736