/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "gpir.h"
#include "util/u_dynarray.h"

/* Per-register information */

struct reg_info {
   BITSET_WORD *conflicts;
   struct util_dynarray conflict_list;

   /* Number of conflicts with physical registers, i.e. neighbors that must
    * be allocated to one of the GPIR_PHYSICAL_REG_NUM physical colors.
    */
   unsigned phys_conflicts;

   /* Number of conflicts with value nodes. */
   unsigned node_conflicts;

   /* Total number of conflicts (phys_conflicts + node_conflicts). */
   unsigned total_conflicts;

   int assigned_color;

   bool visited;
};

struct regalloc_ctx {
   unsigned bitset_words, num_nodes_and_regs;
   struct reg_info *registers;

   /* Reusable scratch liveness array */
   BITSET_WORD *live;

   unsigned *worklist;
   unsigned worklist_start, worklist_end;

   unsigned *stack;
   unsigned stack_size;

   gpir_compiler *comp;
   void *mem_ctx;
};

/* Liveness analysis */

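/* Liveness is computed per-register (i.e. per gpir_reg, not per value node)
 * with the usual backward dataflow equations:
 *
 *    live_out(B) = union of live_in(S) over each successor S of B
 *    live_in(B)  = GEN(B) | (live_out(B) & ~KILL(B))
 *
 * where a register store KILLs its register and a register load GENs it.
 */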
static void propagate_liveness_instr(gpir_node *node, BITSET_WORD *live,
                                     gpir_compiler *comp)
{
   /* KILL */
   if (node->type == gpir_node_type_store) {
      if (node->op == gpir_op_store_reg) {
         gpir_store_node *store = gpir_node_to_store(node);
         BITSET_CLEAR(live, store->reg->index);
      }
   }

   /* GEN */
   if (node->type == gpir_node_type_load) {
      if (node->op == gpir_op_load_reg) {
         gpir_load_node *load = gpir_node_to_load(node);
         BITSET_SET(live, load->reg->index);
      }
   }
}

static bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx)
{
   for (unsigned i = 0; i < 2; i++) {
      if (block->successors[i]) {
         for (unsigned j = 0; j < ctx->bitset_words; j++)
            block->live_out[j] |= block->successors[i]->live_in[j];
      }
   }

   memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD));

   list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
      propagate_liveness_instr(node, ctx->live, block->comp);
   }

   bool changed = false;
   for (unsigned i = 0; i < ctx->bitset_words; i++) {
      changed |= (block->live_in[i] != ctx->live[i]);
      block->live_in[i] = ctx->live[i];
   }
   return changed;
}

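/* Record every register that is written somewhere in the block. def_out is
 * later propagated forward so that it over-approximates "registers that may
 * have been defined by the end of this block"; see calc_interference for why
 * this matters for partially-defined registers.
 */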
static void calc_def_block(gpir_block *block)
{
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      if (node->op == gpir_op_store_reg) {
         gpir_store_node *store = gpir_node_to_store(node);
         BITSET_SET(block->def_out, store->reg->index);
      }
   }
}

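/* Iterate the backward liveness propagation to a fixed point. Visiting the
 * blocks in reverse order means most of the work typically happens in the
 * first pass, with additional passes only needed to close loops.
 */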
static void calc_liveness(struct regalloc_ctx *ctx)
{
   bool changed = true;
   while (changed) {
      changed = false;
      list_for_each_entry_rev(gpir_block, block, &ctx->comp->block_list, list) {
         changed |= propagate_liveness_block(block, ctx);
      }
   }

   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
      calc_def_block(block);
   }

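   /* Forward-propagate def_out to a fixed point: a register may be defined
    * at the end of a block if it is stored in the block itself or may be
    * defined at the end of any of its predecessors. The loop below pushes
    * each block's def_out into its successors.
    */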
   changed = true;
   while (changed) {
      changed = false;
      list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
         for (unsigned i = 0; i < 2; i++) {
            gpir_block *succ = block->successors[i];
            if (!succ)
               continue;

            for (unsigned j = 0; j < ctx->bitset_words; j++) {
               BITSET_WORD new = block->def_out[j] & ~succ->def_out[j];
               changed |= (new != 0);
               succ->def_out[j] |= block->def_out[j];
            }
         }
      }
   }
}

/* Interference calculation */

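/* The interference graph has one vertex per physical register and one per
 * value node, in a single index space: indices below comp->cur_reg are
 * registers, and node n lives at index comp->cur_reg + n.
 */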
static void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)
{
   if (i == j)
      return;

   struct reg_info *a = &ctx->registers[i];
   struct reg_info *b = &ctx->registers[j];

   if (BITSET_TEST(a->conflicts, j))
      return;

   BITSET_SET(a->conflicts, j);
   BITSET_SET(b->conflicts, i);

   a->total_conflicts++;
   b->total_conflicts++;
   if (j < ctx->comp->cur_reg)
      a->phys_conflicts++;
   else
      a->node_conflicts++;

   if (i < ctx->comp->cur_reg)
      b->phys_conflicts++;
   else
      b->node_conflicts++;

   util_dynarray_append(&a->conflict_list, unsigned, j);
   util_dynarray_append(&b->conflict_list, unsigned, i);
}

/* Make the register or node "i" interfere with all the other live registers
 * and nodes.
 */
static void add_all_interferences(struct regalloc_ctx *ctx,
                                  unsigned i,
                                  BITSET_WORD *live_nodes,
                                  BITSET_WORD *live_regs)
{
   int live_node;
   BITSET_FOREACH_SET(live_node, live_nodes, ctx->comp->cur_index) {
      add_interference(ctx, i, live_node + ctx->comp->cur_reg);
   }

   int live_reg;
   BITSET_FOREACH_SET(live_reg, live_regs, ctx->comp->cur_reg) {
      add_interference(ctx, i, live_reg);
   }
}

static void print_liveness(struct regalloc_ctx *ctx,
                           BITSET_WORD *live_reg, BITSET_WORD *live_val)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   int live_idx;
   BITSET_FOREACH_SET(live_idx, live_reg, ctx->comp->cur_reg) {
      printf("reg%d ", live_idx);
   }
   BITSET_FOREACH_SET(live_idx, live_val, ctx->comp->cur_index) {
      printf("%d ", live_idx);
   }
   printf("\n");
}

static void calc_interference(struct regalloc_ctx *ctx)
{
   BITSET_WORD *live_nodes =
      rzalloc_array(ctx->mem_ctx, BITSET_WORD, ctx->comp->cur_index);

   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
      /* Initialize liveness at the end of the block, but exclude values that
       * definitely aren't defined by the end. This helps out with
       * partially-defined registers, like:
       *
       * if (condition) {
       *    foo = ...;
       * }
       * if (condition) {
       *    ... = foo;
       * }
       *
       * If we naively propagated liveness backwards, foo would be live from
       * the beginning of the program, but if we're not inside a loop then
       * its value is undefined before the first if and we don't have to
       * consider it live. Mask out registers like foo here.
       */
      for (unsigned i = 0; i < ctx->bitset_words; i++) {
         ctx->live[i] = block->live_out[i] & block->def_out[i];
      }

      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
         gpir_debug("processing node %d\n", node->index);
         print_liveness(ctx, ctx->live, live_nodes);
         if (node->type != gpir_node_type_store &&
             node->type != gpir_node_type_branch) {
            add_all_interferences(ctx, node->index + ctx->comp->cur_reg,
                                  live_nodes, ctx->live);

            /* KILL */
            BITSET_CLEAR(live_nodes, node->index);
         } else if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            add_all_interferences(ctx, store->reg->index,
                                  live_nodes, ctx->live);

            /* KILL */
            BITSET_CLEAR(ctx->live, store->reg->index);
         }

         /* GEN */
         if (node->type == gpir_node_type_store) {
            gpir_store_node *store = gpir_node_to_store(node);
            BITSET_SET(live_nodes, store->child->index);
         } else if (node->type == gpir_node_type_alu) {
            gpir_alu_node *alu = gpir_node_to_alu(node);
            for (int i = 0; i < alu->num_child; i++)
               BITSET_SET(live_nodes, alu->children[i]->index);
         } else if (node->type == gpir_node_type_branch) {
            gpir_branch_node *branch = gpir_node_to_branch(node);
            BITSET_SET(live_nodes, branch->cond->index);
         } else if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            BITSET_SET(ctx->live, load->reg->index);
         }
      }
   }
}

/* Register allocation */

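/* Simplification test in the style of Chaitin/Briggs graph coloring: a
 * vertex whose degree is less than the number of available colors can always
 * be colored last, no matter what its neighbors get, so it can be removed
 * from the graph. Value nodes get GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM
 * colors, while physical registers only get GPIR_PHYSICAL_REG_NUM.
 */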
static bool can_simplify(struct regalloc_ctx *ctx, unsigned i)
{
   struct reg_info *info = &ctx->registers[i];
   if (i < ctx->comp->cur_reg) {
      /* Physical regs. */
      return info->phys_conflicts + info->node_conflicts < GPIR_PHYSICAL_REG_NUM;
   } else {
      /* Nodes: if we manage to allocate all of its conflicting physical
       * registers, they will take up at most GPIR_PHYSICAL_REG_NUM colors, so
       * we can ignore any more than that.
       */
      return MIN2(info->phys_conflicts, GPIR_PHYSICAL_REG_NUM) +
         info->node_conflicts < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;
   }
}

static void push_stack(struct regalloc_ctx *ctx, unsigned i)
{
   ctx->stack[ctx->stack_size++] = i;
   if (i < ctx->comp->cur_reg)
      gpir_debug("pushing reg%u\n", i);
   else
      gpir_debug("pushing %d\n", i - ctx->comp->cur_reg);

   struct reg_info *info = &ctx->registers[i];
   assert(info->visited);

   util_dynarray_foreach(&info->conflict_list, unsigned, conflict) {
      struct reg_info *conflict_info = &ctx->registers[*conflict];
      if (i < ctx->comp->cur_reg) {
         assert(conflict_info->phys_conflicts > 0);
         conflict_info->phys_conflicts--;
      } else {
         assert(conflict_info->node_conflicts > 0);
         conflict_info->node_conflicts--;
      }
      if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) {
         ctx->worklist[ctx->worklist_end++] = *conflict;
         ctx->registers[*conflict].visited = true;
      }
   }
}

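/* Optimistic graph coloring: repeatedly simplify, and when nothing is
 * trivially simplifiable, push the least-constrained vertex anyway and hope
 * a color is still free when it is popped. If that gamble fails, the
 * coloring loop below reports failure (spilling is not implemented yet).
 */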
static bool do_regalloc(struct regalloc_ctx *ctx)
{
   ctx->worklist_start = 0;
   ctx->worklist_end = 0;
   ctx->stack_size = 0;

   /* Step 1: find the initially simplifiable registers */
   for (int i = 0; i < ctx->comp->cur_reg + ctx->comp->cur_index; i++) {
      if (can_simplify(ctx, i)) {
         ctx->worklist[ctx->worklist_end++] = i;
         ctx->registers[i].visited = true;
      }
   }

   while (true) {
      /* Step 2: push onto the stack whatever we can */
      while (ctx->worklist_start != ctx->worklist_end) {
         push_stack(ctx, ctx->worklist[ctx->worklist_start++]);
      }

      if (ctx->stack_size < ctx->num_nodes_and_regs) {
         /* Step 3: if there are still unsimplifiable nodes left, we need to
          * optimistically push a node onto the stack. Choose the one with
          * the smallest number of current neighbors, since that's the most
          * likely to succeed.
          */
         unsigned min_conflicts = UINT_MAX;
         unsigned best_reg = 0;
         for (unsigned reg = 0; reg < ctx->num_nodes_and_regs; reg++) {
            struct reg_info *info = &ctx->registers[reg];
            if (info->visited)
               continue;
            if (info->phys_conflicts + info->node_conflicts < min_conflicts) {
               best_reg = reg;
               min_conflicts = info->phys_conflicts + info->node_conflicts;
            }
         }
         gpir_debug("optimistic triggered\n");
         ctx->registers[best_reg].visited = true;
         push_stack(ctx, best_reg);
      } else {
         break;
      }
   }

   /* Step 4: pop off the stack and assign colors */
   for (int i = ctx->num_nodes_and_regs - 1; i >= 0; i--) {
      unsigned idx = ctx->stack[i];
      struct reg_info *reg = &ctx->registers[idx];

      unsigned num_available_regs;
      if (idx < ctx->comp->cur_reg) {
         num_available_regs = GPIR_PHYSICAL_REG_NUM;
      } else {
         num_available_regs = GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM;
      }

      bool found = false;
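      /* Start the color search at an offset that varies with the stack
       * position, so that assignments spread across the register file
       * instead of piling onto the low colors; presumably this gives the
       * scheduler more freedom. Any starting point would be equally correct.
       */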
      unsigned start = i % num_available_regs;
      for (unsigned j = 0; j < num_available_regs; j++) {
         unsigned candidate = (j + start) % num_available_regs;
         bool available = true;
         util_dynarray_foreach(&reg->conflict_list, unsigned, conflict_idx) {
            struct reg_info *conflict = &ctx->registers[*conflict_idx];
            if (conflict->assigned_color >= 0 &&
                conflict->assigned_color == (int) candidate) {
               available = false;
               break;
            }
         }

         if (available) {
            reg->assigned_color = candidate;
            found = true;
            break;
         }
      }

      /* TODO: spilling */
      if (!found) {
         gpir_error("Failed to allocate registers\n");
         return false;
      }
   }

   return true;
}

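/* Rewrite the IR with the chosen colors. Physical registers are components
 * of vec4 temporaries, so a register color c maps to register c / 4,
 * component c % 4.
 */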
static void assign_regs(struct regalloc_ctx *ctx)
{
   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->index >= 0) {
            node->value_reg =
               ctx->registers[ctx->comp->cur_reg + node->index].assigned_color;
         }

         if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            unsigned color = ctx->registers[load->reg->index].assigned_color;
            load->index = color / 4;
            load->component = color % 4;
         }

         if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            unsigned color = ctx->registers[store->reg->index].assigned_color;
            store->index = color / 4;
            store->component = color % 4;
            node->value_reg = color;
         }
      }

      block->live_out_phys = 0;

      int reg_idx;
      BITSET_FOREACH_SET(reg_idx, block->live_out, ctx->comp->cur_reg) {
         if (BITSET_TEST(block->def_out, reg_idx)) {
            block->live_out_phys |= (1ull << ctx->registers[reg_idx].assigned_color);
         }
      }
   }
}

static void regalloc_print_result(gpir_compiler *comp)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   int index = 0;
   printf("======== regalloc ========\n");
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,
                gpir_op_infos[node->op].name);
         gpir_node_foreach_pred(node, dep) {
            gpir_node *pred = dep->pred;
            printf(" %d/%d", pred->index, pred->value_reg);
         }
         if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            printf(" -/%d", 4 * load->index + load->component);
            printf(" (%d)", load->reg->index);
         } else if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            printf(" (%d)", store->reg->index);
         }
         printf("\n");
      }
      printf("----------------------------\n");
   }
}

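/* Entry point: compute liveness, build the interference graph, color it,
 * and rewrite the IR with the results. All bookkeeping lives in a throwaway
 * ralloc context that is freed before returning.
 */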
bool gpir_regalloc_prog(gpir_compiler *comp)
{
   struct regalloc_ctx ctx;

   ctx.mem_ctx = ralloc_context(NULL);
   ctx.num_nodes_and_regs = comp->cur_reg + comp->cur_index;
   ctx.bitset_words = BITSET_WORDS(ctx.num_nodes_and_regs);
   ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
   ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
   ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
   ctx.comp = comp;

   ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, ctx.num_nodes_and_regs);
   for (unsigned i = 0; i < ctx.num_nodes_and_regs; i++) {
      ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD,
                                                 ctx.bitset_words);
      util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx);
      /* Mark as not yet colored; the coloring loop in do_regalloc tests
       * assigned_color >= 0, so leaving the rzalloc'd zero here would make
       * uncolored vertices look like they hold color 0.
       */
      ctx.registers[i].assigned_color = -1;
   }

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->live_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
      block->live_in = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
      block->def_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
   }

   calc_liveness(&ctx);
   calc_interference(&ctx);
   if (!do_regalloc(&ctx)) {
      ralloc_free(ctx.mem_ctx);
      return false;
   }
   assign_regs(&ctx);

   regalloc_print_result(comp);
   ralloc_free(ctx.mem_ctx);
   return true;
}