1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "rogue.h"
25 #include "util/macros.h"
26 #include "util/ralloc.h"
27 #include "util/register_allocate.h"
28 
29 #include <stdbool.h>
30 #include <stdlib.h>
31 
32 /**
33  * \file rogue_regalloc.c
34  *
35  * \brief Contains the rogue_regalloc pass.
36  */
37 
38 /* TODO: Internal register support for high register pressure regs. */
39 
40 typedef struct rogue_live_range {
41    unsigned start;
42    unsigned end;
43 } rogue_live_range;
44 
rogue_regarray_liveness(rogue_regarray * regarray,rogue_live_range * live_range)45 static void rogue_regarray_liveness(rogue_regarray *regarray,
46                                     rogue_live_range *live_range)
47 {
48    assert(list_is_singular(®array->writes) ||
49           list_is_empty(®array->writes));
50    if (!list_is_empty(®array->writes)) {
51       rogue_regarray_write *write =
52          list_first_entry(®array->writes, rogue_regarray_write, link);
53       live_range->start = MIN2(live_range->start, write->instr->index);
54    }
55 
56    rogue_foreach_regarray_use (use, regarray) {
57       live_range->end = MAX2(live_range->end, use->instr->index);
58    }
59 }
60 
rogue_reg_liveness(rogue_reg * reg,rogue_live_range * live_range)61 static void rogue_reg_liveness(rogue_reg *reg, rogue_live_range *live_range)
62 {
63    assert(list_is_singular(®->writes) || list_is_empty(®->writes));
64    if (!list_is_empty(®->writes)) {
65       rogue_reg_write *write =
66          list_first_entry(®->writes, rogue_reg_write, link);
67       live_range->start = MIN2(live_range->start, write->instr->index);
68    }
69 
70    rogue_foreach_reg_use (use, reg) {
71       live_range->end = MAX2(live_range->end, use->instr->index);
72    }
73 }
74 
75 PUBLIC
rogue_regalloc(rogue_shader * shader)76 bool rogue_regalloc(rogue_shader *shader)
77 {
78    if (shader->is_grouped)
79       return false;
80 
81    bool progress = false;
82 
83    unsigned num_ssa_regs = rogue_count_used_regs(shader, ROGUE_REG_CLASS_SSA);
84    if (!num_ssa_regs)
85       return false;
86 
87    assert(list_is_empty(&shader->regs[ROGUE_REG_CLASS_TEMP]));
88    unsigned hw_temps = rogue_reg_infos[ROGUE_REG_CLASS_TEMP].num;
89 
90    /* Setup regset and register classes. */
91    struct ra_regs *ra_regs = ra_alloc_reg_set(shader, hw_temps, true);
92 
93    for (enum rogue_regalloc_class c = 0; c < ROGUE_REGALLOC_CLASS_COUNT; ++c) {
94       ASSERTED struct ra_class *ra_class =
95          ra_alloc_contig_reg_class(ra_regs, regalloc_info[c].stride);
96       assert(c == ra_class_index(ra_class));
97 
98       for (unsigned t = 0; t < hw_temps; ++t)
99          if (!(t % regalloc_info[c].stride))
100             ra_class_add_reg(ra_class, t);
101    }
102 
103    ra_set_finalize(ra_regs, NULL);
104 
105    /* TODO: Consider tracking this in the shader itself, i.e. one list for child
106     * regarrays, one for parents. Or, since children are already in a list in
107     * the parent, only have parent regarrays in the shader.
108     */
109 
110    /* Count the parent regarrays. */
111    unsigned num_parent_regarrays = 0;
112    rogue_foreach_regarray (regarray, shader) {
113       if (regarray->parent || regarray->regs[0]->class != ROGUE_REG_CLASS_SSA)
114          continue;
115 
116       ++num_parent_regarrays;
117    }
118 
119    /* Construct list of parent regarrays. */
120    rogue_regarray **parent_regarrays =
121       rzalloc_array_size(ra_regs,
122                          sizeof(*parent_regarrays),
123                          num_parent_regarrays);
124 
125    unsigned ra = 0;
126    rogue_foreach_regarray (regarray, shader) {
127       if (regarray->parent || regarray->regs[0]->class != ROGUE_REG_CLASS_SSA)
128          continue;
129 
130       parent_regarrays[ra++] = regarray;
131    }
132 
133    /* Prepare live ranges. */
134    rogue_live_range *ssa_live_range =
135       rzalloc_array_size(ra_regs, sizeof(*ssa_live_range), num_ssa_regs);
136    for (unsigned u = 0; u < num_ssa_regs; ++u)
137       ssa_live_range[u].start = ~0U;
138 
139    /* Populate live ranges for register arrays. */
140    for (unsigned u = 0; u < num_parent_regarrays; ++u) {
141       rogue_regarray *regarray = parent_regarrays[u];
142       unsigned base_index = regarray->regs[0]->index;
143       rogue_live_range *live_range = &ssa_live_range[base_index];
144 
145       rogue_regarray_liveness(regarray, live_range);
146 
147       rogue_foreach_subarray (subarray, regarray) {
148          rogue_regarray_liveness(subarray, live_range);
149       }
150    }
151 
152    /* Populate live ranges for registers. */
153    rogue_foreach_reg (reg, shader, ROGUE_REG_CLASS_SSA) {
154       if (reg->regarray)
155          continue;
156 
157       rogue_live_range *live_range = &ssa_live_range[reg->index];
158       rogue_reg_liveness(reg, live_range);
159    }
160 
161    struct ra_graph *ra_graph =
162       ra_alloc_interference_graph(ra_regs, num_ssa_regs);
163    ralloc_steal(ra_regs, ra_graph);
164 
165    /* Set register class for regarrays/vectors. */
166    for (unsigned u = 0; u < num_parent_regarrays; ++u) {
167       rogue_regarray *regarray = parent_regarrays[u];
168       unsigned base_index = regarray->regs[0]->index;
169 
170       enum rogue_regalloc_class raclass;
171 
172       if (regarray->size == 2)
173          raclass = ROGUE_REGALLOC_CLASS_TEMP_2;
174       else if (regarray->size == 4)
175          raclass = ROGUE_REGALLOC_CLASS_TEMP_4;
176       else
177          unreachable("Unsupported regarray size.");
178 
179       ra_set_node_class(ra_graph,
180                         base_index,
181                         ra_get_class_from_index(ra_regs, raclass));
182    }
183 
184    /* Set register class for "standalone" registers. */
185    rogue_foreach_reg (reg, shader, ROGUE_REG_CLASS_SSA) {
186       if (reg->regarray)
187          continue;
188 
189       ra_set_node_class(ra_graph,
190                         reg->index,
191                         ra_get_class_from_index(ra_regs,
192                                                 ROGUE_REGALLOC_CLASS_TEMP_1));
193    }
194 
195    /* Build interference graph from overlapping live ranges. */
196    for (unsigned index0 = 0; index0 < num_ssa_regs; ++index0) {
197       rogue_live_range *live_range0 = &ssa_live_range[index0];
198 
199       for (unsigned index1 = 0; index1 < num_ssa_regs; ++index1) {
200          if (index0 == index1)
201             continue;
202 
203          rogue_live_range *live_range1 = &ssa_live_range[index1];
204 
205          /* If the live ranges overlap, those register nodes interfere. */
206          if (!(live_range0->start >= live_range1->end ||
207                live_range1->start >= live_range0->end))
208             ra_add_node_interference(ra_graph, index0, index1);
209       }
210    }
211 
212    /* TODO: Spilling support. */
213    if (!ra_allocate(ra_graph))
214       unreachable("Register allocation failed.");
215 
216    /* Replace SSA regarray registers with allocated physical registers. */
217    for (unsigned u = 0; u < num_parent_regarrays; ++u) {
218       rogue_regarray *regarray = parent_regarrays[u];
219 
220       unsigned base_index = regarray->regs[0]->index;
221       unsigned hw_base_index = ra_get_node_reg(ra_graph, base_index);
222       enum rogue_regalloc_class ra_class =
223          ra_class_index(ra_get_node_class(ra_graph, base_index));
224       enum rogue_reg_class new_class = regalloc_info[ra_class].class;
225 
226       bool used = false;
227       for (unsigned r = 0; r < regarray->size; ++r) {
228          used |= rogue_reg_is_used(shader, new_class, hw_base_index + r);
229 
230          if (used)
231             break;
232       }
233 
234       /* First time using new regarray, modify in place. */
235       if (!used) {
236          progress |=
237             rogue_regarray_rewrite(shader, regarray, new_class, hw_base_index);
238       } else {
239          /* Regarray has already been used, replace references and delete. */
240 
241          /* Replace parent regarray first. */
242          rogue_regarray *new_regarray = rogue_regarray_cached(shader,
243                                                               regarray->size,
244                                                               new_class,
245                                                               hw_base_index);
246          progress |= rogue_regarray_replace(shader, regarray, new_regarray);
247       }
248    }
249 
250    /* Replace remaining standalone SSA registers with allocated physical
251     * registers. */
252    rogue_foreach_reg_safe (reg, shader, ROGUE_REG_CLASS_SSA) {
253       assert(!reg->regarray);
254       unsigned hw_index = ra_get_node_reg(ra_graph, reg->index);
255 
256       enum rogue_regalloc_class ra_class =
257          ra_class_index(ra_get_node_class(ra_graph, reg->index));
258       enum rogue_reg_class new_class = regalloc_info[ra_class].class;
259 
260       /* First time using new register, modify in place. */
261       if (!rogue_reg_is_used(shader, new_class, hw_index)) {
262          progress |= rogue_reg_rewrite(shader, reg, new_class, hw_index);
263       } else {
264          /* Register has already been used, replace references and delete. */
265          assert(list_is_singular(®->writes)); /* SSA reg. */
266          rogue_reg *new_reg = rogue_temp_reg(shader, hw_index);
267          progress |= rogue_reg_replace(reg, new_reg);
268       }
269    }
270 
271 #ifndef NDEBUG
272    /* Ensure that temp regs are continuous from zero, and have no gaps. */
273    unsigned num_temp_regs = list_length(&shader->regs[ROGUE_REG_CLASS_TEMP]);
274    rogue_foreach_reg (reg, shader, ROGUE_REG_CLASS_TEMP) {
275       assert(reg->index < num_temp_regs);
276    }
277 #endif /* NDEBUG */
278 
279    ralloc_free(ra_regs);
280    return progress;
281 }
282