/*
 * Copyright © 2024 Imagination Technologies Ltd.
 *
 * SPDX-License-Identifier: MIT
 */

/**
 * \file pco_ra.c
 *
 * \brief PCO register allocator.
 */

#include "hwdef/rogue_hw_utils.h"
#include "pco.h"
#include "pco_builder.h"
#include "pco_internal.h"
#include "util/bitset.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/register_allocate.h"
#include "util/sparse_array.h"
#include "util/u_dynarray.h"

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>

/** Live range of an SSA variable. */
struct live_range {
   unsigned start;
   unsigned end;
};

/** Vector override information. */
struct vec_override {
   pco_ref ref;
   unsigned offset;
};

/**
 * \brief Performs register allocation on a function.
 *
 * \param[in,out] func PCO function.
 * \param[in] allocable_temps Number of allocatable temp registers.
 * \param[in] allocable_vtxins Number of allocatable vertex input registers.
 * \param[in] allocable_interns Number of allocatable internal registers.
 * \return True if registers were allocated.
 */
static bool pco_ra_func(pco_func *func,
                        unsigned allocable_temps,
                        unsigned allocable_vtxins,
                        unsigned allocable_interns)
{
   /* TODO: support multiple functions and calls. */
   assert(func->type == PCO_FUNC_TYPE_ENTRYPOINT);

   /* TODO: loop lifetime extension.
    * TODO: track successors/predecessors.
    */

   /* Collect used bit sizes. */
   uint8_t ssa_bits = 0;
   pco_foreach_instr_in_func (instr, func) {
      pco_foreach_instr_dest_ssa (pdest, instr) {
         ssa_bits |= (1 << pdest->bits);
      }
   }

   /* No registers to allocate. */
   if (!ssa_bits)
      return false;

   /* 64-bit SSA should've been lowered by now. */
   assert(!(ssa_bits & (1 << PCO_BITS_64)));

   /* TODO: support multiple bit sizes. */
   bool only_32bit = ssa_bits == (1 << PCO_BITS_32);
   assert(only_32bit);

   struct ra_regs *ra_regs =
      ra_alloc_reg_set(func, allocable_temps, !only_32bit);

   /* Overrides for vector coalescing. */
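   /* Each SSA source of a vec is recorded as an alias of the vec's
    * destination plus a channel offset, so the source can be allocated
    * straight into the destination's contiguous register range and the vec
    * becomes a no-op. Instructions are walked in reverse so that a vec
    * feeding another vec picks up its consumer's override first, chaining
    * references and accumulating offsets.
    */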
   struct hash_table_u64 *overrides = _mesa_hash_table_u64_create(ra_regs);
   pco_foreach_instr_in_func_rev (instr, func) {
      if (instr->op != PCO_OP_VEC)
         continue;

      pco_ref dest = instr->dest[0];
      unsigned offset = 0;

      struct vec_override *src_override =
         _mesa_hash_table_u64_search(overrides, dest.val);

      if (src_override) {
         dest = src_override->ref;
         offset += src_override->offset;
      }

      pco_foreach_instr_src (psrc, instr) {
         /* TODO: skip if vector producer is used by multiple things in a way
          * that doesn't allow coalescing. */
         /* TODO: can NIR scalarise things so that the only remaining vectors
          * can be used in this way? */

         if (pco_ref_is_ssa(*psrc)) {
            /* Make sure this hasn't already been overridden somewhere else! */
            assert(!_mesa_hash_table_u64_search(overrides, psrc->val));

            struct vec_override *src_override =
               rzalloc_size(overrides, sizeof(*src_override));
            src_override->ref = dest;
            src_override->offset = offset;

            _mesa_hash_table_u64_insert(overrides, psrc->val, src_override);
         }

         offset += pco_ref_get_chans(*psrc);
      }
   }

   /* Overrides for vector component uses. */
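   /* A comp extracts a single channel from a vector, so its destination is
    * recorded as an alias of the source vector at that channel's offset and
    * the comp itself can later be deleted.
    */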
   pco_foreach_instr_in_func (instr, func) {
      if (instr->op != PCO_OP_COMP)
         continue;

      pco_ref dest = instr->dest[0];
      pco_ref src = instr->src[0];
      unsigned offset = pco_ref_get_imm(instr->src[1]);

      assert(pco_ref_is_ssa(src));
      assert(pco_ref_is_ssa(dest));

      struct vec_override *src_override =
         rzalloc_size(overrides, sizeof(*src_override));
      src_override->ref = src;
      src_override->offset = offset;
      _mesa_hash_table_u64_insert(overrides, dest.val, src_override);
   }

   /* Allocate classes. */
   struct hash_table_u64 *ra_classes = _mesa_hash_table_u64_create(ra_regs);
   pco_foreach_instr_in_func (instr, func) {
      pco_foreach_instr_dest_ssa (pdest, instr) {
         unsigned chans = pco_ref_get_chans(*pdest);
         /* TODO: bitset instead of search? */
         if (_mesa_hash_table_u64_search(ra_classes, chans))
            continue;
         /* Skip if coalesced. */
         if (_mesa_hash_table_u64_search(overrides, pdest->val))
            continue;

         struct ra_class *ra_class = ra_alloc_contig_reg_class(ra_regs, chans);
         _mesa_hash_table_u64_insert(ra_classes, chans, ra_class);
      }
   }

   /* Assign registers to classes. */
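   /* A contiguous class of stride N contains every base temp t for which
    * temps t..t+N-1 fit in the allocatable range; picking a register from
    * the class therefore reserves N consecutive temps.
    */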
   hash_table_u64_foreach (ra_classes, entry) {
      const unsigned stride = entry.key;
      struct ra_class *ra_class = entry.data;

      for (unsigned t = 0; t < allocable_temps - (stride - 1); ++t)
         ra_class_add_reg(ra_class, t);
   }

   ra_set_finalize(ra_regs, NULL);

   struct ra_graph *ra_graph =
      ra_alloc_interference_graph(ra_regs, func->next_ssa);
   ralloc_steal(ra_regs, ra_graph);

   /* Allocate and calculate live ranges. */
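   /* A variable's range starts at the instruction index of its first def
    * and ends at the index of its last use; start is initialised to ~0U to
    * mark variables that are never defined. Refs with an override extend
    * the range of the node they alias instead of their own.
    */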
   struct live_range *live_ranges =
      rzalloc_array_size(ra_regs, sizeof(*live_ranges), func->next_ssa);

   for (unsigned u = 0; u < func->next_ssa; ++u)
      live_ranges[u].start = ~0U;

   pco_foreach_instr_in_func (instr, func) {
      pco_foreach_instr_dest_ssa (pdest, instr) {
         pco_ref dest = *pdest;
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, dest.val);

         if (override)
            dest = override->ref;

         live_ranges[dest.val].start =
            MIN2(live_ranges[dest.val].start, instr->index);

         if (override)
            continue;

         /* Set class if it hasn't already been set up in an override. */
         unsigned chans = pco_ref_get_chans(dest);
         struct ra_class *ra_class =
            _mesa_hash_table_u64_search(ra_classes, chans);
         assert(ra_class);

         ra_set_node_class(ra_graph, dest.val, ra_class);
      }

      pco_foreach_instr_src_ssa (psrc, instr) {
         pco_ref src = *psrc;
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, src.val);

         if (override)
            src = override->ref;

         live_ranges[src.val].end =
            MAX2(live_ranges[src.val].end, instr->index);
      }
   }

   /* Build interference graph from overlapping live ranges. */
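   /* The test treats ranges as half-open intervals: a range that ends
    * exactly where another starts does not interfere, so a dying value's
    * temps can be reused by a def in the same instruction.
    */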
   for (unsigned ssa0 = 0; ssa0 < func->next_ssa; ++ssa0) {
      for (unsigned ssa1 = ssa0 + 1; ssa1 < func->next_ssa; ++ssa1) {
         /* If the live ranges overlap, the register nodes interfere. */
         if ((live_ranges[ssa0].start != ~0U && live_ranges[ssa1].end != ~0U) &&
             !(live_ranges[ssa0].start >= live_ranges[ssa1].end ||
               live_ranges[ssa1].start >= live_ranges[ssa0].end)) {
            ra_add_node_interference(ra_graph, ssa0, ssa1);
         }
      }
   }

   ASSERTED bool allocated = ra_allocate(ra_graph);
   assert(allocated);
   /* TODO: spilling. */

   if (PCO_DEBUG_PRINT(RA)) {
      printf("RA live ranges:\n");
      for (unsigned u = 0; u < func->next_ssa; ++u)
         printf(" %%%u: %u, %u\n", u, live_ranges[u].start, live_ranges[u].end);

      if (_mesa_hash_table_u64_num_entries(overrides)) {
         printf("RA overrides:\n");
         hash_table_u64_foreach (overrides, entry) {
            struct vec_override *override = entry.data;
            printf(" %%%" PRIu64 ": ref = ", entry.key);
            pco_print_ref(func->parent_shader, override->ref);
            printf(", offset = %u\n", override->offset);
         }
         printf("\n");
      }
   }

   /* Replace SSA regs with allocated registers. */
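   /* vecs and comps become no-ops at this point: movs are emitted only for
    * vec sources that aren't SSA (and so couldn't be coalesced), then both
    * ops are deleted. Every other SSA ref is rewritten to its allocated
    * temp.
    */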
   unsigned temps = 0;
   unsigned vtxins = 0;
   unsigned interns = 0;
   pco_foreach_instr_in_func_safe (instr, func) {
      if (PCO_DEBUG_PRINT(RA))
         pco_print_shader(func->parent_shader, stdout, "ra debug");

      /* Insert movs for scalar components of super vecs. */
      if (instr->op == PCO_OP_VEC) {
         pco_builder b =
            pco_builder_create(func, pco_cursor_before_instr(instr));

         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, instr->dest[0].val);

         unsigned offset = override ? override->offset : 0;

         unsigned temp_dest_base =
            override ? ra_get_node_reg(ra_graph, override->ref.val) + offset
                     : ra_get_node_reg(ra_graph, instr->dest[0].val);

         pco_foreach_instr_src (psrc, instr) {
            if (pco_ref_is_ssa(*psrc)) {
               assert(_mesa_hash_table_u64_search(overrides, psrc->val));
            } else {
               unsigned chans = pco_ref_get_chans(*psrc);

               for (unsigned u = 0; u < chans; ++u) {
                  pco_ref dest = pco_ref_hwreg(temp_dest_base + offset + u,
                                               PCO_REG_CLASS_TEMP);
                  pco_ref src = pco_ref_chans(*psrc, 1);
                  src = pco_ref_offset(src, u);

                  pco_mbyp0(&b, dest, src);
               }

               temps = MAX2(temps, temp_dest_base + offset + chans);
            }

            offset += pco_ref_get_chans(*psrc);
         }

         pco_instr_delete(instr);
         continue;
      } else if (instr->op == PCO_OP_COMP) {
         pco_instr_delete(instr);
         continue;
      }

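      /* Rewrite SSA dests and sources to their allocated temps; dest_temps
       * tracks the temp high-water mark, covering the full extent of the
       * coalesced destination where an override is present.
       */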
      pco_foreach_instr_dest_ssa (pdest, instr) {
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, pdest->val);

         unsigned val = ra_get_node_reg(ra_graph, pdest->val);
         unsigned dest_temps = val + pco_ref_get_chans(*pdest);
         if (override) {
            val = ra_get_node_reg(ra_graph, override->ref.val);
            dest_temps = val + pco_ref_get_chans(override->ref);
            val += override->offset;
         }

         pdest->type = PCO_REF_TYPE_REG;
         pdest->reg_class = PCO_REG_CLASS_TEMP;
         pdest->val = val;
         temps = MAX2(temps, dest_temps);
      }

      pco_foreach_instr_src_ssa (psrc, instr) {
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, psrc->val);

         unsigned val =
            override
               ? ra_get_node_reg(ra_graph, override->ref.val) + override->offset
               : ra_get_node_reg(ra_graph, psrc->val);

         psrc->type = PCO_REF_TYPE_REG;
         psrc->reg_class = PCO_REG_CLASS_TEMP;
         psrc->val = val;
      }
   }

   ralloc_free(ra_regs);

   func->temps = temps;

   if (PCO_DEBUG_PRINT(RA)) {
      printf("RA allocated %u temps, %u vtxins, %u interns.\n",
             temps,
             vtxins,
             interns);
   }

   return true;
}

/**
 * \brief Register allocation pass.
 *
 * \param[in,out] shader PCO shader.
 * \return True if the pass made progress.
 */
bool pco_ra(pco_shader *shader)
{
   assert(!shader->is_grouped);

   /* Instruction indices need to be ordered for live ranges. */
   pco_index(shader, true);

   unsigned hw_temps = rogue_get_temps(shader->ctx->dev_info);
   /* TODO:
    * unsigned opt_temps = rogue_get_optimal_temps(shader->ctx->dev_info);
    */

   /* TODO: different number of temps available if preamble/phase change. */
   /* TODO: different number of temps available if barriers are in use. */
   /* TODO: support for internal and vtxin registers. */
   unsigned allocable_temps = hw_temps;
   unsigned allocable_vtxins = 0;
   unsigned allocable_interns = 0;

   /* Perform register allocation for each function. */
   bool progress = false;
   pco_foreach_func_in_shader (func, shader) {
      progress |= pco_ra_func(func,
                              allocable_temps,
                              allocable_vtxins,
                              allocable_interns);

      shader->data.common.temps = MAX2(shader->data.common.temps, func->temps);
   }

   return progress;
}