/*
 * Copyright © 2024 Imagination Technologies Ltd.
 *
 * SPDX-License-Identifier: MIT
 */

/**
 * \file pco_ra.c
 *
 * \brief PCO register allocator.
 */

#include "hwdef/rogue_hw_utils.h"
#include "pco.h"
#include "pco_builder.h"
#include "pco_internal.h"
#include "util/bitset.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/register_allocate.h"
#include "util/sparse_array.h"
#include "util/u_dynarray.h"

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>

/** Live range of an SSA variable. */
struct live_range {
   unsigned start;
   unsigned end;
};
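
/* live_range.start is initialized to ~0U and lowered to the index of the
 * first def; end starts at 0 (the array is zero-allocated) and is raised to
 * the index of the last use.
 */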

/** Vector override information. */
struct vec_override {
   pco_ref ref;
   unsigned offset;
};
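
/* An override redirects an SSA value to (ref, offset): rather than getting
 * its own RA node, the value is allocated as part of ref's contiguous
 * register block, offset channels in.
 */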

/**
 * \brief Performs register allocation on a function.
 *
 * \param[in,out] func PCO function.
 * \param[in] allocable_temps Number of allocatable temp registers.
 * \param[in] allocable_vtxins Number of allocatable vertex input registers.
 * \param[in] allocable_interns Number of allocatable internal registers.
 * \return True if registers were allocated.
 */
static bool pco_ra_func(pco_func *func,
                        unsigned allocable_temps,
                        unsigned allocable_vtxins,
                        unsigned allocable_interns)
{
   /* TODO: support multiple functions and calls. */
   assert(func->type == PCO_FUNC_TYPE_ENTRYPOINT);

   /* TODO: loop lifetime extension.
    * TODO: track successors/predecessors.
    */

   /* Collect used bit sizes. */
   uint8_t ssa_bits = 0;
   pco_foreach_instr_in_func (instr, func) {
      pco_foreach_instr_dest_ssa (pdest, instr) {
         ssa_bits |= (1 << pdest->bits);
      }
   }

   /* No registers to allocate. */
   if (!ssa_bits)
      return false;

   /* 64-bit SSA should've been lowered by now. */
   assert(!(ssa_bits & (1 << PCO_BITS_64)));

   /* TODO: support multiple bit sizes. */
   bool only_32bit = ssa_bits == (1 << PCO_BITS_32);
   assert(only_32bit);

   struct ra_regs *ra_regs =
      ra_alloc_reg_set(func, allocable_temps, !only_32bit);

   /* Overrides for vector coalescing. */
   struct hash_table_u64 *overrides = _mesa_hash_table_u64_create(ra_regs);
   pco_foreach_instr_in_func_rev (instr, func) {
      if (instr->op != PCO_OP_VEC)
         continue;

      pco_ref dest = instr->dest[0];
      unsigned offset = 0;

      struct vec_override *dest_override =
         _mesa_hash_table_u64_search(overrides, dest.val);

      if (dest_override) {
         dest = dest_override->ref;
         offset += dest_override->offset;
      }

      pco_foreach_instr_src (psrc, instr) {
         /* TODO: skip if vector producer is used by multiple things in a way
          * that doesn't allow coalescing. */
         /* TODO: can NIR scalarise things so that the only remaining vectors
          * can be used in this way? */

         if (pco_ref_is_ssa(*psrc)) {
            /* Make sure this hasn't already been overridden somewhere else! */
            assert(!_mesa_hash_table_u64_search(overrides, psrc->val));

            struct vec_override *src_override =
               rzalloc_size(overrides, sizeof(*src_override));
            src_override->ref = dest;
            src_override->offset = offset;

            _mesa_hash_table_u64_insert(overrides, psrc->val, src_override);
         }

         offset += pco_ref_get_chans(*psrc);
      }
   }
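
   /* E.g. (hypothetical IR): for "%4 = vec %1, %2" where %1 has two
    * channels, the walk above records %1 -> { %4, 0 } and %2 -> { %4, 2 }.
    * Iterating in reverse lets a vec that feeds a later vec chain its
    * offsets through the destination's own override.
    */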

   /* Overrides for vector component uses. */
   pco_foreach_instr_in_func (instr, func) {
      if (instr->op != PCO_OP_COMP)
         continue;

      pco_ref dest = instr->dest[0];
      pco_ref src = instr->src[0];
      unsigned offset = pco_ref_get_imm(instr->src[1]);

      assert(pco_ref_is_ssa(src));
      assert(pco_ref_is_ssa(dest));

      struct vec_override *src_override =
         rzalloc_size(overrides, sizeof(*src_override));
      src_override->ref = src;
      src_override->offset = offset;
      _mesa_hash_table_u64_insert(overrides, dest.val, src_override);
   }
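
   /* Likewise, a (hypothetical) "%5 = comp %4, 2" records %5 -> { %4, 2 },
    * so every read of %5 becomes a read of channel 2 of %4's allocation.
    */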

   /* Allocate classes. */
   struct hash_table_u64 *ra_classes = _mesa_hash_table_u64_create(ra_regs);
   pco_foreach_instr_in_func (instr, func) {
      pco_foreach_instr_dest_ssa (pdest, instr) {
         unsigned chans = pco_ref_get_chans(*pdest);
         /* TODO: bitset instead of search? */
         if (_mesa_hash_table_u64_search(ra_classes, chans))
            continue;
         /* Skip if coalesced. */
         if (_mesa_hash_table_u64_search(overrides, pdest->val))
            continue;

         struct ra_class *ra_class = ra_alloc_contig_reg_class(ra_regs, chans);
         _mesa_hash_table_u64_insert(ra_classes, chans, ra_class);
      }
   }
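
   /* One contiguous class per vector width: each register in a class stands
    * for a run of `chans` consecutive temps, which the vec lowering below
    * relies on when it writes temp_dest_base + offset + u.
    */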

   /* Assign registers to classes. */
   hash_table_u64_foreach (ra_classes, entry) {
      const unsigned stride = entry.key;
      struct ra_class *ra_class = entry.data;

      for (unsigned t = 0; t < allocable_temps - (stride - 1); ++t)
         ra_class_add_reg(ra_class, t);
   }
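
   /* Each class register is the base temp of a contiguous run; bases above
    * allocable_temps - stride would run past the end of the temp file, hence
    * the (stride - 1) clamp on the loop bound.
    */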

   ra_set_finalize(ra_regs, NULL);

   struct ra_graph *ra_graph =
      ra_alloc_interference_graph(ra_regs, func->next_ssa);
   ralloc_steal(ra_regs, ra_graph);
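
   /* The graph is stolen onto ra_regs so the single ralloc_free(ra_regs) at
    * the end of the function frees both.
    */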

   /* Allocate and calculate live ranges. */
   struct live_range *live_ranges =
      rzalloc_array_size(ra_regs, sizeof(*live_ranges), func->next_ssa);

   for (unsigned u = 0; u < func->next_ssa; ++u)
      live_ranges[u].start = ~0U;

   pco_foreach_instr_in_func (instr, func) {
      pco_foreach_instr_dest_ssa (pdest, instr) {
         pco_ref dest = *pdest;
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, dest.val);

         if (override)
            dest = override->ref;

         live_ranges[dest.val].start =
            MIN2(live_ranges[dest.val].start, instr->index);

         if (override)
            continue;

         /* Set class if it hasn't already been set up in an override. */
         unsigned chans = pco_ref_get_chans(dest);
         struct ra_class *ra_class =
            _mesa_hash_table_u64_search(ra_classes, chans);
         assert(ra_class);

         ra_set_node_class(ra_graph, dest.val, ra_class);
      }

      pco_foreach_instr_src_ssa (psrc, instr) {
         pco_ref src = *psrc;
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, src.val);

         if (override)
            src = override->ref;

         live_ranges[src.val].end =
            MAX2(live_ranges[src.val].end, instr->index);
      }
   }
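
   /* Defs and uses of overridden values are credited to the overriding ref,
    * so a coalesced vector's live range spans from its earliest component
    * def to its last component use.
    */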

   /* Build interference graph from overlapping live ranges. */
   for (unsigned ssa0 = 0; ssa0 < func->next_ssa; ++ssa0) {
      for (unsigned ssa1 = ssa0 + 1; ssa1 < func->next_ssa; ++ssa1) {
         /* If the live ranges overlap, the register nodes interfere. */
         if ((live_ranges[ssa0].start != ~0U && live_ranges[ssa1].end != ~0U) &&
             !(live_ranges[ssa0].start >= live_ranges[ssa1].end ||
               live_ranges[ssa1].start >= live_ranges[ssa0].end)) {
            ra_add_node_interference(ra_graph, ssa0, ssa1);
         }
      }
   }
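
   /* A simple O(n^2) scan over SSA pairs; ranges that merely touch (one ends
    * at the instruction where the other starts) don't interfere, letting a
    * def reuse the register of a source that dies at the same instruction.
    */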

   bool allocated = ra_allocate(ra_graph);
   assert(allocated);
   /* TODO: spilling. */

   if (PCO_DEBUG_PRINT(RA)) {
      printf("RA live ranges:\n");
      for (unsigned u = 0; u < func->next_ssa; ++u)
         printf("  %%%u: %u, %u\n", u, live_ranges[u].start, live_ranges[u].end);

      if (_mesa_hash_table_u64_num_entries(overrides)) {
         printf("RA overrides:\n");
         hash_table_u64_foreach (overrides, entry) {
            struct vec_override *override = entry.data;
            printf("  %%%" PRIu64 ": ref = ", entry.key);
            pco_print_ref(func->parent_shader, override->ref);
            printf(", offset = %u\n", override->offset);
         }
         printf("\n");
      }
   }

   /* Replace SSA regs with allocated registers. */
   unsigned temps = 0;
   unsigned vtxins = 0;
   unsigned interns = 0;
   pco_foreach_instr_in_func_safe (instr, func) {
      if (PCO_DEBUG_PRINT(RA))
         pco_print_shader(func->parent_shader, stdout, "ra debug");

      /* Insert movs for scalar components of super vecs. */
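      /* SSA sources were already coalesced into this vec via overrides and
       * need no copy; only non-SSA sources (e.g. hardware registers or
       * immediates) are moved into their slot of the destination's
       * contiguous run.
       */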
      if (instr->op == PCO_OP_VEC) {
         pco_builder b =
            pco_builder_create(func, pco_cursor_before_instr(instr));

         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, instr->dest[0].val);

         unsigned offset = override ? override->offset : 0;

         unsigned temp_dest_base =
            override ? ra_get_node_reg(ra_graph, override->ref.val)
                     : ra_get_node_reg(ra_graph, instr->dest[0].val);

         pco_foreach_instr_src (psrc, instr) {
            if (pco_ref_is_ssa(*psrc)) {
               assert(_mesa_hash_table_u64_search(overrides, psrc->val));
            } else {
               unsigned chans = pco_ref_get_chans(*psrc);

               for (unsigned u = 0; u < chans; ++u) {
                  pco_ref dest = pco_ref_hwreg(temp_dest_base + offset + u,
                                               PCO_REG_CLASS_TEMP);
                  pco_ref src = pco_ref_chans(*psrc, 1);
                  src = pco_ref_offset(src, u);

                  pco_mbyp0(&b, dest, src);
               }

               temps = MAX2(temps, temp_dest_base + offset + chans);
            }

            offset += pco_ref_get_chans(*psrc);
         }

         pco_instr_delete(instr);
         continue;
      } else if (instr->op == PCO_OP_COMP) {
         pco_instr_delete(instr);
         continue;
      }
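
      /* For everything else, rewrite SSA dests and sources in place to their
       * allocated temps, applying any override as base(ref) + offset.
       */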

      pco_foreach_instr_dest_ssa (pdest, instr) {
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, pdest->val);

         unsigned val = ra_get_node_reg(ra_graph, pdest->val);
         unsigned dest_temps = val + pco_ref_get_chans(*pdest);
         if (override) {
            val = ra_get_node_reg(ra_graph, override->ref.val);
            dest_temps = val + pco_ref_get_chans(override->ref);
            val += override->offset;
         }

         pdest->type = PCO_REF_TYPE_REG;
         pdest->reg_class = PCO_REG_CLASS_TEMP;
         pdest->val = val;
         temps = MAX2(temps, dest_temps);
      }

      pco_foreach_instr_src_ssa (psrc, instr) {
         struct vec_override *override =
            _mesa_hash_table_u64_search(overrides, psrc->val);

         unsigned val =
            override
               ? ra_get_node_reg(ra_graph, override->ref.val) + override->offset
               : ra_get_node_reg(ra_graph, psrc->val);

         psrc->type = PCO_REF_TYPE_REG;
         psrc->reg_class = PCO_REG_CLASS_TEMP;
         psrc->val = val;
      }
   }

   ralloc_free(ra_regs);

   func->temps = temps;

   if (PCO_DEBUG_PRINT(RA)) {
      printf("RA allocated %u temps, %u vtxins, %u interns.\n",
             temps,
             vtxins,
             interns);
   }

   return true;
}

/**
 * \brief Register allocation pass.
 *
 * \param[in,out] shader PCO shader.
 * \return True if the pass made progress.
 */
bool pco_ra(pco_shader *shader)
{
   assert(!shader->is_grouped);

   /* Instruction indices need to be ordered for live ranges. */
   pco_index(shader, true);

   unsigned hw_temps = rogue_get_temps(shader->ctx->dev_info);
   /* TODO:
    * unsigned opt_temps = rogue_get_optimal_temps(shader->ctx->dev_info);
    */

   /* TODO: different number of temps available if preamble/phase change. */
   /* TODO: different number of temps available if barriers are in use. */
   /* TODO: support for internal and vtxin registers. */
   unsigned allocable_temps = hw_temps;
   unsigned allocable_vtxins = 0;
   unsigned allocable_interns = 0;

   /* Perform register allocation for each function. */
   bool progress = false;
   pco_foreach_func_in_shader (func, shader) {
      progress |= pco_ra_func(func,
                              allocable_temps,
                              allocable_vtxins,
                              allocable_interns);

      shader->data.common.temps = MAX2(shader->data.common.temps, func->temps);
   }

   return progress;
}