• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radeon_regalloc.h"
8 #include "radeon_list.h"
9 
10 #define VERBOSE 0
11 
12 #define DBG(...)                                                                                   \
13    do {                                                                                            \
14       if (VERBOSE)                                                                                 \
15          fprintf(stderr, __VA_ARGS__);                                                             \
16    } while (0)
17 
18 const struct rc_class rc_class_list_vp[] = {
19    {
20       RC_REG_CLASS_VP_SINGLE,
21       4,
22       {RC_MASK_X,
23        RC_MASK_Y,
24        RC_MASK_Z,
25        RC_MASK_W,
26        RC_MASK_NONE,
27        RC_MASK_NONE},
28    },
29    {
30       RC_REG_CLASS_VP_DOUBLE,
31       6,
32       {RC_MASK_X | RC_MASK_Y,
33        RC_MASK_X | RC_MASK_Z,
34        RC_MASK_X | RC_MASK_W,
35        RC_MASK_Y | RC_MASK_Z,
36        RC_MASK_Y | RC_MASK_W,
37        RC_MASK_Z | RC_MASK_W},
38    },
39    {
40       RC_REG_CLASS_VP_TRIPLE,
41       4,
42       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
43        RC_MASK_X | RC_MASK_Y | RC_MASK_W,
44        RC_MASK_X | RC_MASK_Z | RC_MASK_W,
45        RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
46        RC_MASK_NONE,
47        RC_MASK_NONE},
48    },
49    {
50       RC_REG_CLASS_VP_QUADRUPLE,
51       1,
52       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
53        RC_MASK_NONE,
54        RC_MASK_NONE,
55        RC_MASK_NONE,
56        RC_MASK_NONE,
57        RC_MASK_NONE},
58    }};
59 
60 const struct rc_class rc_class_list_fp[] = {
61    {
62       RC_REG_CLASS_FP_SINGLE,
63       3,
64       {RC_MASK_X,
65        RC_MASK_Y,
66        RC_MASK_Z,
67        RC_MASK_NONE,
68        RC_MASK_NONE,
69        RC_MASK_NONE},
70    },
71    {
72       RC_REG_CLASS_FP_DOUBLE,
73       3,
74       {RC_MASK_X | RC_MASK_Y,
75        RC_MASK_X | RC_MASK_Z,
76        RC_MASK_Y | RC_MASK_Z,
77        RC_MASK_NONE,
78        RC_MASK_NONE,
79        RC_MASK_NONE},
80    },
81    {
82       RC_REG_CLASS_FP_TRIPLE,
83       1,
84       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
85        RC_MASK_NONE,
86        RC_MASK_NONE,
87        RC_MASK_NONE,
88        RC_MASK_NONE,
89        RC_MASK_NONE},
90    },
91    {
92       RC_REG_CLASS_FP_ALPHA,
93       1,
94       {RC_MASK_W,
95        RC_MASK_NONE,
96        RC_MASK_NONE,
97        RC_MASK_NONE,
98        RC_MASK_NONE,
99        RC_MASK_NONE},
100    },
101    {
102       RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA,
103       3,
104       {RC_MASK_X | RC_MASK_W,
105        RC_MASK_Y | RC_MASK_W,
106        RC_MASK_Z | RC_MASK_W,
107        RC_MASK_NONE,
108        RC_MASK_NONE,
109        RC_MASK_NONE},
110    },
111    {
112       RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA,
113       3,
114       {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
115        RC_MASK_X | RC_MASK_Z | RC_MASK_W,
116        RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
117        RC_MASK_NONE,
118        RC_MASK_NONE,
119        RC_MASK_NONE},
120    },
121    {
122       RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA,
123       1,
124       {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
125        RC_MASK_NONE,
126        RC_MASK_NONE,
127        RC_MASK_NONE,
128        RC_MASK_NONE,
129        RC_MASK_NONE},
130    },
131    {
132       RC_REG_CLASS_FP_X,
133       1,
134       {RC_MASK_X,
135        RC_MASK_NONE,
136        RC_MASK_NONE,
137        RC_MASK_NONE,
138        RC_MASK_NONE,
139        RC_MASK_NONE},
140    },
141    {
142       RC_REG_CLASS_FP_Y,
143       1,
144       {RC_MASK_Y,
145        RC_MASK_NONE,
146        RC_MASK_NONE,
147        RC_MASK_NONE,
148        RC_MASK_NONE,
149        RC_MASK_NONE},
150    },
151    {
152       RC_REG_CLASS_FP_Z,
153       1,
154       {RC_MASK_Z,
155        RC_MASK_NONE,
156        RC_MASK_NONE,
157        RC_MASK_NONE,
158        RC_MASK_NONE,
159        RC_MASK_NONE},
160    },
161    {
162       RC_REG_CLASS_FP_XY,
163       1,
164       {RC_MASK_X | RC_MASK_Y,
165        RC_MASK_NONE,
166        RC_MASK_NONE,
167        RC_MASK_NONE,
168        RC_MASK_NONE,
169        RC_MASK_NONE},
170    },
171    {
172       RC_REG_CLASS_FP_YZ,
173       1,
174       {RC_MASK_Y | RC_MASK_Z,
175        RC_MASK_NONE,
176        RC_MASK_NONE,
177        RC_MASK_NONE,
178        RC_MASK_NONE,
179        RC_MASK_NONE},
180    },
181    {
182       RC_REG_CLASS_FP_XZ,
183       1,
184       {RC_MASK_X | RC_MASK_Z,
185        RC_MASK_NONE,
186        RC_MASK_NONE,
187        RC_MASK_NONE,
188        RC_MASK_NONE,
189        RC_MASK_NONE},
190    },
191    {
192       RC_REG_CLASS_FP_XW,
193       1,
194       {RC_MASK_X | RC_MASK_W,
195        RC_MASK_NONE,
196        RC_MASK_NONE,
197        RC_MASK_NONE,
198        RC_MASK_NONE,
199        RC_MASK_NONE},
200    },
201    {
202       RC_REG_CLASS_FP_YW,
203       1,
204       {RC_MASK_Y | RC_MASK_W,
205        RC_MASK_NONE,
206        RC_MASK_NONE,
207        RC_MASK_NONE,
208        RC_MASK_NONE,
209        RC_MASK_NONE},
210    },
211    {
212       RC_REG_CLASS_FP_ZW,
213       1,
214       {RC_MASK_Z | RC_MASK_W,
215        RC_MASK_NONE,
216        RC_MASK_NONE,
217        RC_MASK_NONE,
218        RC_MASK_NONE,
219        RC_MASK_NONE},
220    },
221    {
222       RC_REG_CLASS_FP_XYW,
223       1,
224       {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
225        RC_MASK_NONE,
226        RC_MASK_NONE,
227        RC_MASK_NONE,
228        RC_MASK_NONE,
229        RC_MASK_NONE},
230    },
231    {
232       RC_REG_CLASS_FP_YZW,
233       1,
234       {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
235        RC_MASK_NONE,
236        RC_MASK_NONE,
237        RC_MASK_NONE,
238        RC_MASK_NONE,
239        RC_MASK_NONE},
240    },
241    {
242       RC_REG_CLASS_FP_XZW,
243       1,
244       {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
245        RC_MASK_NONE,
246        RC_MASK_NONE,
247        RC_MASK_NONE,
248        RC_MASK_NONE,
249        RC_MASK_NONE},
250    }};
251 
252 static void
print_live_intervals(struct live_intervals * src)253 print_live_intervals(struct live_intervals *src)
254 {
255    if (!src || !src->Used) {
256       DBG("(null)");
257       return;
258    }
259 
260    DBG("(%i,%i)", src->Start, src->End);
261 }
262 
263 static int
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)264 overlap_live_intervals(struct live_intervals *a, struct live_intervals *b)
265 {
266    if (VERBOSE) {
267       DBG("overlap_live_intervals: ");
268       print_live_intervals(a);
269       DBG(" to ");
270       print_live_intervals(b);
271       DBG("\n");
272    }
273 
274    if (!a->Used || !b->Used) {
275       DBG("    unused interval\n");
276       return 0;
277    }
278 
279    if (a->Start > b->Start) {
280       if (a->Start < b->End) {
281          DBG("    overlap\n");
282          return 1;
283       }
284    } else if (b->Start > a->Start) {
285       if (b->Start < a->End) {
286          DBG("    overlap\n");
287          return 1;
288       }
289    } else { /* a->Start == b->Start */
290       if (a->Start != a->End && b->Start != b->End) {
291          DBG("    overlap\n");
292          return 1;
293       }
294    }
295 
296    DBG("    no overlap\n");
297 
298    return 0;
299 }
300 
301 int
rc_find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)302 rc_find_class(const struct rc_class *classes, unsigned int writemask,
303               unsigned int max_writemask_count)
304 {
305    unsigned int i;
306    for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
307       unsigned int j;
308       if (classes[i].WritemaskCount > max_writemask_count) {
309          continue;
310       }
311       for (j = 0; j < classes[i].WritemaskCount; j++) {
312          if (classes[i].Writemasks[j] == writemask) {
313             return i;
314          }
315       }
316    }
317    return -1;
318 }
319 
320 unsigned int
rc_overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)321 rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b)
322 {
323    unsigned int a_chan, b_chan;
324    for (a_chan = 0; a_chan < 4; a_chan++) {
325       for (b_chan = 0; b_chan < 4; b_chan++) {
326          if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
327             return 1;
328          }
329       }
330    }
331    return 0;
332 }
333 
334 #if VERBOSE
335 static void
print_reg(int reg)336 print_reg(int reg)
337 {
338    unsigned int index = reg_get_index(reg);
339    unsigned int mask = reg_get_writemask(reg);
340    fprintf(stderr, "Temp[%u].%c%c%c%c", index, mask & RC_MASK_X ? 'x' : '_',
341            mask & RC_MASK_Y ? 'y' : '_', mask & RC_MASK_Z ? 'z' : '_',
342            mask & RC_MASK_W ? 'w' : '_');
343 }
344 #endif
345 
346 static void
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)347 add_register_conflicts(struct ra_regs *regs, unsigned int max_temp_regs)
348 {
349    unsigned int index, a_mask, b_mask;
350    for (index = 0; index < max_temp_regs; index++) {
351       for (a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
352          for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; b_mask++) {
353             if (a_mask & b_mask) {
354                ra_add_reg_conflict(regs, get_reg_id(index, a_mask), get_reg_id(index, b_mask));
355             }
356          }
357       }
358    }
359 }
360 
361 void
rc_build_interference_graph(struct ra_graph * graph,struct rc_list * variables)362 rc_build_interference_graph(struct ra_graph *graph, struct rc_list *variables)
363 {
364    unsigned node_index;
365    struct rc_list *var_ptr;
366 
367    /* Build the interference graph */
368    for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
369       struct rc_list *a, *b;
370       unsigned int b_index;
371 
372       for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; b; b = b->Next, b_index++) {
373          struct rc_variable *var_a = a->Item;
374          while (var_a) {
375             struct rc_variable *var_b = b->Item;
376             while (var_b) {
377                if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
378                   ra_add_node_interference(graph, node_index, b_index);
379                }
380                var_b = var_b->Friend;
381             }
382             var_a = var_a->Friend;
383          }
384       }
385    }
386 }
387 
388 void
rc_init_regalloc_state(struct rc_regalloc_state * s,enum rc_program_type prog)389 rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
390 {
391    unsigned i, j, index, class_count, max_temps;
392    unsigned **ra_q_values;
393 
394    /* Pre-computed q values.  This array describes the maximum number of
395     * a class's [row] registers that are in conflict with a single
396     * register from another class [column].
397     *
398     * For example:
399     * q_values[0][2] is 3, because a register from class 2
400     * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
401     * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
402     * and T0.z.
403     *
404     * q_values[2][0] is 1, because a register from class 0
405     * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
406     * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
407     *
408     * The q values for each register class [row] will never be greater
409     * than the maximum number of writemask combinations for that class.
410     *
411     * For example:
412     *
413     * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
414     * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
415     * than 1.
416     */
417    const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
418       {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
419       {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
420       {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
421       {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
422       {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
423       {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
424       {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
425       {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
426       {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
427       {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
428       {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
429       {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
430       {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
431       {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
432       {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
433       {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
434       {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
435       {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
436       {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
437 
438    const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {{1, 2, 3, 4},
439                                                                                {3, 5, 6, 6},
440                                                                                {3, 4, 4, 4},
441                                                                                {1, 1, 1, 1}};
442 
443    if (prog == RC_FRAGMENT_PROGRAM) {
444       s->class_list = rc_class_list_fp;
445       class_count = RC_REG_CLASS_FP_COUNT;
446       max_temps = R500_PFS_NUM_TEMP_REGS;
447    } else {
448       s->class_list = rc_class_list_vp;
449       class_count = RC_REG_CLASS_VP_COUNT;
450       max_temps = R300_VS_MAX_TEMPS;
451    }
452 
453    /* Allocate the main ra data structure */
454    s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW, true);
455 
456    /* Create the register classes */
457    for (i = 0; i < class_count; i++) {
458       const struct rc_class *class = &s->class_list[i];
459       s->classes[class->ID] = ra_alloc_reg_class(s->regs);
460 
461       /* Assign registers to the classes */
462       for (index = 0; index < max_temps; index++) {
463          for (j = 0; j < class->WritemaskCount; j++) {
464             int reg_id = get_reg_id(index, class->Writemasks[j]);
465             ra_class_add_reg(s->classes[class->ID], reg_id);
466          }
467       }
468    }
469 
470    /* Set the q values.  The q_values array is indexed based on
471     * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
472     * different than the ID assigned to that class by ra.
473     * This why we need to manually construct this list.
474     */
475    ra_q_values = MALLOC(class_count * sizeof(unsigned *));
476 
477    for (i = 0; i < class_count; i++) {
478       ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
479       for (j = 0; j < class_count; j++) {
480          if (prog == RC_FRAGMENT_PROGRAM)
481             ra_q_values[i][j] = q_values_fp[i][j];
482          else
483             ra_q_values[i][j] = q_values_vp[i][j];
484       }
485    }
486 
487    /* Add register conflicts */
488    add_register_conflicts(s->regs, max_temps);
489 
490    ra_set_finalize(s->regs, ra_q_values);
491 
492    for (i = 0; i < class_count; i++) {
493       FREE(ra_q_values[i]);
494    }
495    FREE(ra_q_values);
496 }
497 
498 void
rc_destroy_regalloc_state(struct rc_regalloc_state * s)499 rc_destroy_regalloc_state(struct rc_regalloc_state *s)
500 {
501    ralloc_free(s->regs);
502 }
503