1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radeon_regalloc.h"
8 #include "radeon_list.h"
9
10 #define VERBOSE 0
11
12 #define DBG(...) \
13 do { \
14 if (VERBOSE) \
15 fprintf(stderr, __VA_ARGS__); \
16 } while (0)
17
18 const struct rc_class rc_class_list_vp[] = {
19 {
20 RC_REG_CLASS_VP_SINGLE,
21 4,
22 {RC_MASK_X,
23 RC_MASK_Y,
24 RC_MASK_Z,
25 RC_MASK_W,
26 RC_MASK_NONE,
27 RC_MASK_NONE},
28 },
29 {
30 RC_REG_CLASS_VP_DOUBLE,
31 6,
32 {RC_MASK_X | RC_MASK_Y,
33 RC_MASK_X | RC_MASK_Z,
34 RC_MASK_X | RC_MASK_W,
35 RC_MASK_Y | RC_MASK_Z,
36 RC_MASK_Y | RC_MASK_W,
37 RC_MASK_Z | RC_MASK_W},
38 },
39 {
40 RC_REG_CLASS_VP_TRIPLE,
41 4,
42 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
43 RC_MASK_X | RC_MASK_Y | RC_MASK_W,
44 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
45 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
46 RC_MASK_NONE,
47 RC_MASK_NONE},
48 },
49 {
50 RC_REG_CLASS_VP_QUADRUPLE,
51 1,
52 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
53 RC_MASK_NONE,
54 RC_MASK_NONE,
55 RC_MASK_NONE,
56 RC_MASK_NONE,
57 RC_MASK_NONE},
58 }};
59
60 const struct rc_class rc_class_list_fp[] = {
61 {
62 RC_REG_CLASS_FP_SINGLE,
63 3,
64 {RC_MASK_X,
65 RC_MASK_Y,
66 RC_MASK_Z,
67 RC_MASK_NONE,
68 RC_MASK_NONE,
69 RC_MASK_NONE},
70 },
71 {
72 RC_REG_CLASS_FP_DOUBLE,
73 3,
74 {RC_MASK_X | RC_MASK_Y,
75 RC_MASK_X | RC_MASK_Z,
76 RC_MASK_Y | RC_MASK_Z,
77 RC_MASK_NONE,
78 RC_MASK_NONE,
79 RC_MASK_NONE},
80 },
81 {
82 RC_REG_CLASS_FP_TRIPLE,
83 1,
84 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
85 RC_MASK_NONE,
86 RC_MASK_NONE,
87 RC_MASK_NONE,
88 RC_MASK_NONE,
89 RC_MASK_NONE},
90 },
91 {
92 RC_REG_CLASS_FP_ALPHA,
93 1,
94 {RC_MASK_W,
95 RC_MASK_NONE,
96 RC_MASK_NONE,
97 RC_MASK_NONE,
98 RC_MASK_NONE,
99 RC_MASK_NONE},
100 },
101 {
102 RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA,
103 3,
104 {RC_MASK_X | RC_MASK_W,
105 RC_MASK_Y | RC_MASK_W,
106 RC_MASK_Z | RC_MASK_W,
107 RC_MASK_NONE,
108 RC_MASK_NONE,
109 RC_MASK_NONE},
110 },
111 {
112 RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA,
113 3,
114 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
115 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
116 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
117 RC_MASK_NONE,
118 RC_MASK_NONE,
119 RC_MASK_NONE},
120 },
121 {
122 RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA,
123 1,
124 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
125 RC_MASK_NONE,
126 RC_MASK_NONE,
127 RC_MASK_NONE,
128 RC_MASK_NONE,
129 RC_MASK_NONE},
130 },
131 {
132 RC_REG_CLASS_FP_X,
133 1,
134 {RC_MASK_X,
135 RC_MASK_NONE,
136 RC_MASK_NONE,
137 RC_MASK_NONE,
138 RC_MASK_NONE,
139 RC_MASK_NONE},
140 },
141 {
142 RC_REG_CLASS_FP_Y,
143 1,
144 {RC_MASK_Y,
145 RC_MASK_NONE,
146 RC_MASK_NONE,
147 RC_MASK_NONE,
148 RC_MASK_NONE,
149 RC_MASK_NONE},
150 },
151 {
152 RC_REG_CLASS_FP_Z,
153 1,
154 {RC_MASK_Z,
155 RC_MASK_NONE,
156 RC_MASK_NONE,
157 RC_MASK_NONE,
158 RC_MASK_NONE,
159 RC_MASK_NONE},
160 },
161 {
162 RC_REG_CLASS_FP_XY,
163 1,
164 {RC_MASK_X | RC_MASK_Y,
165 RC_MASK_NONE,
166 RC_MASK_NONE,
167 RC_MASK_NONE,
168 RC_MASK_NONE,
169 RC_MASK_NONE},
170 },
171 {
172 RC_REG_CLASS_FP_YZ,
173 1,
174 {RC_MASK_Y | RC_MASK_Z,
175 RC_MASK_NONE,
176 RC_MASK_NONE,
177 RC_MASK_NONE,
178 RC_MASK_NONE,
179 RC_MASK_NONE},
180 },
181 {
182 RC_REG_CLASS_FP_XZ,
183 1,
184 {RC_MASK_X | RC_MASK_Z,
185 RC_MASK_NONE,
186 RC_MASK_NONE,
187 RC_MASK_NONE,
188 RC_MASK_NONE,
189 RC_MASK_NONE},
190 },
191 {
192 RC_REG_CLASS_FP_XW,
193 1,
194 {RC_MASK_X | RC_MASK_W,
195 RC_MASK_NONE,
196 RC_MASK_NONE,
197 RC_MASK_NONE,
198 RC_MASK_NONE,
199 RC_MASK_NONE},
200 },
201 {
202 RC_REG_CLASS_FP_YW,
203 1,
204 {RC_MASK_Y | RC_MASK_W,
205 RC_MASK_NONE,
206 RC_MASK_NONE,
207 RC_MASK_NONE,
208 RC_MASK_NONE,
209 RC_MASK_NONE},
210 },
211 {
212 RC_REG_CLASS_FP_ZW,
213 1,
214 {RC_MASK_Z | RC_MASK_W,
215 RC_MASK_NONE,
216 RC_MASK_NONE,
217 RC_MASK_NONE,
218 RC_MASK_NONE,
219 RC_MASK_NONE},
220 },
221 {
222 RC_REG_CLASS_FP_XYW,
223 1,
224 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
225 RC_MASK_NONE,
226 RC_MASK_NONE,
227 RC_MASK_NONE,
228 RC_MASK_NONE,
229 RC_MASK_NONE},
230 },
231 {
232 RC_REG_CLASS_FP_YZW,
233 1,
234 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
235 RC_MASK_NONE,
236 RC_MASK_NONE,
237 RC_MASK_NONE,
238 RC_MASK_NONE,
239 RC_MASK_NONE},
240 },
241 {
242 RC_REG_CLASS_FP_XZW,
243 1,
244 {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
245 RC_MASK_NONE,
246 RC_MASK_NONE,
247 RC_MASK_NONE,
248 RC_MASK_NONE,
249 RC_MASK_NONE},
250 }};
251
252 static void
print_live_intervals(struct live_intervals * src)253 print_live_intervals(struct live_intervals *src)
254 {
255 if (!src || !src->Used) {
256 DBG("(null)");
257 return;
258 }
259
260 DBG("(%i,%i)", src->Start, src->End);
261 }
262
263 static int
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)264 overlap_live_intervals(struct live_intervals *a, struct live_intervals *b)
265 {
266 if (VERBOSE) {
267 DBG("overlap_live_intervals: ");
268 print_live_intervals(a);
269 DBG(" to ");
270 print_live_intervals(b);
271 DBG("\n");
272 }
273
274 if (!a->Used || !b->Used) {
275 DBG(" unused interval\n");
276 return 0;
277 }
278
279 if (a->Start > b->Start) {
280 if (a->Start < b->End) {
281 DBG(" overlap\n");
282 return 1;
283 }
284 } else if (b->Start > a->Start) {
285 if (b->Start < a->End) {
286 DBG(" overlap\n");
287 return 1;
288 }
289 } else { /* a->Start == b->Start */
290 if (a->Start != a->End && b->Start != b->End) {
291 DBG(" overlap\n");
292 return 1;
293 }
294 }
295
296 DBG(" no overlap\n");
297
298 return 0;
299 }
300
301 int
rc_find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)302 rc_find_class(const struct rc_class *classes, unsigned int writemask,
303 unsigned int max_writemask_count)
304 {
305 unsigned int i;
306 for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
307 unsigned int j;
308 if (classes[i].WritemaskCount > max_writemask_count) {
309 continue;
310 }
311 for (j = 0; j < classes[i].WritemaskCount; j++) {
312 if (classes[i].Writemasks[j] == writemask) {
313 return i;
314 }
315 }
316 }
317 return -1;
318 }
319
320 unsigned int
rc_overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)321 rc_overlap_live_intervals_array(struct live_intervals *a, struct live_intervals *b)
322 {
323 unsigned int a_chan, b_chan;
324 for (a_chan = 0; a_chan < 4; a_chan++) {
325 for (b_chan = 0; b_chan < 4; b_chan++) {
326 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
327 return 1;
328 }
329 }
330 }
331 return 0;
332 }
333
334 #if VERBOSE
335 static void
print_reg(int reg)336 print_reg(int reg)
337 {
338 unsigned int index = reg_get_index(reg);
339 unsigned int mask = reg_get_writemask(reg);
340 fprintf(stderr, "Temp[%u].%c%c%c%c", index, mask & RC_MASK_X ? 'x' : '_',
341 mask & RC_MASK_Y ? 'y' : '_', mask & RC_MASK_Z ? 'z' : '_',
342 mask & RC_MASK_W ? 'w' : '_');
343 }
344 #endif
345
346 static void
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)347 add_register_conflicts(struct ra_regs *regs, unsigned int max_temp_regs)
348 {
349 unsigned int index, a_mask, b_mask;
350 for (index = 0; index < max_temp_regs; index++) {
351 for (a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
352 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; b_mask++) {
353 if (a_mask & b_mask) {
354 ra_add_reg_conflict(regs, get_reg_id(index, a_mask), get_reg_id(index, b_mask));
355 }
356 }
357 }
358 }
359 }
360
361 void
rc_build_interference_graph(struct ra_graph * graph,struct rc_list * variables)362 rc_build_interference_graph(struct ra_graph *graph, struct rc_list *variables)
363 {
364 unsigned node_index;
365 struct rc_list *var_ptr;
366
367 /* Build the interference graph */
368 for (var_ptr = variables, node_index = 0; var_ptr; var_ptr = var_ptr->Next, node_index++) {
369 struct rc_list *a, *b;
370 unsigned int b_index;
371
372 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; b; b = b->Next, b_index++) {
373 struct rc_variable *var_a = a->Item;
374 while (var_a) {
375 struct rc_variable *var_b = b->Item;
376 while (var_b) {
377 if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
378 ra_add_node_interference(graph, node_index, b_index);
379 }
380 var_b = var_b->Friend;
381 }
382 var_a = var_a->Friend;
383 }
384 }
385 }
386 }
387
388 void
rc_init_regalloc_state(struct rc_regalloc_state * s,enum rc_program_type prog)389 rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
390 {
391 unsigned i, j, index, class_count, max_temps;
392 unsigned **ra_q_values;
393
394 /* Pre-computed q values. This array describes the maximum number of
395 * a class's [row] registers that are in conflict with a single
396 * register from another class [column].
397 *
398 * For example:
399 * q_values[0][2] is 3, because a register from class 2
400 * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
401 * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
402 * and T0.z.
403 *
404 * q_values[2][0] is 1, because a register from class 0
405 * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
406 * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
407 *
408 * The q values for each register class [row] will never be greater
409 * than the maximum number of writemask combinations for that class.
410 *
411 * For example:
412 *
413 * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
414 * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
415 * than 1.
416 */
417 const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
418 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
419 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
420 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
421 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
422 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
423 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
424 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
425 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
426 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
427 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
428 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
429 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
430 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
431 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
432 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
433 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
434 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
435 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
436 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}};
437
438 const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {{1, 2, 3, 4},
439 {3, 5, 6, 6},
440 {3, 4, 4, 4},
441 {1, 1, 1, 1}};
442
443 if (prog == RC_FRAGMENT_PROGRAM) {
444 s->class_list = rc_class_list_fp;
445 class_count = RC_REG_CLASS_FP_COUNT;
446 max_temps = R500_PFS_NUM_TEMP_REGS;
447 } else {
448 s->class_list = rc_class_list_vp;
449 class_count = RC_REG_CLASS_VP_COUNT;
450 max_temps = R300_VS_MAX_TEMPS;
451 }
452
453 /* Allocate the main ra data structure */
454 s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW, true);
455
456 /* Create the register classes */
457 for (i = 0; i < class_count; i++) {
458 const struct rc_class *class = &s->class_list[i];
459 s->classes[class->ID] = ra_alloc_reg_class(s->regs);
460
461 /* Assign registers to the classes */
462 for (index = 0; index < max_temps; index++) {
463 for (j = 0; j < class->WritemaskCount; j++) {
464 int reg_id = get_reg_id(index, class->Writemasks[j]);
465 ra_class_add_reg(s->classes[class->ID], reg_id);
466 }
467 }
468 }
469
470 /* Set the q values. The q_values array is indexed based on
471 * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
472 * different than the ID assigned to that class by ra.
473 * This why we need to manually construct this list.
474 */
475 ra_q_values = MALLOC(class_count * sizeof(unsigned *));
476
477 for (i = 0; i < class_count; i++) {
478 ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
479 for (j = 0; j < class_count; j++) {
480 if (prog == RC_FRAGMENT_PROGRAM)
481 ra_q_values[i][j] = q_values_fp[i][j];
482 else
483 ra_q_values[i][j] = q_values_vp[i][j];
484 }
485 }
486
487 /* Add register conflicts */
488 add_register_conflicts(s->regs, max_temps);
489
490 ra_set_finalize(s->regs, ra_q_values);
491
492 for (i = 0; i < class_count; i++) {
493 FREE(ra_q_values[i]);
494 }
495 FREE(ra_q_values);
496 }
497
498 void
rc_destroy_regalloc_state(struct rc_regalloc_state * s)499 rc_destroy_regalloc_state(struct rc_regalloc_state *s)
500 {
501 ralloc_free(s->regs);
502 }
503