1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "radeon_regalloc.h"
30 #include "radeon_list.h"
31
32 #define VERBOSE 0
33
34 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
35
36 const struct rc_class rc_class_list_vp [] = {
37 {RC_REG_CLASS_VP_SINGLE, 4,
38 {RC_MASK_X,
39 RC_MASK_Y,
40 RC_MASK_Z,
41 RC_MASK_W,
42 RC_MASK_NONE,
43 RC_MASK_NONE}},
44 {RC_REG_CLASS_VP_DOUBLE, 6,
45 {RC_MASK_X | RC_MASK_Y,
46 RC_MASK_X | RC_MASK_Z,
47 RC_MASK_X | RC_MASK_W,
48 RC_MASK_Y | RC_MASK_Z,
49 RC_MASK_Y | RC_MASK_W,
50 RC_MASK_Z | RC_MASK_W}},
51 {RC_REG_CLASS_VP_TRIPLE, 4,
52 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
53 RC_MASK_X | RC_MASK_Y | RC_MASK_W,
54 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
55 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
56 RC_MASK_NONE,
57 RC_MASK_NONE}},
58 {RC_REG_CLASS_VP_QUADRUPLE, 1,
59 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
60 RC_MASK_NONE,
61 RC_MASK_NONE,
62 RC_MASK_NONE,
63 RC_MASK_NONE,
64 RC_MASK_NONE}}
65 };
66
67 const struct rc_class rc_class_list_fp [] = {
68 {RC_REG_CLASS_FP_SINGLE, 3,
69 {RC_MASK_X,
70 RC_MASK_Y,
71 RC_MASK_Z,
72 RC_MASK_NONE,
73 RC_MASK_NONE,
74 RC_MASK_NONE}},
75 {RC_REG_CLASS_FP_DOUBLE, 3,
76 {RC_MASK_X | RC_MASK_Y,
77 RC_MASK_X | RC_MASK_Z,
78 RC_MASK_Y | RC_MASK_Z,
79 RC_MASK_NONE,
80 RC_MASK_NONE,
81 RC_MASK_NONE}},
82 {RC_REG_CLASS_FP_TRIPLE, 1,
83 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
84 RC_MASK_NONE,
85 RC_MASK_NONE,
86 RC_MASK_NONE,
87 RC_MASK_NONE,
88 RC_MASK_NONE}},
89 {RC_REG_CLASS_FP_ALPHA, 1,
90 {RC_MASK_W,
91 RC_MASK_NONE,
92 RC_MASK_NONE,
93 RC_MASK_NONE,
94 RC_MASK_NONE,
95 RC_MASK_NONE}},
96 {RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, 3,
97 {RC_MASK_X | RC_MASK_W,
98 RC_MASK_Y | RC_MASK_W,
99 RC_MASK_Z | RC_MASK_W,
100 RC_MASK_NONE,
101 RC_MASK_NONE,
102 RC_MASK_NONE}},
103 {RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, 3,
104 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
105 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
106 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
107 RC_MASK_NONE,
108 RC_MASK_NONE,
109 RC_MASK_NONE}},
110 {RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, 1,
111 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
112 RC_MASK_NONE,
113 RC_MASK_NONE,
114 RC_MASK_NONE,
115 RC_MASK_NONE,
116 RC_MASK_NONE}},
117 {RC_REG_CLASS_FP_X, 1,
118 {RC_MASK_X,
119 RC_MASK_NONE,
120 RC_MASK_NONE,
121 RC_MASK_NONE,
122 RC_MASK_NONE,
123 RC_MASK_NONE}},
124 {RC_REG_CLASS_FP_Y, 1,
125 {RC_MASK_Y,
126 RC_MASK_NONE,
127 RC_MASK_NONE,
128 RC_MASK_NONE,
129 RC_MASK_NONE,
130 RC_MASK_NONE}},
131 {RC_REG_CLASS_FP_Z, 1,
132 {RC_MASK_Z,
133 RC_MASK_NONE,
134 RC_MASK_NONE,
135 RC_MASK_NONE,
136 RC_MASK_NONE,
137 RC_MASK_NONE}},
138 {RC_REG_CLASS_FP_XY, 1,
139 {RC_MASK_X | RC_MASK_Y,
140 RC_MASK_NONE,
141 RC_MASK_NONE,
142 RC_MASK_NONE,
143 RC_MASK_NONE,
144 RC_MASK_NONE}},
145 {RC_REG_CLASS_FP_YZ, 1,
146 {RC_MASK_Y | RC_MASK_Z,
147 RC_MASK_NONE,
148 RC_MASK_NONE,
149 RC_MASK_NONE,
150 RC_MASK_NONE,
151 RC_MASK_NONE}},
152 {RC_REG_CLASS_FP_XZ, 1,
153 {RC_MASK_X | RC_MASK_Z,
154 RC_MASK_NONE,
155 RC_MASK_NONE,
156 RC_MASK_NONE,
157 RC_MASK_NONE,
158 RC_MASK_NONE}},
159 {RC_REG_CLASS_FP_XW, 1,
160 {RC_MASK_X | RC_MASK_W,
161 RC_MASK_NONE,
162 RC_MASK_NONE,
163 RC_MASK_NONE,
164 RC_MASK_NONE,
165 RC_MASK_NONE}},
166 {RC_REG_CLASS_FP_YW, 1,
167 {RC_MASK_Y | RC_MASK_W,
168 RC_MASK_NONE,
169 RC_MASK_NONE,
170 RC_MASK_NONE,
171 RC_MASK_NONE,
172 RC_MASK_NONE}},
173 {RC_REG_CLASS_FP_ZW, 1,
174 {RC_MASK_Z | RC_MASK_W,
175 RC_MASK_NONE,
176 RC_MASK_NONE,
177 RC_MASK_NONE,
178 RC_MASK_NONE,
179 RC_MASK_NONE}},
180 {RC_REG_CLASS_FP_XYW, 1,
181 {RC_MASK_X | RC_MASK_Y | RC_MASK_W,
182 RC_MASK_NONE,
183 RC_MASK_NONE,
184 RC_MASK_NONE,
185 RC_MASK_NONE,
186 RC_MASK_NONE}},
187 {RC_REG_CLASS_FP_YZW, 1,
188 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
189 RC_MASK_NONE,
190 RC_MASK_NONE,
191 RC_MASK_NONE,
192 RC_MASK_NONE,
193 RC_MASK_NONE}},
194 {RC_REG_CLASS_FP_XZW, 1,
195 {RC_MASK_X | RC_MASK_Z | RC_MASK_W,
196 RC_MASK_NONE,
197 RC_MASK_NONE,
198 RC_MASK_NONE,
199 RC_MASK_NONE,
200 RC_MASK_NONE}}
201 };
202
print_live_intervals(struct live_intervals * src)203 static void print_live_intervals(struct live_intervals * src)
204 {
205 if (!src || !src->Used) {
206 DBG("(null)");
207 return;
208 }
209
210 DBG("(%i,%i)", src->Start, src->End);
211 }
212
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)213 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
214 {
215 if (VERBOSE) {
216 DBG("overlap_live_intervals: ");
217 print_live_intervals(a);
218 DBG(" to ");
219 print_live_intervals(b);
220 DBG("\n");
221 }
222
223 if (!a->Used || !b->Used) {
224 DBG(" unused interval\n");
225 return 0;
226 }
227
228 if (a->Start > b->Start) {
229 if (a->Start < b->End) {
230 DBG(" overlap\n");
231 return 1;
232 }
233 } else if (b->Start > a->Start) {
234 if (b->Start < a->End) {
235 DBG(" overlap\n");
236 return 1;
237 }
238 } else { /* a->Start == b->Start */
239 if (a->Start != a->End && b->Start != b->End) {
240 DBG(" overlap\n");
241 return 1;
242 }
243 }
244
245 DBG(" no overlap\n");
246
247 return 0;
248 }
249
rc_find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)250 int rc_find_class(
251 const struct rc_class * classes,
252 unsigned int writemask,
253 unsigned int max_writemask_count)
254 {
255 unsigned int i;
256 for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
257 unsigned int j;
258 if (classes[i].WritemaskCount > max_writemask_count) {
259 continue;
260 }
261 for (j = 0; j < classes[i].WritemaskCount; j++) {
262 if (classes[i].Writemasks[j] == writemask) {
263 return i;
264 }
265 }
266 }
267 return -1;
268 }
269
rc_overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)270 unsigned int rc_overlap_live_intervals_array(
271 struct live_intervals * a,
272 struct live_intervals * b)
273 {
274 unsigned int a_chan, b_chan;
275 for (a_chan = 0; a_chan < 4; a_chan++) {
276 for (b_chan = 0; b_chan < 4; b_chan++) {
277 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
278 return 1;
279 }
280 }
281 }
282 return 0;
283 }
284
285 #if VERBOSE
print_reg(int reg)286 static void print_reg(int reg)
287 {
288 unsigned int index = reg_get_index(reg);
289 unsigned int mask = reg_get_writemask(reg);
290 fprintf(stderr, "Temp[%u].%c%c%c%c", index,
291 mask & RC_MASK_X ? 'x' : '_',
292 mask & RC_MASK_Y ? 'y' : '_',
293 mask & RC_MASK_Z ? 'z' : '_',
294 mask & RC_MASK_W ? 'w' : '_');
295 }
296 #endif
297
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)298 static void add_register_conflicts(
299 struct ra_regs * regs,
300 unsigned int max_temp_regs)
301 {
302 unsigned int index, a_mask, b_mask;
303 for (index = 0; index < max_temp_regs; index++) {
304 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
305 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
306 b_mask++) {
307 if (a_mask & b_mask) {
308 ra_add_reg_conflict(regs,
309 get_reg_id(index, a_mask),
310 get_reg_id(index, b_mask));
311 }
312 }
313 }
314 }
315 }
316
rc_build_interference_graph(struct ra_graph * graph,struct rc_list * variables)317 void rc_build_interference_graph(
318 struct ra_graph * graph,
319 struct rc_list * variables)
320 {
321 unsigned node_index;
322 struct rc_list * var_ptr;
323
324 /* Build the interference graph */
325 for (var_ptr = variables, node_index = 0; var_ptr;
326 var_ptr = var_ptr->Next, node_index++) {
327 struct rc_list * a, * b;
328 unsigned int b_index;
329
330 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
331 b; b = b->Next, b_index++) {
332 struct rc_variable * var_a = a->Item;
333 while (var_a) {
334 struct rc_variable * var_b = b->Item;
335 while (var_b) {
336 if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
337 ra_add_node_interference(graph,
338 node_index, b_index);
339 }
340 var_b = var_b->Friend;
341 }
342 var_a = var_a->Friend;
343 }
344 }
345 }
346 }
347
rc_init_regalloc_state(struct rc_regalloc_state * s,enum rc_program_type prog)348 void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
349 {
350 unsigned i, j, index, class_count, max_temps;
351 unsigned **ra_q_values;
352
353 /* Pre-computed q values. This array describes the maximum number of
354 * a class's [row] registers that are in conflict with a single
355 * register from another class [column].
356 *
357 * For example:
358 * q_values[0][2] is 3, because a register from class 2
359 * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
360 * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
361 * and T0.z.
362 *
363 * q_values[2][0] is 1, because a register from class 0
364 * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
365 * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
366 *
367 * The q values for each register class [row] will never be greater
368 * than the maximum number of writemask combinations for that class.
369 *
370 * For example:
371 *
372 * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
373 * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
374 * than 1.
375 */
376 const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
377 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
378 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
379 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
380 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
381 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
382 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
383 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
384 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
385 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
386 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
387 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
388 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
389 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
390 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
391 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
392 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
393 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
394 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
395 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
396 };
397
398 const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {
399 {1, 2, 3, 4},
400 {3, 5, 6, 6},
401 {3, 4, 4, 4},
402 {1, 1, 1, 1}
403 };
404
405 if (prog == RC_FRAGMENT_PROGRAM) {
406 s->class_list = rc_class_list_fp;
407 class_count = RC_REG_CLASS_FP_COUNT;
408 max_temps = R500_PFS_NUM_TEMP_REGS;
409 } else {
410 s->class_list = rc_class_list_vp;
411 class_count = RC_REG_CLASS_VP_COUNT;
412 max_temps = R300_VS_MAX_TEMPS;
413 }
414
415 /* Allocate the main ra data structure */
416 s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW,
417 true);
418
419 /* Create the register classes */
420 for (i = 0; i < class_count; i++) {
421 const struct rc_class *class = &s->class_list[i];
422 s->classes[class->ID] = ra_alloc_reg_class(s->regs);
423
424 /* Assign registers to the classes */
425 for (index = 0; index < max_temps; index++) {
426 for (j = 0; j < class->WritemaskCount; j++) {
427 int reg_id = get_reg_id(index,
428 class->Writemasks[j]);
429 ra_class_add_reg(s->classes[class->ID], reg_id);
430 }
431 }
432 }
433
434 /* Set the q values. The q_values array is indexed based on
435 * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
436 * different than the ID assigned to that class by ra.
437 * This why we need to manually construct this list.
438 */
439 ra_q_values = MALLOC(class_count * sizeof(unsigned *));
440
441 for (i = 0; i < class_count; i++) {
442 ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
443 for (j = 0; j < class_count; j++) {
444 if (prog == RC_FRAGMENT_PROGRAM)
445 ra_q_values[i][j] = q_values_fp[i][j];
446 else
447 ra_q_values[i][j] = q_values_vp[i][j];
448 }
449 }
450
451 /* Add register conflicts */
452 add_register_conflicts(s->regs, max_temps);
453
454 ra_set_finalize(s->regs, ra_q_values);
455
456 for (i = 0; i < class_count; i++) {
457 FREE(ra_q_values[i]);
458 }
459 FREE(ra_q_values);
460 }
461
rc_destroy_regalloc_state(struct rc_regalloc_state * s)462 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
463 {
464 ralloc_free(s->regs);
465 }
466