• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "radeon_regalloc.h"
30 #include "radeon_list.h"
31 
32 #define VERBOSE 0
33 
34 #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
35 
36 const struct rc_class rc_class_list_vp [] = {
37 	{RC_REG_CLASS_VP_SINGLE, 4,
38 		{RC_MASK_X,
39 		 RC_MASK_Y,
40 		 RC_MASK_Z,
41 		 RC_MASK_W,
42 		 RC_MASK_NONE,
43 		 RC_MASK_NONE}},
44 	{RC_REG_CLASS_VP_DOUBLE, 6,
45 		{RC_MASK_X | RC_MASK_Y,
46 		 RC_MASK_X | RC_MASK_Z,
47 		 RC_MASK_X | RC_MASK_W,
48 		 RC_MASK_Y | RC_MASK_Z,
49 		 RC_MASK_Y | RC_MASK_W,
50 		 RC_MASK_Z | RC_MASK_W}},
51 	{RC_REG_CLASS_VP_TRIPLE, 4,
52 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
53 		 RC_MASK_X | RC_MASK_Y | RC_MASK_W,
54 		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
55 		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
56 		 RC_MASK_NONE,
57 		 RC_MASK_NONE}},
58 	{RC_REG_CLASS_VP_QUADRUPLE, 1,
59 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
60 		 RC_MASK_NONE,
61 		 RC_MASK_NONE,
62 		 RC_MASK_NONE,
63 		 RC_MASK_NONE,
64 		 RC_MASK_NONE}}
65 };
66 
67 const struct rc_class rc_class_list_fp [] = {
68 	{RC_REG_CLASS_FP_SINGLE, 3,
69 		{RC_MASK_X,
70 		 RC_MASK_Y,
71 		 RC_MASK_Z,
72 		 RC_MASK_NONE,
73 		 RC_MASK_NONE,
74 		 RC_MASK_NONE}},
75 	{RC_REG_CLASS_FP_DOUBLE, 3,
76 		{RC_MASK_X | RC_MASK_Y,
77 		 RC_MASK_X | RC_MASK_Z,
78 		 RC_MASK_Y | RC_MASK_Z,
79 		 RC_MASK_NONE,
80 		 RC_MASK_NONE,
81 		 RC_MASK_NONE}},
82 	{RC_REG_CLASS_FP_TRIPLE, 1,
83 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
84 		 RC_MASK_NONE,
85 		 RC_MASK_NONE,
86 		 RC_MASK_NONE,
87 		 RC_MASK_NONE,
88 		 RC_MASK_NONE}},
89 	{RC_REG_CLASS_FP_ALPHA, 1,
90 		{RC_MASK_W,
91 		 RC_MASK_NONE,
92 		 RC_MASK_NONE,
93 		 RC_MASK_NONE,
94 		 RC_MASK_NONE,
95 		 RC_MASK_NONE}},
96 	{RC_REG_CLASS_FP_SINGLE_PLUS_ALPHA, 3,
97 		{RC_MASK_X | RC_MASK_W,
98 		 RC_MASK_Y | RC_MASK_W,
99 		 RC_MASK_Z | RC_MASK_W,
100 		 RC_MASK_NONE,
101 		 RC_MASK_NONE,
102 		 RC_MASK_NONE}},
103 	{RC_REG_CLASS_FP_DOUBLE_PLUS_ALPHA, 3,
104 		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
105 		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
106 		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
107 		 RC_MASK_NONE,
108 		 RC_MASK_NONE,
109 		 RC_MASK_NONE}},
110 	{RC_REG_CLASS_FP_TRIPLE_PLUS_ALPHA, 1,
111 		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
112 		 RC_MASK_NONE,
113 		 RC_MASK_NONE,
114 		 RC_MASK_NONE,
115 		 RC_MASK_NONE,
116 		 RC_MASK_NONE}},
117 	{RC_REG_CLASS_FP_X, 1,
118 		{RC_MASK_X,
119 		 RC_MASK_NONE,
120 		 RC_MASK_NONE,
121 		 RC_MASK_NONE,
122 		 RC_MASK_NONE,
123 		 RC_MASK_NONE}},
124 	{RC_REG_CLASS_FP_Y, 1,
125 		{RC_MASK_Y,
126 		 RC_MASK_NONE,
127 		 RC_MASK_NONE,
128 		 RC_MASK_NONE,
129 		 RC_MASK_NONE,
130 		 RC_MASK_NONE}},
131 	{RC_REG_CLASS_FP_Z, 1,
132 		{RC_MASK_Z,
133 		 RC_MASK_NONE,
134 		 RC_MASK_NONE,
135 		 RC_MASK_NONE,
136 		 RC_MASK_NONE,
137 		 RC_MASK_NONE}},
138 	{RC_REG_CLASS_FP_XY, 1,
139 		{RC_MASK_X | RC_MASK_Y,
140 		 RC_MASK_NONE,
141 		 RC_MASK_NONE,
142 		 RC_MASK_NONE,
143 		 RC_MASK_NONE,
144 		 RC_MASK_NONE}},
145 	{RC_REG_CLASS_FP_YZ, 1,
146 		{RC_MASK_Y | RC_MASK_Z,
147 		 RC_MASK_NONE,
148 		 RC_MASK_NONE,
149 		 RC_MASK_NONE,
150 		 RC_MASK_NONE,
151 		 RC_MASK_NONE}},
152 	{RC_REG_CLASS_FP_XZ, 1,
153 		{RC_MASK_X | RC_MASK_Z,
154 		 RC_MASK_NONE,
155 		 RC_MASK_NONE,
156 		 RC_MASK_NONE,
157 		 RC_MASK_NONE,
158 		 RC_MASK_NONE}},
159 	{RC_REG_CLASS_FP_XW, 1,
160 		{RC_MASK_X | RC_MASK_W,
161 		 RC_MASK_NONE,
162 		 RC_MASK_NONE,
163 		 RC_MASK_NONE,
164 		 RC_MASK_NONE,
165 		 RC_MASK_NONE}},
166 	{RC_REG_CLASS_FP_YW, 1,
167 		{RC_MASK_Y | RC_MASK_W,
168 		 RC_MASK_NONE,
169 		 RC_MASK_NONE,
170 		 RC_MASK_NONE,
171 		 RC_MASK_NONE,
172 		 RC_MASK_NONE}},
173 	{RC_REG_CLASS_FP_ZW, 1,
174 		{RC_MASK_Z | RC_MASK_W,
175 		 RC_MASK_NONE,
176 		 RC_MASK_NONE,
177 		 RC_MASK_NONE,
178 		 RC_MASK_NONE,
179 		 RC_MASK_NONE}},
180 	{RC_REG_CLASS_FP_XYW, 1,
181 		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
182 		 RC_MASK_NONE,
183 		 RC_MASK_NONE,
184 		 RC_MASK_NONE,
185 		 RC_MASK_NONE,
186 		 RC_MASK_NONE}},
187 	{RC_REG_CLASS_FP_YZW, 1,
188 		{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
189 		 RC_MASK_NONE,
190 		 RC_MASK_NONE,
191 		 RC_MASK_NONE,
192 		 RC_MASK_NONE,
193 		 RC_MASK_NONE}},
194 	{RC_REG_CLASS_FP_XZW, 1,
195 		{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
196 		 RC_MASK_NONE,
197 		 RC_MASK_NONE,
198 		 RC_MASK_NONE,
199 		 RC_MASK_NONE,
200 		 RC_MASK_NONE}}
201 };
202 
print_live_intervals(struct live_intervals * src)203 static void print_live_intervals(struct live_intervals * src)
204 {
205 	if (!src || !src->Used) {
206 		DBG("(null)");
207 		return;
208 	}
209 
210 	DBG("(%i,%i)", src->Start, src->End);
211 }
212 
overlap_live_intervals(struct live_intervals * a,struct live_intervals * b)213 static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
214 {
215 	if (VERBOSE) {
216 		DBG("overlap_live_intervals: ");
217 		print_live_intervals(a);
218 		DBG(" to ");
219 		print_live_intervals(b);
220 		DBG("\n");
221 	}
222 
223 	if (!a->Used || !b->Used) {
224 		DBG("    unused interval\n");
225 		return 0;
226 	}
227 
228 	if (a->Start > b->Start) {
229 		if (a->Start < b->End) {
230 			DBG("    overlap\n");
231 			return 1;
232 		}
233 	} else if (b->Start > a->Start) {
234 		if (b->Start < a->End) {
235 			DBG("    overlap\n");
236 			return 1;
237 		}
238 	} else { /* a->Start == b->Start */
239 		if (a->Start != a->End && b->Start != b->End) {
240 			DBG("    overlap\n");
241 			return 1;
242 		}
243 	}
244 
245 	DBG("    no overlap\n");
246 
247 	return 0;
248 }
249 
rc_find_class(const struct rc_class * classes,unsigned int writemask,unsigned int max_writemask_count)250 int rc_find_class(
251 	const struct rc_class * classes,
252 	unsigned int writemask,
253 	unsigned int max_writemask_count)
254 {
255 	unsigned int i;
256 	for (i = 0; i < RC_REG_CLASS_FP_COUNT; i++) {
257 		unsigned int j;
258 		if (classes[i].WritemaskCount > max_writemask_count) {
259 			continue;
260 		}
261 		for (j = 0; j < classes[i].WritemaskCount; j++) {
262 			if (classes[i].Writemasks[j] == writemask) {
263 				return i;
264 			}
265 		}
266 	}
267 	return -1;
268 }
269 
rc_overlap_live_intervals_array(struct live_intervals * a,struct live_intervals * b)270 unsigned int rc_overlap_live_intervals_array(
271 	struct live_intervals * a,
272 	struct live_intervals * b)
273 {
274 	unsigned int a_chan, b_chan;
275 	for (a_chan = 0; a_chan < 4; a_chan++) {
276 		for (b_chan = 0; b_chan < 4; b_chan++) {
277 			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
278 					return 1;
279 			}
280 		}
281 	}
282 	return 0;
283 }
284 
285 #if VERBOSE
print_reg(int reg)286 static void print_reg(int reg)
287 {
288 	unsigned int index = reg_get_index(reg);
289 	unsigned int mask = reg_get_writemask(reg);
290 	fprintf(stderr, "Temp[%u].%c%c%c%c", index,
291 		mask & RC_MASK_X ? 'x' : '_',
292 		mask & RC_MASK_Y ? 'y' : '_',
293 		mask & RC_MASK_Z ? 'z' : '_',
294 		mask & RC_MASK_W ? 'w' : '_');
295 }
296 #endif
297 
add_register_conflicts(struct ra_regs * regs,unsigned int max_temp_regs)298 static void add_register_conflicts(
299 	struct ra_regs * regs,
300 	unsigned int max_temp_regs)
301 {
302 	unsigned int index, a_mask, b_mask;
303 	for (index = 0; index < max_temp_regs; index++) {
304 		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
305 			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
306 								b_mask++) {
307 				if (a_mask & b_mask) {
308 					ra_add_reg_conflict(regs,
309 						get_reg_id(index, a_mask),
310 						get_reg_id(index, b_mask));
311 				}
312 			}
313 		}
314 	}
315 }
316 
rc_build_interference_graph(struct ra_graph * graph,struct rc_list * variables)317 void rc_build_interference_graph(
318 	struct ra_graph * graph,
319 	struct rc_list * variables)
320 {
321 	unsigned node_index;
322 	struct rc_list * var_ptr;
323 
324 	/* Build the interference graph */
325 	for (var_ptr = variables, node_index = 0; var_ptr;
326 					var_ptr = var_ptr->Next, node_index++) {
327 		struct rc_list * a, * b;
328 		unsigned int b_index;
329 
330 		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
331 						b; b = b->Next, b_index++) {
332 			struct rc_variable * var_a = a->Item;
333 			while (var_a) {
334 				struct rc_variable * var_b = b->Item;
335 				while (var_b) {
336 					if (rc_overlap_live_intervals_array(var_a->Live, var_b->Live)) {
337 						ra_add_node_interference(graph,
338 							node_index, b_index);
339 					}
340 					var_b = var_b->Friend;
341 				}
342 				var_a = var_a->Friend;
343 			}
344 		}
345 	}
346 }
347 
rc_init_regalloc_state(struct rc_regalloc_state * s,enum rc_program_type prog)348 void rc_init_regalloc_state(struct rc_regalloc_state *s, enum rc_program_type prog)
349 {
350 	unsigned i, j, index, class_count, max_temps;
351 	unsigned **ra_q_values;
352 
353 	/* Pre-computed q values.  This array describes the maximum number of
354 	 * a class's [row] registers that are in conflict with a single
355 	 * register from another class [column].
356 	 *
357 	 * For example:
358 	 * q_values[0][2] is 3, because a register from class 2
359 	 * (RC_REG_CLASS_FP_TRIPLE) may conflict with at most 3 registers from
360 	 * class 0 (RC_REG_CLASS_FP_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
361 	 * and T0.z.
362 	 *
363 	 * q_values[2][0] is 1, because a register from class 0
364 	 * (RC_REG_CLASS_FP_SINGLE) may conflict with at most 1 register from
365 	 * class 2 (RC_REG_CLASS_FP_TRIPLE) e.g. T0.x conflicts with T0.xyz
366 	 *
367 	 * The q values for each register class [row] will never be greater
368 	 * than the maximum number of writemask combinations for that class.
369 	 *
370 	 * For example:
371 	 *
372 	 * Class 2 (RC_REG_CLASS_FP_TRIPLE) only has 1 writemask combination,
373 	 * so no value in q_values[2][0..RC_REG_CLASS_FP_COUNT] will be greater
374 	 * than 1.
375 	 */
376 	const unsigned q_values_fp[RC_REG_CLASS_FP_COUNT][RC_REG_CLASS_FP_COUNT] = {
377 	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
378 	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
379 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
380 	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
381 	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
382 	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
383 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
384 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
385 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
386 	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
387 	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
388 	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
389 	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
390 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
391 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
392 	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
393 	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
394 	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
395 	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
396 	};
397 
398 	const unsigned q_values_vp[RC_REG_CLASS_VP_COUNT][RC_REG_CLASS_VP_COUNT] = {
399 	{1, 2, 3, 4},
400 	{3, 5, 6, 6},
401 	{3, 4, 4, 4},
402 	{1, 1, 1, 1}
403 	};
404 
405 	if (prog == RC_FRAGMENT_PROGRAM) {
406 		s->class_list = rc_class_list_fp;
407 		class_count = RC_REG_CLASS_FP_COUNT;
408 		max_temps = R500_PFS_NUM_TEMP_REGS;
409 	} else {
410 		s->class_list = rc_class_list_vp;
411 		class_count = RC_REG_CLASS_VP_COUNT;
412 		max_temps = R300_VS_MAX_TEMPS;
413 	}
414 
415 	/* Allocate the main ra data structure */
416 	s->regs = ra_alloc_reg_set(NULL, max_temps * RC_MASK_XYZW,
417                                    true);
418 
419 	/* Create the register classes */
420 	for (i = 0; i < class_count; i++) {
421 		const struct rc_class *class = &s->class_list[i];
422 		s->classes[class->ID] = ra_alloc_reg_class(s->regs);
423 
424 		/* Assign registers to the classes */
425 		for (index = 0; index < max_temps; index++) {
426 			for (j = 0; j < class->WritemaskCount; j++) {
427 				int reg_id = get_reg_id(index,
428 						class->Writemasks[j]);
429 				ra_class_add_reg(s->classes[class->ID], reg_id);
430 			}
431 		}
432 	}
433 
434 	/* Set the q values.  The q_values array is indexed based on
435 	 * the rc_reg_class ID (RC_REG_CLASS_FP_*) which might be
436 	 * different than the ID assigned to that class by ra.
437 	 * This why we need to manually construct this list.
438 	 */
439 	ra_q_values = MALLOC(class_count * sizeof(unsigned *));
440 
441 	for (i = 0; i < class_count; i++) {
442 		ra_q_values[i] = MALLOC(class_count * sizeof(unsigned));
443 		for (j = 0; j < class_count; j++) {
444 			if (prog == RC_FRAGMENT_PROGRAM)
445 				ra_q_values[i][j] = q_values_fp[i][j];
446 			else
447 				ra_q_values[i][j] = q_values_vp[i][j];
448 		}
449 	}
450 
451 	/* Add register conflicts */
452 	add_register_conflicts(s->regs, max_temps);
453 
454 	ra_set_finalize(s->regs, ra_q_values);
455 
456 	for (i = 0; i < class_count; i++) {
457 		FREE(ra_q_values[i]);
458 	}
459 	FREE(ra_q_values);
460 }
461 
rc_destroy_regalloc_state(struct rc_regalloc_state * s)462 void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
463 {
464 	ralloc_free(s->regs);
465 }
466