/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2011 Tom Stellard <tstellar@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
28 
29 #include "radeon_program_pair.h"
30 
31 #include <stdio.h>
32 
33 #include "util/glheader.h"
34 #include "util/register_allocate.h"
35 #include "util/u_memory.h"
36 #include "util/ralloc.h"
37 
38 #include "r300_fragprog_swizzle.h"
39 #include "radeon_compiler.h"
40 #include "radeon_compiler_util.h"
41 #include "radeon_dataflow.h"
42 #include "radeon_list.h"
43 #include "radeon_regalloc.h"
44 #include "radeon_variable.h"
45 
/* Read callback for rc_for_all_reads_mask(): records which channels of an
 * input register are read and sets their live range.
 *
 * Reads from any file other than RC_FILE_INPUT are ignored.  For an input
 * read, the register is flagged as used and every channel selected by mask
 * is marked live from IP 0 up to the larger of s->LoopEnd and the IP of the
 * reading instruction. */
static void scan_read_callback(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state * state = data;
	struct register_info * input;
	unsigned int chan;

	if (file != RC_FILE_INPUT)
		return;

	input = &state->Input[index];
	input->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		if (mask & (1u << chan)) {
			input->Live[chan].Used = 1;
			input->Live[chan].Start = 0;
			if (state->LoopEnd > inst->IP)
				input->Live[chan].End = state->LoopEnd;
			else
				input->Live[chan].End = inst->IP;
		}
	}
}
69 
/* Remap callback for rc_remap_registers(): replaces *index with the index
 * recorded for that register in the allocation state.
 *
 * Input registers are always looked up; temporaries are looked up only when
 * the simple allocator is in use (s->Simple).  Registers of any other file,
 * and registers whose entry is not marked Allocated, are left untouched.
 * Only *index is written; *file is never modified here. */
static void remap_register(void * data, struct rc_instruction * inst,
		rc_register_file * file, unsigned int * index)
{
	struct regalloc_state * state = data;
	const struct register_info * info;

	if (*file == RC_FILE_INPUT) {
		info = &state->Input[*index];
	} else if (*file == RC_FILE_TEMPORARY && state->Simple) {
		info = &state->Temporary[*index];
	} else {
		return;
	}

	if (!info->Allocated)
		return;

	*index = info->Index;
}
87 
alloc_input_simple(void * data,unsigned int input,unsigned int hwreg)88 static void alloc_input_simple(void * data, unsigned int input,
89 							unsigned int hwreg)
90 {
91 	struct regalloc_state * s = data;
92 
93 	if (input >= s->NumInputs)
94 		return;
95 
96 	s->Input[input].Allocated = 1;
97 	s->Input[input].File = RC_FILE_TEMPORARY;
98 	s->Input[input].Index = hwreg;
99 }
100 
101 /* This functions offsets the temporary register indices by the number
102  * of input registers, because input registers are actually temporaries and
103  * should not occupy the same space.
104  *
105  * This pass is supposed to be used to maintain correct allocation of inputs
106  * if the standard register allocation is disabled. */
do_regalloc_inputs_only(struct regalloc_state * s)107 static void do_regalloc_inputs_only(struct regalloc_state * s)
108 {
109 	for (unsigned i = 0; i < s->NumTemporaries; i++) {
110 		s->Temporary[i].Allocated = 1;
111 		s->Temporary[i].File = RC_FILE_TEMPORARY;
112 		s->Temporary[i].Index = i + s->NumInputs;
113 	}
114 }
115 
/* Returns 1 if op is one of the derivative opcodes (RC_OPCODE_DDX or
 * RC_OPCODE_DDY), and 0 for every other opcode. */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
120 
/* State handed to variable_get_class_read_cb() through its userdata
 * pointer. */
struct variable_get_class_cb_data {
	/* Out-flag owned by the caller; the callback clears it to 0 when the
	 * converted swizzle is not native. */
	unsigned int *can_change_writemask;
	/* Swizzle applied to each argument's swizzle via
	 * rc_adjust_channels(). */
	unsigned int conversion_swizzle;
	/* Compiler whose SwizzleCaps->IsNative() performs the check. */
	struct radeon_compiler *c;
};
126 
variable_get_class_read_cb(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)127 static void variable_get_class_read_cb(
128 	void * userdata,
129 	struct rc_instruction * inst,
130 	struct rc_pair_instruction_arg * arg,
131 	struct rc_pair_instruction_source * src)
132 {
133 	struct variable_get_class_cb_data * d = userdata;
134 	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
135 							d->conversion_swizzle);
136 	/* We can't just call r300_swizzle_is_native basic here, because it ignores the
137 	 * extra requirements for presubtract. However, after pair translation we no longer
138 	 * have the rc_src_register required for the native swizzle, so we have to
139 	 * reconstruct it. */
140 	struct rc_src_register reg = {};
141 	reg.Swizzle = new_swizzle;
142 	reg.File = src->File;
143 
144 	assert(inst->Type == RC_INSTRUCTION_PAIR);
145 	/* The opcode is unimportant, we can't have TEX here. */
146 	if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
147 		*d->can_change_writemask = 0;
148 	}
149 }
150 
variable_get_class(struct rc_variable * variable,const struct rc_class * classes)151 static unsigned variable_get_class(
152 	struct rc_variable * variable,
153 	const struct rc_class * classes)
154 {
155 	unsigned int i;
156 	unsigned int can_change_writemask= 1;
157 	unsigned int writemask = rc_variable_writemask_sum(variable);
158 	struct rc_list * readers = rc_variable_readers_union(variable);
159 	int class_index;
160 
161 	if (!variable->C->is_r500) {
162 		struct rc_class c;
163 		struct rc_variable * var_ptr;
164 		/* The assumption here is that if an instruction has type
165 		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
166 		 * r300 and r400 can't swizzle the result of a TEX lookup. */
167 		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
168 			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
169 				writemask = RC_MASK_XYZW;
170 			}
171 		}
172 
173 		/* Check if it is possible to do swizzle packing for r300/r400
174 		 * without creating non-native swizzles. */
175 		class_index = rc_find_class(classes, writemask, 3);
176 		if (class_index < 0) {
177 			goto error;
178 		}
179 		c = classes[class_index];
180 		if (c.WritemaskCount == 1) {
181 			goto done;
182 		}
183 		for (i = 0; i < c.WritemaskCount; i++) {
184 			struct rc_variable * var_ptr;
185 			for (var_ptr = variable; var_ptr;
186 						var_ptr = var_ptr->Friend) {
187 				int j;
188 				unsigned int conversion_swizzle =
189 						rc_make_conversion_swizzle(
190 						writemask, c.Writemasks[i]);
191 				struct variable_get_class_cb_data d;
192 				d.can_change_writemask = &can_change_writemask;
193 				d.conversion_swizzle = conversion_swizzle;
194 				d.c = variable->C;
195 				/* If we get this far var_ptr->Inst has to
196 				 * be a pair instruction.  If variable or any
197 				 * of its friends are normal instructions,
198 				 * then the writemask will be set to RC_MASK_XYZW
199 				 * and the function will return before it gets
200 				 * here. */
201 				rc_pair_for_all_reads_arg(var_ptr->Inst,
202 					variable_get_class_read_cb, &d);
203 
204 				for (j = 0; j < var_ptr->ReaderCount; j++) {
205 					unsigned int old_swizzle;
206 					unsigned int new_swizzle;
207 					struct rc_reader r = var_ptr->Readers[j];
208 					if (r.Inst->Type ==
209 							RC_INSTRUCTION_PAIR ) {
210 						old_swizzle = r.U.P.Arg->Swizzle;
211 					} else {
212 						/* Source operands of TEX
213 						 * instructions can't be
214 						 * swizzle on r300/r400 GPUs.
215 						 */
216 						can_change_writemask = 0;
217 						break;
218 					}
219 					new_swizzle = rc_rewrite_swizzle(
220 						old_swizzle, conversion_swizzle);
221 					if (!r300_swizzle_is_native_basic(
222 								new_swizzle)) {
223 						can_change_writemask = 0;
224 						break;
225 					}
226 				}
227 				if (!can_change_writemask) {
228 					break;
229 				}
230 			}
231 			if (!can_change_writemask) {
232 				break;
233 			}
234 		}
235 	}
236 
237 	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
238 		/* DDX/DDY seem to always fail when their writemasks are
239 		 * changed.*/
240 		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
241 		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
242 			can_change_writemask = 0;
243 		}
244 	}
245 	for ( ; readers; readers = readers->Next) {
246 		struct rc_reader * r = readers->Item;
247 		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
248 			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
249 				can_change_writemask = 0;
250 				break;
251 			}
252 			/* DDX/DDY also fail when their swizzles are changed. */
253 			if (is_derivative(r->Inst->U.P.RGB.Opcode)
254 			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
255 				can_change_writemask = 0;
256 				break;
257 			}
258 		}
259 	}
260 
261 	class_index = rc_find_class(classes, writemask,
262 						can_change_writemask ? 3 : 1);
263 done:
264 	if (class_index > -1) {
265 		return classes[class_index].ID;
266 	} else {
267 error:
268 		rc_error(variable->C,
269 				"Could not find class for index=%u mask=%u\n",
270 				variable->Dst.Index, writemask);
271 		return 0;
272 	}
273 }
274 
do_advanced_regalloc(struct regalloc_state * s)275 static void do_advanced_regalloc(struct regalloc_state * s)
276 {
277 
278 	unsigned int i, input_node, node_count, node_index;
279 	struct ra_class ** node_classes;
280 	struct rc_instruction * inst;
281 	struct rc_list * var_ptr;
282 	struct rc_list * variables;
283 	struct ra_graph * graph;
284 	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
285 
286 	/* Get list of program variables */
287 	variables = rc_get_variables(s->C);
288 	node_count = rc_list_count(variables);
289 	node_classes = memory_pool_malloc(&s->C->Pool,
290 			node_count * sizeof(struct ra_class *));
291 
292 	for (var_ptr = variables, node_index = 0; var_ptr;
293 					var_ptr = var_ptr->Next, node_index++) {
294 		unsigned int class_index;
295 		/* Compute the live intervals */
296 		rc_variable_compute_live_intervals(var_ptr->Item);
297 
298 		class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
299 		node_classes[node_index] = ra_state->classes[class_index];
300 	}
301 
302 
303 	/* Calculate live intervals for input registers */
304 	for (inst = s->C->Program.Instructions.Next;
305 					inst != &s->C->Program.Instructions;
306 					inst = inst->Next) {
307 		rc_opcode op = rc_get_flow_control_inst(inst);
308 		if (op == RC_OPCODE_BGNLOOP) {
309 			struct rc_instruction * endloop =
310 							rc_match_bgnloop(inst);
311 			if (endloop->IP > s->LoopEnd) {
312 				s->LoopEnd = endloop->IP;
313 			}
314 		}
315 		rc_for_all_reads_mask(inst, scan_read_callback, s);
316 	}
317 
318 	/* Compute the writemask for inputs. */
319 	for (i = 0; i < s->NumInputs; i++) {
320 		unsigned int chan, writemask = 0;
321 		for (chan = 0; chan < 4; chan++) {
322 			if (s->Input[i].Live[chan].Used) {
323 				writemask |= (1 << chan);
324 			}
325 		}
326 		s->Input[i].Writemask = writemask;
327 	}
328 
329 	graph = ra_alloc_interference_graph(ra_state->regs,
330 						node_count + s->NumInputs);
331 
332 	for (node_index = 0; node_index < node_count; node_index++) {
333 		ra_set_node_class(graph, node_index, node_classes[node_index]);
334 	}
335 
336 	rc_build_interference_graph(graph, variables);
337 
338 	/* Add input registers to the interference graph */
339 	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
340 		if (!s->Input[i].Writemask) {
341 			continue;
342 		}
343 		for (var_ptr = variables, node_index = 0;
344 				var_ptr; var_ptr = var_ptr->Next, node_index++) {
345 			struct rc_variable * var = var_ptr->Item;
346 			if (rc_overlap_live_intervals_array(s->Input[i].Live,
347 								var->Live)) {
348 				ra_add_node_interference(graph, node_index,
349 						node_count + input_node);
350 			}
351 		}
352 		/* Manually allocate a register for this input */
353 		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
354 				s->Input[i].Index, s->Input[i].Writemask));
355 		input_node++;
356 	}
357 
358 	if (!ra_allocate(graph)) {
359 		rc_error(s->C, "Ran out of hardware temporaries\n");
360                 ralloc_free(graph);
361 		return;
362 	}
363 
364 	/* Rewrite the registers */
365 	for (var_ptr = variables, node_index = 0; var_ptr;
366 				var_ptr = var_ptr->Next, node_index++) {
367 		int reg = ra_get_node_reg(graph, node_index);
368 		unsigned int writemask = reg_get_writemask(reg);
369 		unsigned int index = reg_get_index(reg);
370 		struct rc_variable * var = var_ptr->Item;
371 
372 		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
373 			writemask = rc_variable_writemask_sum(var);
374 		}
375 
376 		if (var->Dst.File == RC_FILE_INPUT) {
377 			continue;
378 		}
379 		rc_variable_change_dst(var, index, writemask);
380 	}
381 
382 	ralloc_free(graph);
383 }
384 
385 /**
386  * @param user This parameter should be a pointer to an integer value.  If this
387  * integer value is zero, then a simple register allocator will be used that
388  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
389  * user is non-zero, then the regular register allocator will be used
390  * (\sa do_regalloc).
391   */
rc_pair_regalloc(struct radeon_compiler * cc,void * user)392 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
393 {
394 	struct r300_fragment_program_compiler *c =
395 				(struct r300_fragment_program_compiler*)cc;
396 	struct regalloc_state s;
397 	int * do_full_regalloc = (int*)user;
398 
399 	memset(&s, 0, sizeof(s));
400 	s.C = cc;
401 	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
402 	s.Input = memory_pool_malloc(&cc->Pool,
403 			s.NumInputs * sizeof(struct register_info));
404 	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
405 
406 	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
407 	s.Temporary = memory_pool_malloc(&cc->Pool,
408 			s.NumTemporaries * sizeof(struct register_info));
409 	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
410 
411 	rc_recompute_ips(s.C);
412 
413 	c->AllocateHwInputs(c, &alloc_input_simple, &s);
414 	if (*do_full_regalloc) {
415 		do_advanced_regalloc(&s);
416 	} else {
417 		s.Simple = 1;
418 		do_regalloc_inputs_only(&s);
419 	}
420 
421 	/* Rewrite inputs and if we are doing the simple allocation, rewrite
422 	 * temporaries too. */
423 	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
424 					inst != &s.C->Program.Instructions;
425 					inst = inst->Next) {
426 		rc_remap_registers(inst, &remap_register, &s);
427 	}
428 }
429