• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
22 
23 #include "radeon_compiler.h"
24 
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "util/u_debug.h"
31 #include "pipe/p_state.h"
32 #include "radeon_dataflow.h"
33 #include "radeon_program.h"
34 #include "radeon_program_pair.h"
35 #include "radeon_regalloc.h"
36 #include "radeon_compiler_util.h"
37 
38 
rc_init(struct radeon_compiler * c,const struct rc_regalloc_state * rs)39 void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs)
40 {
41 	memset(c, 0, sizeof(*c));
42 
43 	memory_pool_init(&c->Pool);
44 	c->Program.Instructions.Prev = &c->Program.Instructions;
45 	c->Program.Instructions.Next = &c->Program.Instructions;
46 	c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
47 	c->regalloc_state = rs;
48 }
49 
rc_destroy(struct radeon_compiler * c)50 void rc_destroy(struct radeon_compiler * c)
51 {
52 	rc_constants_destroy(&c->Program.Constants);
53 	memory_pool_destroy(&c->Pool);
54 	free(c->ErrorMsg);
55 }
56 
rc_debug(struct radeon_compiler * c,const char * fmt,...)57 void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
58 {
59 	va_list ap;
60 
61 	if (!(c->Debug & RC_DBG_LOG))
62 		return;
63 
64 	va_start(ap, fmt);
65 	vfprintf(stderr, fmt, ap);
66 	va_end(ap);
67 }
68 
rc_error(struct radeon_compiler * c,const char * fmt,...)69 void rc_error(struct radeon_compiler * c, const char * fmt, ...)
70 {
71 	va_list ap;
72 
73 	c->Error = 1;
74 
75 	if (!c->ErrorMsg) {
76 		/* Only remember the first error */
77 		char buf[1024];
78 		int written;
79 
80 		va_start(ap, fmt);
81 		written = vsnprintf(buf, sizeof(buf), fmt, ap);
82 		va_end(ap);
83 
84 		if (written < sizeof(buf)) {
85 			c->ErrorMsg = strdup(buf);
86 		} else {
87 			c->ErrorMsg = malloc(written + 1);
88 
89 			va_start(ap, fmt);
90 			vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
91 			va_end(ap);
92 		}
93 	}
94 
95 	if (c->Debug & RC_DBG_LOG) {
96 		fprintf(stderr, "r300compiler error: ");
97 
98 		va_start(ap, fmt);
99 		vfprintf(stderr, fmt, ap);
100 		va_end(ap);
101 	}
102 }
103 
/**
 * Report a failed internal assertion through rc_error().
 *
 * \param c          compiler context that receives the error
 * \param file       source file name of the failed assertion
 * \param line       source line of the failed assertion
 * \param assertion  text of the failed assertion
 * \return always 1
 */
int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
{
	rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);

	return 1;
}
109 
110 /**
111  * Recompute c->Program.InputsRead and c->Program.OutputsWritten
112  * based on which inputs and outputs are actually referenced
113  * in program instructions.
114  */
rc_calculate_inputs_outputs(struct radeon_compiler * c)115 void rc_calculate_inputs_outputs(struct radeon_compiler * c)
116 {
117 	struct rc_instruction *inst;
118 
119 	c->Program.InputsRead = 0;
120 	c->Program.OutputsWritten = 0;
121 
122 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
123 	{
124 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
125 		int i;
126 
127 		for (i = 0; i < opcode->NumSrcRegs; ++i) {
128 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
129 				c->Program.InputsRead |= 1U << inst->U.I.SrcReg[i].Index;
130 		}
131 
132 		if (opcode->HasDstReg) {
133 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
134 				c->Program.OutputsWritten |= 1U << inst->U.I.DstReg.Index;
135 		}
136 	}
137 }
138 
139 /**
140  * Rewrite the program such that a given output is duplicated.
141  */
rc_copy_output(struct radeon_compiler * c,unsigned output,unsigned dup_output)142 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
143 {
144 	unsigned tempreg = rc_find_free_temporary(c);
145 	struct rc_instruction * inst;
146 	struct rc_instruction * insert_pos = c->Program.Instructions.Prev;
147 	struct rc_instruction * last_write_inst = NULL;
148 	unsigned branch_depth = 0;
149 	unsigned loop_depth = 0;
150 	bool emit_after_control_flow = false;
151 	unsigned num_writes = 0;
152 
153 	for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
154 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155 
156 		if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP)
157 			loop_depth++;
158 		if (inst->U.I.Opcode == RC_OPCODE_IF)
159 			branch_depth++;
160 		if ((inst->U.I.Opcode == RC_OPCODE_ENDLOOP && loop_depth--) ||
161 		    (inst->U.I.Opcode == RC_OPCODE_ENDIF && branch_depth--))
162 			if (emit_after_control_flow && loop_depth == 0 && branch_depth == 0) {
163 				insert_pos = inst;
164 				emit_after_control_flow = false;
165 			}
166 
167 		if (opcode->HasDstReg) {
168 			if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
169 				num_writes++;
170 				inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
171 				inst->U.I.DstReg.Index = tempreg;
172 				insert_pos = inst;
173 				last_write_inst = inst;
174 				if (loop_depth != 0 && branch_depth != 0)
175 					emit_after_control_flow = true;
176 			}
177 		}
178 	}
179 
180 	/* If there is only a single write, just duplicate the whole instruction instead.
181 	 * We can do this even when the single write was is a control flow.
182 	 */
183 	if (num_writes == 1) {
184 		last_write_inst->U.I.DstReg.File = RC_FILE_OUTPUT;
185 		last_write_inst->U.I.DstReg.Index = output;
186 
187 		inst = rc_insert_new_instruction(c, last_write_inst);
188 		struct rc_instruction * prev = inst->Prev;
189 		struct rc_instruction * next = inst->Next;
190 		memcpy(inst, last_write_inst, sizeof(struct rc_instruction));
191 		inst->Prev = prev;
192 		inst->Next = next;
193 		inst->U.I.DstReg.Index = dup_output;
194 	} else {
195 		inst = rc_insert_new_instruction(c, insert_pos);
196 		inst->U.I.Opcode = RC_OPCODE_MOV;
197 		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
198 		inst->U.I.DstReg.Index = output;
199 
200 		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
201 		inst->U.I.SrcReg[0].Index = tempreg;
202 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
203 
204 		inst = rc_insert_new_instruction(c, inst);
205 		inst->U.I.Opcode = RC_OPCODE_MOV;
206 		inst->U.I.DstReg.File = RC_FILE_OUTPUT;
207 		inst->U.I.DstReg.Index = dup_output;
208 
209 		inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
210 		inst->U.I.SrcReg[0].Index = tempreg;
211 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
212 	}
213 
214 	c->Program.OutputsWritten |= 1U << dup_output;
215 }
216 
217 
218 /**
219  * Introduce standard code fragment to deal with fragment.position.
220  */
rc_transform_fragment_wpos(struct radeon_compiler * c,unsigned wpos,unsigned new_input,int full_vtransform)221 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
222                                 int full_vtransform)
223 {
224 	unsigned tempregi = rc_find_free_temporary(c);
225 	struct rc_instruction * inst_rcp;
226 	struct rc_instruction * inst_mul;
227 	struct rc_instruction * inst_mad;
228 	struct rc_instruction * inst;
229 
230 	c->Program.InputsRead &= ~(1U << wpos);
231 	c->Program.InputsRead |= 1U << new_input;
232 
233 	/* perspective divide */
234 	inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
235 	inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
236 
237 	inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
238 	inst_rcp->U.I.DstReg.Index = tempregi;
239 	inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
240 
241 	inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
242 	inst_rcp->U.I.SrcReg[0].Index = new_input;
243 	inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
244 
245 	inst_mul = rc_insert_new_instruction(c, inst_rcp);
246 	inst_mul->U.I.Opcode = RC_OPCODE_MUL;
247 
248 	inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
249 	inst_mul->U.I.DstReg.Index = tempregi;
250 	inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
251 
252 	inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
253 	inst_mul->U.I.SrcReg[0].Index = new_input;
254 
255 	inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
256 	inst_mul->U.I.SrcReg[1].Index = tempregi;
257 	inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
258 
259 	/* viewport transformation */
260 	inst_mad = rc_insert_new_instruction(c, inst_mul);
261 	inst_mad->U.I.Opcode = RC_OPCODE_MAD;
262 
263 	inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
264 	inst_mad->U.I.DstReg.Index = tempregi;
265 	inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
266 
267 	inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
268 	inst_mad->U.I.SrcReg[0].Index = tempregi;
269 	inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
270 
271 	inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
272 	inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
273 
274 	inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
275 	inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
276 
277 	if (full_vtransform) {
278 		inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
279 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
280 	} else {
281 		inst_mad->U.I.SrcReg[1].Index =
282 		inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
283 	}
284 
285 	for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
286 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
287 		unsigned i;
288 
289 		for(i = 0; i < opcode->NumSrcRegs; i++) {
290 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
291 			    inst->U.I.SrcReg[i].Index == wpos) {
292 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
293 				inst->U.I.SrcReg[i].Index = tempregi;
294 			}
295 		}
296 	}
297 }
298 
299 
300 /**
301  * The FACE input in hardware contains 1 if it's a back face, 0 otherwise.
302  * Gallium and OpenGL define it the other way around.
303  *
304  * So let's just negate FACE at the beginning of the shader and rewrite the rest
305  * of the shader to read from the newly allocated temporary.
306  */
rc_transform_fragment_face(struct radeon_compiler * c,unsigned face)307 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face)
308 {
309 	unsigned tempregi = rc_find_free_temporary(c);
310 	struct rc_instruction *inst_add;
311 	struct rc_instruction *inst;
312 
313 	/* perspective divide */
314 	inst_add = rc_insert_new_instruction(c, &c->Program.Instructions);
315 	inst_add->U.I.Opcode = RC_OPCODE_ADD;
316 
317 	inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
318 	inst_add->U.I.DstReg.Index = tempregi;
319 	inst_add->U.I.DstReg.WriteMask = RC_MASK_X;
320 
321 	inst_add->U.I.SrcReg[0].File = RC_FILE_NONE;
322 	inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
323 
324 	inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT;
325 	inst_add->U.I.SrcReg[1].Index = face;
326 	inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX;
327 	inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
328 
329 	for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) {
330 		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
331 		unsigned i;
332 
333 		for(i = 0; i < opcode->NumSrcRegs; i++) {
334 			if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
335 			    inst->U.I.SrcReg[i].Index == face) {
336 				inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
337 				inst->U.I.SrcReg[i].Index = tempregi;
338 			}
339 		}
340 	}
341 }
342 
/**
 * Per-register-read callback used by rc_get_stats().
 *
 * \p userdata points to the struct rc_program_stats being filled in.
 * Temporaries update the highest index seen, constants update the required
 * constant count (highest index + 1), and every inline literal read is
 * counted. \p inst and \p mask are not used.
 */
static void reg_count_callback(void * userdata, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct rc_program_stats *stats = userdata;

	switch (file) {
	case RC_FILE_TEMPORARY:
		/* Tracks the maximum index, not a count; the caller adds 1. */
		if ((int)index > stats->num_temp_regs)
			stats->num_temp_regs = index;
		break;
	case RC_FILE_INLINE:
		stats->num_inline_literals++;
		break;
	case RC_FILE_CONSTANT:
		stats->num_consts = MAX2(stats->num_consts, index + 1);
		break;
	default:
		break;
	}
}
354 
rc_get_stats(struct radeon_compiler * c,struct rc_program_stats * s)355 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s)
356 {
357 	struct rc_instruction * tmp;
358 	memset(s, 0, sizeof(*s));
359 
360 	for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions;
361 							tmp = tmp->Next){
362 		const struct rc_opcode_info * info;
363 		rc_for_all_reads_mask(tmp, reg_count_callback, s);
364 		if (tmp->Type == RC_INSTRUCTION_NORMAL) {
365 			info = rc_get_opcode_info(tmp->U.I.Opcode);
366 			if (info->Opcode == RC_OPCODE_BEGIN_TEX)
367 				continue;
368 			if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE)
369 				s->num_presub_ops++;
370 		} else {
371 			if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used)
372 				s->num_presub_ops++;
373 			if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used)
374 				s->num_presub_ops++;
375 			/* Assuming alpha will never be a flow control or
376 			 * a tex instruction. */
377 			if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP)
378 				s->num_alpha_insts++;
379 			if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP)
380 				s->num_rgb_insts++;
381 			if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 &&
382 				tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) {
383 				s->num_omod_ops++;
384 			}
385 			if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 &&
386 				tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) {
387 				s->num_omod_ops++;
388 			}
389 			info = rc_get_opcode_info(tmp->U.P.RGB.Opcode);
390 		}
391 		if (info->IsFlowControl) {
392 			s->num_fc_insts++;
393 			if (info->Opcode == RC_OPCODE_BGNLOOP)
394 				s->num_loops++;
395 		}
396 		/* VS flow control was already translated to the predicate instructions */
397 		if (c->type == RC_VERTEX_PROGRAM)
398 			if (strstr(info->Name, "PRED") != NULL)
399 				s->num_pred_insts++;
400 
401 		if (info->HasTexture)
402 			s->num_tex_insts++;
403 		s->num_insts++;
404 	}
405 	/* Increment here because the reg_count_callback store the max
406 	 * temporary reg index in s->nun_temp_regs. */
407 	s->num_temp_regs++;
408 }
409 
print_stats(struct radeon_compiler * c)410 static void print_stats(struct radeon_compiler * c)
411 {
412 	struct rc_program_stats s;
413 
414 	rc_get_stats(c, &s);
415 
416 	/* Note that we print some dummy values for instruction categories that
417 	 * only the FS has, becasue shader-db's report.py wants all shaders to
418 	 * have the same set.
419 	 */
420 	util_debug_message(c->debug, SHADER_INFO, "%s shader: %u inst, %u vinst, %u sinst, %u predicate, %u flowcontrol, %u loops, %u tex, %u presub, %u omod, %u temps, %u consts, %u lits",
421 	                   c->type == RC_VERTEX_PROGRAM ? "VS" : "FS",
422 	                   s.num_insts, s.num_rgb_insts, s.num_alpha_insts, s.num_pred_insts,
423 	                   s.num_fc_insts, s.num_loops, s.num_tex_insts, s.num_presub_ops,
424 	                   s.num_omod_ops, s.num_temp_regs, s.num_consts, s.num_inline_literals);
425 }
426 
427 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = {
428 	"Vertex Program",
429 	"Fragment Program"
430 };
431 
rc_run_compiler_passes(struct radeon_compiler * c,struct radeon_compiler_pass * list)432 bool rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list)
433 {
434 	for (unsigned i = 0; list[i].name; i++) {
435 		if (list[i].predicate) {
436 			list[i].run(c, list[i].user);
437 
438 			if (c->Error)
439 				return false;
440 
441 			if ((c->Debug & RC_DBG_LOG) && list[i].dump) {
442 				fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name);
443 				rc_print_program(&c->Program);
444 			}
445 		}
446 	}
447 	return true;
448 }
449 
450 /* Executes a list of compiler passes given in the parameter 'list'. */
rc_run_compiler(struct radeon_compiler * c,struct radeon_compiler_pass * list)451 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list)
452 {
453 	if (c->Debug & RC_DBG_LOG) {
454 		fprintf(stderr, "%s: before compilation\n", shader_name[c->type]);
455 		rc_print_program(&c->Program);
456 	}
457 
458 	if(rc_run_compiler_passes(c, list)) {
459 		print_stats(c);
460 	}
461 }
462 
rc_validate_final_shader(struct radeon_compiler * c,void * user)463 void rc_validate_final_shader(struct radeon_compiler *c, void *user)
464 {
465 	/* Check the number of constants. */
466 	if (c->Program.Constants.Count > c->max_constants) {
467 		rc_error(c, "Too many constants. Max: %i, Got: %i\n",
468 			 c->max_constants, c->Program.Constants.Count);
469 	}
470 }
471