• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2005 Ben Skeggs.
3  *
4  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  *
7  * All Rights Reserved.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the
18  * next paragraph) shall be included in all copies or substantial
19  * portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28  *
29  */
30 
31 /**
32  * \file
33  *
34  * \author Ben Skeggs <darktama@iinet.net.au>
35  *
36  * \author Jerome Glisse <j.glisse@gmail.com>
37  *
38  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39  *
40  */
41 
42 #include "r500_fragprog.h"
43 
44 #include "r300_reg.h"
45 
46 #include "radeon_program_pair.h"
47 
48 #include "util/compiler.h"
49 
/*
 * Declares a local `code` pointing at the R500 part of the fragment
 * program code owned by the compiler `c` of the enclosing function.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/*
 * Reports a compile error through rc_error() on the enclosing function's
 * compiler `c`. The message is prefixed with the file and function name
 * and a newline is appended after it.
 */
#define error(fmt, ...) do {					\
		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
			__FILE__, __func__, ##__VA_ARGS__);	\
	} while(0)
57 
58 
/**
 * Instruction indices recorded for one IF/ELSE/ENDIF construct.
 * Else and Endif are set to -1 when the IF is opened and filled in as the
 * matching instructions are emitted.
 */
struct branch_info {
	int If;    /* index of the IF instruction */
	int Else;  /* index of the ELSE instruction, or -1 */
	int Endif; /* index of the ENDIF instruction, or -1 */
};
64 
/**
 * Bookkeeping for one open loop while flow control is being emitted.
 */
struct r500_loop_info {
	/* Index of the BGNLOOP instruction. */
	int BgnLoop;

	/* Branch depth at the time the loop was opened. */
	int BranchDepth;

	/* Indices of the BRK instructions seen inside the loop; their jump
	 * addresses are filled in when ENDLOOP is emitted. */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Indices of the CONT instructions seen inside the loop; patched at
	 * ENDLOOP time as well. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
77 
/**
 * State carried across emit_flowcontrol() calls for one program.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Deepest IF nesting reached so far. */
	unsigned int MaxBranchDepth;

};
93 
/**
 * Map a compiler opcode to the R500 RGB ALU opcode value.
 *
 * NOP is encoded as MAD. An opcode with no mapping here is reported
 * through error() and is then encoded as MAD as well.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* Unknown opcode: flag the error, then fall back to MAD. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
115 
/**
 * Map a compiler opcode to the R500 alpha ALU opcode value.
 *
 * NOP is encoded as MAD. An opcode with no mapping here is reported
 * through error() and is then encoded as MAD as well.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		/* Both dot products share one alpha opcode. */
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	/* Unknown opcode: flag the error, then fall back to MAD. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
142 
fix_hw_swizzle(unsigned int swz)143 static unsigned int fix_hw_swizzle(unsigned int swz)
144 {
145     switch (swz) {
146         case RC_SWIZZLE_ZERO:
147         case RC_SWIZZLE_UNUSED:
148             swz = 4;
149             break;
150         case RC_SWIZZLE_HALF:
151             swz = 5;
152             break;
153         case RC_SWIZZLE_ONE:
154             swz = 6;
155             break;
156     }
157 
158 	return swz;
159 }
160 
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)161 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
162 {
163 	unsigned int t = inst->RGB.Arg[arg].Source;
164 	int comp;
165 	t |= inst->RGB.Arg[arg].Negate << 11;
166 	t |= inst->RGB.Arg[arg].Abs << 12;
167 
168 	for(comp = 0; comp < 3; ++comp)
169 		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
170 
171 	return t;
172 }
173 
translate_arg_alpha(struct rc_pair_instruction * inst,int i)174 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
175 {
176 	unsigned int t = inst->Alpha.Arg[i].Source;
177 	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
178 	t |= inst->Alpha.Arg[i].Negate << 5;
179 	t |= inst->Alpha.Arg[i].Abs << 6;
180 	return t;
181 }
182 
/**
 * Map a compare function to the ALU result operation bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are supported; any other function
 * raises a compiler error and yields 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t result_op = 0;

	switch (func) {
	case RC_COMPARE_FUNC_EQUAL:
		result_op = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		result_op = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func);
		break;
	}

	return result_op;
}
195 
use_temporary(struct r500_fragment_program_code * code,unsigned int index)196 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
197 {
198 	if (index > code->max_temp_idx)
199 		code->max_temp_idx = index;
200 }
201 
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)202 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
203 {
204 	/* From docs:
205 	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
206 	 * MSB = 1 << 7 */
207 	if (!src.Used)
208 		return 1 << 7;
209 
210 	if (src.File == RC_FILE_CONSTANT) {
211 		return src.Index | R500_RGB_ADDR0_CONST;
212 	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
213 		use_temporary(code, src.Index);
214 		return src.Index;
215 	} else if (src.File == RC_FILE_INLINE) {
216 		return src.Index | (1 << 7);
217 	}
218 
219 	return 0;
220 }
221 
222 /**
223  * NOP the specified instruction if it is not a texture lookup.
224  */
alu_nop(struct r300_fragment_program_compiler * c,int ip)225 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
226 {
227 	PROG_CODE;
228 
229 	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
230 		code->inst[ip].inst0 |= R500_INST_NOP;
231 	}
232 }
233 
234 /**
235  * Emit a paired ALU instruction.
236  */
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)237 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
238 {
239 	int ip;
240 	PROG_CODE;
241 
242 	if (code->inst_end >= c->Base.max_alu_insts-1) {
243 		error("emit_alu: Too many instructions");
244 		return;
245 	}
246 
247 	ip = ++code->inst_end;
248 
249 	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
250 	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
251 		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
252 		if (ip > 0) {
253 			alu_nop(c, ip - 1);
254 		}
255 	}
256 
257 	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
258 	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
259 
260 	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
261 		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
262 		if (inst->WriteALUResult) {
263 			error("Cannot write output and ALU result at the same time");
264 			return;
265 		}
266 	} else {
267 		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
268 	}
269 	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
270 
271 	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
272 	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
273 	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
274 	if (inst->Nop) {
275 		code->inst[ip].inst0 |= R500_INST_NOP;
276 	}
277 	if (inst->Alpha.DepthWriteMask) {
278 		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
279 		c->code->writes_depth = 1;
280 	}
281 
282 	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
283 	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
284 	if (inst->Alpha.WriteMask)
285 		use_temporary(code, inst->Alpha.DestIndex);
286 	if (inst->RGB.WriteMask)
287 		use_temporary(code, inst->RGB.DestIndex);
288 
289 	if (inst->RGB.Saturate)
290 		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
291 	if (inst->Alpha.Saturate)
292 		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
293 
294 	/* Set the presubtract operation. */
295 	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
296 		case RC_PRESUB_BIAS:
297 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
298 			break;
299 		case RC_PRESUB_SUB:
300 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
301 			break;
302 		case RC_PRESUB_ADD:
303 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
304 			break;
305 		case RC_PRESUB_INV:
306 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
307 			break;
308 		default:
309 			break;
310 	}
311 	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
312 		case RC_PRESUB_BIAS:
313 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
314 			break;
315 		case RC_PRESUB_SUB:
316 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
317 			break;
318 		case RC_PRESUB_ADD:
319 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
320 			break;
321 		case RC_PRESUB_INV:
322 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
323 			break;
324 		default:
325 			break;
326 	}
327 
328 	/* Set the output modifier */
329 	code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
330 	code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
331 
332 	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
333 	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
334 	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
335 
336 	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
337 	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
338 	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
339 
340 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
341 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
342 	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
343 
344 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
345 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
346 	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
347 
348 	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
349 	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
350 
351 	if (inst->WriteALUResult) {
352 		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
353 
354 		if (inst->WriteALUResult == RC_ALURESULT_X)
355 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
356 		else
357 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
358 
359 		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
360 	}
361 }
362 
/**
 * Repack a four-channel swizzle into two bits per channel, with channel 0
 * in the lowest two bits. Only the low two bits of each selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;

	for (int chan = 0; chan < 4; chan++) {
		unsigned int sel = GET_SWZ(swizzle, chan) & 0x3;

		packed |= sel << chan*2;
	}

	return packed;
}
371 
372 /**
373  * Emit a single TEX instruction
374  */
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)375 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
376 {
377 	int ip;
378 	PROG_CODE;
379 
380 	if (code->inst_end >= c->Base.max_alu_insts-1) {
381 		error("emit_tex: Too many instructions");
382 		return 0;
383 	}
384 
385 	ip = ++code->inst_end;
386 
387 	code->inst[ip].inst0 = R500_INST_TYPE_TEX
388 		| (inst->DstReg.WriteMask << 11)
389 		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
390 	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
391 		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
392 
393 	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
394 		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
395 
396 	switch (inst->Opcode) {
397 	case RC_OPCODE_KIL:
398 		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
399 		break;
400 	case RC_OPCODE_TEX:
401 		code->inst[ip].inst1 |= R500_TEX_INST_LD;
402 		break;
403 	case RC_OPCODE_TXB:
404 		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
405 		break;
406 	case RC_OPCODE_TXP:
407 		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
408 		break;
409 	case RC_OPCODE_TXD:
410 		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
411 		break;
412 	case RC_OPCODE_TXL:
413 		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
414 		break;
415 	default:
416 		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
417 	}
418 
419 	use_temporary(code, inst->SrcReg[0].Index);
420 	if (inst->Opcode != RC_OPCODE_KIL)
421 		use_temporary(code, inst->DstReg.Index);
422 
423 	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
424 		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
425 		| R500_TEX_DST_ADDR(inst->DstReg.Index)
426 		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
427 		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
428 		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
429 		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
430 		;
431 
432 	if (inst->Opcode == RC_OPCODE_TXD) {
433 		use_temporary(code, inst->SrcReg[1].Index);
434 		use_temporary(code, inst->SrcReg[2].Index);
435 
436 		/* DX and DY parameters are specified in a separate register. */
437 		code->inst[ip].inst3 =
438 			R500_DX_ADDR(inst->SrcReg[1].Index) |
439 			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
440 			R500_DY_ADDR(inst->SrcReg[2].Index) |
441 			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
442 	}
443 
444 	return 1;
445 }
446 
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)447 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
448 {
449 	unsigned int newip;
450 
451 	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
452 		rc_error(s->C, "emit_tex: Too many instructions");
453 		return;
454 	}
455 
456 	newip = ++s->Code->inst_end;
457 
458 	/* Currently all loops use the same integer constant to initialize
459 	 * the loop variables. */
460 	if(!s->Code->int_constants[0]) {
461 		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
462 		s->Code->int_constant_count = 1;
463 	}
464 	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
465 	s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
466 
467 	switch(inst->U.I.Opcode){
468 	struct branch_info * branch;
469 	struct r500_loop_info * loop;
470 	case RC_OPCODE_BGNLOOP:
471 		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
472 			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
473 
474 		loop = &s->Loops[s->CurrentLoopDepth++];
475 		memset(loop, 0, sizeof(struct r500_loop_info));
476 		loop->BranchDepth = s->CurrentBranchDepth;
477 		loop->BgnLoop = newip;
478 
479 		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
480 			| R500_FC_JUMP_FUNC(0x00)
481 			| R500_FC_IGNORE_UNCOVERED
482 			;
483 		break;
484 	case RC_OPCODE_BRK:
485 		loop = &s->Loops[s->CurrentLoopDepth - 1];
486 		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
487 					loop->BrkCount, loop->BrkReserved, 1);
488 
489 		loop->Brks[loop->BrkCount++] = newip;
490 		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
491 			| R500_FC_JUMP_FUNC(0xff)
492 			| R500_FC_B_OP1_DECR
493 			| R500_FC_B_POP_CNT(
494 				s->CurrentBranchDepth - loop->BranchDepth)
495 			| R500_FC_IGNORE_UNCOVERED
496 			;
497 		break;
498 
499 	case RC_OPCODE_CONT:
500 		loop = &s->Loops[s->CurrentLoopDepth - 1];
501 		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
502 					loop->ContCount, loop->ContReserved, 1);
503 		loop->Conts[loop->ContCount++] = newip;
504 		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
505 			| R500_FC_JUMP_FUNC(0xff)
506 			| R500_FC_B_OP1_DECR
507 			| R500_FC_B_POP_CNT(
508 				s->CurrentBranchDepth -	loop->BranchDepth)
509 			| R500_FC_IGNORE_UNCOVERED
510 			;
511 		break;
512 
513 	case RC_OPCODE_ENDLOOP:
514 	{
515 		loop = &s->Loops[s->CurrentLoopDepth - 1];
516 		/* Emit ENDLOOP */
517 		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
518 			| R500_FC_JUMP_FUNC(0xff)
519 			| R500_FC_JUMP_ANY
520 			| R500_FC_IGNORE_UNCOVERED
521 			;
522 		/* The constant integer at index 0 is used by all loops. */
523 		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
524 			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
525 			;
526 
527 		/* Set jump address and int constant for BGNLOOP */
528 		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
529 			| R500_FC_JUMP_ADDR(newip)
530 			;
531 
532 		/* Set jump address for the BRK instructions. */
533 		while(loop->BrkCount--) {
534 			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
535 						R500_FC_JUMP_ADDR(newip + 1);
536 		}
537 
538 		/* Set jump address for CONT instructions. */
539 		while(loop->ContCount--) {
540 			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
541 						R500_FC_JUMP_ADDR(newip);
542 		}
543 		s->CurrentLoopDepth--;
544 		break;
545 	}
546 	case RC_OPCODE_IF:
547 		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
548 			rc_error(s->C, "Branch depth exceeds hardware limit");
549 			return;
550 		}
551 		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
552 				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
553 
554 		branch = &s->Branches[s->CurrentBranchDepth++];
555 		branch->If = newip;
556 		branch->Else = -1;
557 		branch->Endif = -1;
558 
559 		if (s->CurrentBranchDepth > s->MaxBranchDepth)
560 			s->MaxBranchDepth = s->CurrentBranchDepth;
561 
562 		/* actual instruction is filled in at ENDIF time */
563 		break;
564 
565 	case RC_OPCODE_ELSE:
566 		if (!s->CurrentBranchDepth) {
567 			rc_error(s->C, "%s: got ELSE outside a branch", __func__);
568 			return;
569 		}
570 
571 		branch = &s->Branches[s->CurrentBranchDepth - 1];
572 		branch->Else = newip;
573 
574 		/* actual instruction is filled in at ENDIF time */
575 		break;
576 
577 	case RC_OPCODE_ENDIF:
578 		if (!s->CurrentBranchDepth) {
579 			rc_error(s->C, "%s: got ELSE outside a branch", __func__);
580 			return;
581 		}
582 
583 		branch = &s->Branches[s->CurrentBranchDepth - 1];
584 		branch->Endif = newip;
585 
586 		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
587 			| R500_FC_A_OP_NONE /* no address stack */
588 			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
589 			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
590 			| R500_FC_B_OP1_NONE /* no branch counter if stay */
591 			| R500_FC_B_POP_CNT(1)
592 			;
593 		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
594 		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
595 			| R500_FC_A_OP_NONE /* no address stack */
596 			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
597 			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
598 			| R500_FC_IGNORE_UNCOVERED
599 		;
600 
601 		if (branch->Else >= 0) {
602 			/* increment branch counter also if jump */
603 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
604 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
605 
606 			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
607 				| R500_FC_A_OP_NONE /* no address stack */
608 				| R500_FC_B_ELSE /* all active pixels want to jump */
609 				| R500_FC_B_OP0_NONE /* no counter op if stay */
610 				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
611 				| R500_FC_B_POP_CNT(1)
612 			;
613 			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
614 		} else {
615 			/* don't touch branch counter on jump */
616 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
617 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
618 		}
619 
620 
621 		s->CurrentBranchDepth--;
622 		break;
623 	default:
624 		rc_error(s->C, "%s: unknown opcode %s\n", __func__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
625 	}
626 }
627 
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)628 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
629 {
630 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
631 	struct emit_state s;
632 	struct r500_fragment_program_code *code = &compiler->code->code.r500;
633 
634 	memset(&s, 0, sizeof(s));
635 	s.C = &compiler->Base;
636 	s.Code = code;
637 
638 	memset(code, 0, sizeof(*code));
639 	code->max_temp_idx = 1;
640 	code->inst_end = -1;
641 
642 	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
643 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
644 	    inst = inst->Next) {
645 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
646 			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
647 
648 			if (opcode->IsFlowControl) {
649 				emit_flowcontrol(&s, inst);
650 			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
651 				continue;
652 			} else {
653 				emit_tex(compiler, &inst->U.I);
654 			}
655 		} else {
656 			emit_paired(compiler, &inst->U.P);
657 		}
658 	}
659 
660 	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
661 		rc_error(&compiler->Base, "Too many hardware temporaries used\n");
662 
663 	if (compiler->Base.Error)
664 		return;
665 
666 	if (code->inst_end == -1 ||
667 	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
668 		int ip;
669 
670 		/* This may happen when dead-code elimination is disabled or
671 		 * when most of the fragment program logic is leading to a KIL */
672 		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
673 			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
674 			return;
675 		}
676 
677 		ip = ++code->inst_end;
678 		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
679 	}
680 
681 	/* Make sure TEX_SEM_WAIT is set on the last instruction */
682 	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
683 
684 	/* Enable full flow control mode if we are using loops or have if
685 	 * statements nested at least four deep. */
686 	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
687 		if (code->max_temp_idx < 1)
688 			code->max_temp_idx = 1;
689 
690 		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
691 	}
692 }
693