1 /*
2 * Copyright 2005 Ben Skeggs.
3 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
4 * SPDX-License-Identifier: MIT
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 */
7 /**
8 * \file
9 *
10 * \author Ben Skeggs <darktama@iinet.net.au>
11 *
12 * \author Jerome Glisse <j.glisse@gmail.com>
13 *
14 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
15 *
16 */
17
18 #include "r500_fragprog.h"
19
20 #include "r300_reg.h"
21
22 #include "radeon_program_pair.h"
23
24 #include "util/compiler.h"
25
/* Declares a local `code` pointing at the R500 hardware program owned by the
 * compiler; a compiler pointer named `c` must be in scope at the use site. */
#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500

/* Reports a compile error through rc_error(), prefixing the message with the
 * source file and function name and appending a newline. A compiler pointer
 * named `c` must be in scope at the use site. */
#define error(fmt, ...)                                                                   \
   do {                                                                                   \
      rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##__VA_ARGS__);       \
   } while (0)
32
/* Bookkeeping for one IF ... [ELSE ...] ENDIF construct while it is open. */
struct branch_info {
   /* Instruction indices of the IF, ELSE and ENDIF flow-control slots.
    * Else and Endif are set to -1 when the IF is recorded and are filled in
    * once the matching instruction is seen. */
   int If, Else, Endif;
};
38
/* Bookkeeping for one BGNLOOP ... ENDLOOP construct while it is open. */
struct r500_loop_info {
   /* Instruction index of the BGNLOOP instruction. */
   int BgnLoop;

   /* Branch nesting depth at the time the loop was opened. */
   int BranchDepth;

   /* Instruction indices of the BRK instructions seen in this loop, the number
    * of entries in use, and the reserve counter handed to
    * memory_pool_array_reserve() (presumably the allocated capacity). */
   int *Brks;
   int BrkCount, BrkReserved;

   /* Same triple as above, for the CONT instructions seen in this loop. */
   int *Conts;
   int ContCount, ContReserved;
};
51
/* State carried across emit_flowcontrol() calls for one program. */
struct emit_state {
   struct radeon_compiler *C;               /* compiler used for errors and pool allocation */
   struct r500_fragment_program_code *Code; /* hardware program being written */

   /* Stack of currently open IF constructs. */
   struct branch_info *Branches;
   unsigned CurrentBranchDepth;
   unsigned BranchesReserved;

   /* Stack of currently open loops. */
   struct r500_loop_info *Loops;
   unsigned CurrentLoopDepth;
   unsigned LoopsReserved;

   /* Deepest IF nesting reached so far. */
   unsigned MaxBranchDepth;
};
66
67 static unsigned int
translate_rgb_op(struct r300_fragment_program_compiler * c,rc_opcode opcode)68 translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
69 {
70 switch (opcode) {
71 case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
72 case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
73 case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
74 case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
75 case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
76 case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
77 case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
78 default:
79 error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
80 FALLTHROUGH;
81 case RC_OPCODE_NOP: FALLTHROUGH;
82 case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
83 case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
84 case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
85 case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
86 }
87 }
88
89 static unsigned int
translate_alpha_op(struct r300_fragment_program_compiler * c,rc_opcode opcode)90 translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
91 {
92 switch (opcode) {
93 case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
94 case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
95 case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
96 case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
97 case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
98 case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
99 case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
100 case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
101 case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
102 case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
103 default:
104 error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
105 FALLTHROUGH;
106 case RC_OPCODE_NOP: FALLTHROUGH;
107 case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
108 case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
109 case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
110 case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
111 case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
112 case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
113 }
114 }
115
116 static unsigned int
fix_hw_swizzle(unsigned int swz)117 fix_hw_swizzle(unsigned int swz)
118 {
119 switch (swz) {
120 case RC_SWIZZLE_ZERO:
121 case RC_SWIZZLE_UNUSED: swz = 4; break;
122 case RC_SWIZZLE_HALF: swz = 5; break;
123 case RC_SWIZZLE_ONE: swz = 6; break;
124 }
125
126 return swz;
127 }
128
129 static unsigned int
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)130 translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
131 {
132 unsigned int t = inst->RGB.Arg[arg].Source;
133 int comp;
134 t |= inst->RGB.Arg[arg].Negate << 11;
135 t |= inst->RGB.Arg[arg].Abs << 12;
136
137 for (comp = 0; comp < 3; ++comp)
138 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3 * comp + 2);
139
140 return t;
141 }
142
143 static unsigned int
translate_arg_alpha(struct rc_pair_instruction * inst,int i)144 translate_arg_alpha(struct rc_pair_instruction *inst, int i)
145 {
146 unsigned int t = inst->Alpha.Arg[i].Source;
147 t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
148 t |= inst->Alpha.Arg[i].Negate << 5;
149 t |= inst->Alpha.Arg[i].Abs << 6;
150 return t;
151 }
152
153 static uint32_t
translate_alu_result_op(struct r300_fragment_program_compiler * c,rc_compare_func func)154 translate_alu_result_op(struct r300_fragment_program_compiler *c, rc_compare_func func)
155 {
156 switch (func) {
157 case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
158 case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
159 case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
160 case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
161 default:
162 rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func);
163 return 0;
164 }
165 }
166
167 static void
use_temporary(struct r500_fragment_program_code * code,unsigned int index)168 use_temporary(struct r500_fragment_program_code *code, unsigned int index)
169 {
170 if (index > code->max_temp_idx)
171 code->max_temp_idx = index;
172 }
173
174 static unsigned int
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)175 use_source(struct r500_fragment_program_code *code, struct rc_pair_instruction_source src)
176 {
177 /* From docs:
178 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
179 * MSB = 1 << 7 */
180 if (!src.Used)
181 return 1 << 7;
182
183 if (src.File == RC_FILE_CONSTANT) {
184 return src.Index | R500_RGB_ADDR0_CONST;
185 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
186 use_temporary(code, src.Index);
187 return src.Index;
188 } else if (src.File == RC_FILE_INLINE) {
189 return src.Index | (1 << 7);
190 }
191
192 return 0;
193 }
194
195 /**
196 * NOP the specified instruction if it is not a texture lookup.
197 */
198 static void
alu_nop(struct r300_fragment_program_compiler * c,int ip)199 alu_nop(struct r300_fragment_program_compiler *c, int ip)
200 {
201 PROG_CODE;
202
203 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
204 code->inst[ip].inst0 |= R500_INST_NOP;
205 }
206 }
207
208 /**
209 * Emit a paired ALU instruction.
210 */
211 static void
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)212 emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
213 {
214 int ip;
215 PROG_CODE;
216
217 if (code->inst_end >= c->Base.max_alu_insts - 1) {
218 error("emit_alu: Too many instructions");
219 return;
220 }
221
222 ip = ++code->inst_end;
223
224 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
225 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
226 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
227 if (ip > 0) {
228 alu_nop(c, ip - 1);
229 }
230 }
231
232 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
233 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
234
235 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
236 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
237 if (inst->WriteALUResult) {
238 error("Cannot write output and ALU result at the same time");
239 return;
240 }
241 } else {
242 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
243 }
244 code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
245
246 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
247 code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
248 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
249 if (inst->Nop) {
250 code->inst[ip].inst0 |= R500_INST_NOP;
251 }
252 if (inst->Alpha.DepthWriteMask) {
253 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
254 c->code->writes_depth = 1;
255 }
256
257 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
258 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
259 if (inst->Alpha.WriteMask)
260 use_temporary(code, inst->Alpha.DestIndex);
261 if (inst->RGB.WriteMask)
262 use_temporary(code, inst->RGB.DestIndex);
263
264 if (inst->RGB.Saturate)
265 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
266 if (inst->Alpha.Saturate)
267 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
268
269 /* Set the presubtract operation. */
270 switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
271 case RC_PRESUB_BIAS:
272 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
273 break;
274 case RC_PRESUB_SUB:
275 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
276 break;
277 case RC_PRESUB_ADD:
278 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
279 break;
280 case RC_PRESUB_INV:
281 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
282 break;
283 default:
284 break;
285 }
286 switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
287 case RC_PRESUB_BIAS:
288 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
289 break;
290 case RC_PRESUB_SUB:
291 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
292 break;
293 case RC_PRESUB_ADD:
294 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
295 break;
296 case RC_PRESUB_INV:
297 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
298 break;
299 default:
300 break;
301 }
302
303 /* Set the output modifier */
304 code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
305 code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
306
307 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
308 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
309 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
310
311 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
312 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
313 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
314
315 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
316 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
317 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
318
319 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
320 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
321 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
322
323 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
324 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
325
326 if (inst->WriteALUResult) {
327 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
328
329 if (inst->WriteALUResult == RC_ALURESULT_X)
330 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
331 else
332 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
333
334 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
335 }
336 }
337
/**
 * Pack the four component selectors of \p swizzle into one value, keeping
 * the low two bits of each selector; component i ends up at bits [2i+1:2i].
 */
static unsigned int
translate_strq_swizzle(unsigned int swizzle)
{
   unsigned int packed = 0;

   /* Walk from the last component down so each shift moves earlier ones up. */
   for (int chan = 3; chan >= 0; chan--)
      packed = (packed << 2) | (GET_SWZ(swizzle, chan) & 0x3);

   return packed;
}
347
348 /**
349 * Emit a single TEX instruction
350 */
351 static int
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)352 emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
353 {
354 int ip;
355 PROG_CODE;
356
357 if (code->inst_end >= c->Base.max_alu_insts - 1) {
358 error("emit_tex: Too many instructions");
359 return 0;
360 }
361
362 ip = ++code->inst_end;
363
364 code->inst[ip].inst0 = R500_INST_TYPE_TEX | (inst->DstReg.WriteMask << 11) |
365 (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
366 code->inst[ip].inst1 =
367 R500_TEX_ID(inst->TexSrcUnit) | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
368
369 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
370 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
371
372 switch (inst->Opcode) {
373 case RC_OPCODE_KIL:
374 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
375 break;
376 case RC_OPCODE_TEX:
377 code->inst[ip].inst1 |= R500_TEX_INST_LD;
378 break;
379 case RC_OPCODE_TXB:
380 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
381 break;
382 case RC_OPCODE_TXP:
383 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
384 break;
385 case RC_OPCODE_TXD:
386 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
387 break;
388 case RC_OPCODE_TXL:
389 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
390 break;
391 default:
392 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
393 }
394
395 use_temporary(code, inst->SrcReg[0].Index);
396 if (inst->Opcode != RC_OPCODE_KIL)
397 use_temporary(code, inst->DstReg.Index);
398
399 code->inst[ip].inst2 =
400 R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) |
401 (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) |
402 R500_TEX_DST_ADDR(inst->DstReg.Index) | (GET_SWZ(inst->TexSwizzle, 0) << 24) |
403 (GET_SWZ(inst->TexSwizzle, 1) << 26) | (GET_SWZ(inst->TexSwizzle, 2) << 28) |
404 (GET_SWZ(inst->TexSwizzle, 3) << 30);
405
406 if (inst->Opcode == RC_OPCODE_TXD) {
407 use_temporary(code, inst->SrcReg[1].Index);
408 use_temporary(code, inst->SrcReg[2].Index);
409
410 /* DX and DY parameters are specified in a separate register. */
411 code->inst[ip].inst3 = R500_DX_ADDR(inst->SrcReg[1].Index) |
412 (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
413 R500_DY_ADDR(inst->SrcReg[2].Index) |
414 (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
415 }
416
417 return 1;
418 }
419
420 static void
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)421 emit_flowcontrol(struct emit_state *s, struct rc_instruction *inst)
422 {
423 unsigned int newip;
424
425 if (s->Code->inst_end >= s->C->max_alu_insts - 1) {
426 rc_error(s->C, "emit_tex: Too many instructions");
427 return;
428 }
429
430 newip = ++s->Code->inst_end;
431
432 /* Currently all loops use the same integer constant to initialize
433 * the loop variables. */
434 if (!s->Code->int_constants[0]) {
435 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
436 s->Code->int_constant_count = 1;
437 }
438 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
439 s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
440
441 switch (inst->U.I.Opcode) {
442 struct branch_info *branch;
443 struct r500_loop_info *loop;
444 case RC_OPCODE_BGNLOOP:
445 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, s->Loops, s->CurrentLoopDepth,
446 s->LoopsReserved, 1);
447
448 loop = &s->Loops[s->CurrentLoopDepth++];
449 memset(loop, 0, sizeof(struct r500_loop_info));
450 loop->BranchDepth = s->CurrentBranchDepth;
451 loop->BgnLoop = newip;
452
453 s->Code->inst[newip].inst2 =
454 R500_FC_OP_LOOP | R500_FC_JUMP_FUNC(0x00) | R500_FC_IGNORE_UNCOVERED;
455 break;
456 case RC_OPCODE_BRK:
457 loop = &s->Loops[s->CurrentLoopDepth - 1];
458 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, loop->BrkCount, loop->BrkReserved, 1);
459
460 loop->Brks[loop->BrkCount++] = newip;
461 s->Code->inst[newip].inst2 =
462 R500_FC_OP_BREAKLOOP | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR |
463 R500_FC_B_POP_CNT(s->CurrentBranchDepth - loop->BranchDepth) | R500_FC_IGNORE_UNCOVERED;
464 break;
465
466 case RC_OPCODE_CONT:
467 loop = &s->Loops[s->CurrentLoopDepth - 1];
468 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, loop->ContCount, loop->ContReserved,
469 1);
470 loop->Conts[loop->ContCount++] = newip;
471 s->Code->inst[newip].inst2 =
472 R500_FC_OP_CONTINUE | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR |
473 R500_FC_B_POP_CNT(s->CurrentBranchDepth - loop->BranchDepth) | R500_FC_IGNORE_UNCOVERED;
474 break;
475
476 case RC_OPCODE_ENDLOOP: {
477 loop = &s->Loops[s->CurrentLoopDepth - 1];
478 /* Emit ENDLOOP */
479 s->Code->inst[newip].inst2 =
480 R500_FC_OP_ENDLOOP | R500_FC_JUMP_FUNC(0xff) | R500_FC_JUMP_ANY | R500_FC_IGNORE_UNCOVERED;
481 /* The constant integer at index 0 is used by all loops. */
482 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) | R500_FC_JUMP_ADDR(loop->BgnLoop + 1);
483
484 /* Set jump address and int constant for BGNLOOP */
485 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) | R500_FC_JUMP_ADDR(newip);
486
487 /* Set jump address for the BRK instructions. */
488 while (loop->BrkCount--) {
489 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = R500_FC_JUMP_ADDR(newip + 1);
490 }
491
492 /* Set jump address for CONT instructions. */
493 while (loop->ContCount--) {
494 s->Code->inst[loop->Conts[loop->ContCount]].inst3 = R500_FC_JUMP_ADDR(newip);
495 }
496 s->CurrentLoopDepth--;
497 break;
498 }
499 case RC_OPCODE_IF:
500 if (s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
501 rc_error(s->C, "Branch depth exceeds hardware limit");
502 return;
503 }
504 memory_pool_array_reserve(&s->C->Pool, struct branch_info, s->Branches, s->CurrentBranchDepth,
505 s->BranchesReserved, 1);
506
507 branch = &s->Branches[s->CurrentBranchDepth++];
508 branch->If = newip;
509 branch->Else = -1;
510 branch->Endif = -1;
511
512 if (s->CurrentBranchDepth > s->MaxBranchDepth)
513 s->MaxBranchDepth = s->CurrentBranchDepth;
514
515 /* actual instruction is filled in at ENDIF time */
516 break;
517
518 case RC_OPCODE_ELSE:
519 if (!s->CurrentBranchDepth) {
520 rc_error(s->C, "%s: got ELSE outside a branch", __func__);
521 return;
522 }
523
524 branch = &s->Branches[s->CurrentBranchDepth - 1];
525 branch->Else = newip;
526
527 /* actual instruction is filled in at ENDIF time */
528 break;
529
530 case RC_OPCODE_ENDIF:
531 if (!s->CurrentBranchDepth) {
532 rc_error(s->C, "%s: got ELSE outside a branch", __func__);
533 return;
534 }
535
536 branch = &s->Branches[s->CurrentBranchDepth - 1];
537 branch->Endif = newip;
538
539 s->Code->inst[branch->Endif].inst2 =
540 R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */
541 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
542 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
543 | R500_FC_B_OP1_NONE /* no branch counter if stay */
544 | R500_FC_B_POP_CNT(1);
545 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
546 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */
547 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
548 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
549 | R500_FC_IGNORE_UNCOVERED;
550
551 if (branch->Else >= 0) {
552 /* increment branch counter also if jump */
553 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
554 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
555
556 s->Code->inst[branch->Else].inst2 =
557 R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */
558 | R500_FC_B_ELSE /* all active pixels want to jump */
559 | R500_FC_B_OP0_NONE /* no counter op if stay */
560 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
561 | R500_FC_B_POP_CNT(1);
562 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
563 } else {
564 /* don't touch branch counter on jump */
565 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
566 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
567 }
568
569 s->CurrentBranchDepth--;
570 break;
571 default:
572 rc_error(s->C, "%s: unknown opcode %s\n", __func__,
573 rc_get_opcode_info(inst->U.I.Opcode)->Name);
574 }
575 }
576
577 void
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)578 r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
579 {
580 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
581 struct emit_state s;
582 struct r500_fragment_program_code *code = &compiler->code->code.r500;
583
584 memset(&s, 0, sizeof(s));
585 s.C = &compiler->Base;
586 s.Code = code;
587
588 memset(code, 0, sizeof(*code));
589 code->max_temp_idx = 1;
590 code->inst_end = -1;
591
592 for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next;
593 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) {
594 if (inst->Type == RC_INSTRUCTION_NORMAL) {
595 const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
596
597 if (opcode->IsFlowControl) {
598 emit_flowcontrol(&s, inst);
599 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
600 continue;
601 } else {
602 emit_tex(compiler, &inst->U.I);
603 }
604 } else {
605 emit_paired(compiler, &inst->U.P);
606 }
607 }
608
609 if (code->max_temp_idx >= compiler->Base.max_temp_regs)
610 rc_error(&compiler->Base, "Too many hardware temporaries used\n");
611
612 if (compiler->Base.Error)
613 return;
614
615 if (code->inst_end == -1 ||
616 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
617 int ip;
618
619 /* This may happen when dead-code elimination is disabled or
620 * when most of the fragment program logic is leading to a KIL */
621 if (code->inst_end >= compiler->Base.max_alu_insts - 1) {
622 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
623 return;
624 }
625
626 ip = ++code->inst_end;
627 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
628 }
629
630 /* Make sure TEX_SEM_WAIT is set on the last instruction */
631 code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
632
633 /* Enable full flow control mode if we are using loops or have if
634 * statements nested at least four deep. */
635 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
636 if (code->max_temp_idx < 1)
637 code->max_temp_idx = 1;
638
639 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
640 }
641 }
642