• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2005 Ben Skeggs.
3  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
4  * SPDX-License-Identifier: MIT
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  */
7 /**
8  * \file
9  *
10  * \author Ben Skeggs <darktama@iinet.net.au>
11  *
12  * \author Jerome Glisse <j.glisse@gmail.com>
13  *
14  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
15  *
16  */
17 
18 #include "r500_fragprog.h"
19 
20 #include "r300_reg.h"
21 
22 #include "radeon_program_pair.h"
23 
24 #include "util/compiler.h"
25 
/* Declares a local `code` pointing at the R500 part of the code owned by the
 * fragment program compiler `c`; `c` must be in scope where this expands. */
#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500

/* Reports a compile error on `c->Base` via rc_error(). The message is prefixed
 * with the file and function name and a newline is appended, so callers should
 * not add their own. `fmt` must be a string literal. */
#define error(fmt, ...)                                                                            \
   do {                                                                                            \
      rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##__VA_ARGS__);                \
   } while (0)
32 
/**
 * Instruction indices of one IF / ELSE / ENDIF construct.
 *
 * Else and Endif are set to -1 when the IF is opened and are filled in when
 * the corresponding opcode is emitted.
 */
struct branch_info {
   int If;    /* ip of the IF instruction */
   int Else;  /* ip of the ELSE instruction, or -1 if there is none (yet) */
   int Endif; /* ip of the ENDIF instruction, or -1 until it is emitted */
};
38 
/**
 * Bookkeeping for one open loop.
 *
 * BRK and CONT instructions are recorded here so that their jump addresses
 * can be patched once the matching ENDLOOP is emitted.
 */
struct r500_loop_info {
   int BgnLoop; /* ip of the BGNLOOP instruction */

   int BranchDepth; /* branch depth at the time the loop was opened */

   int *Brks;       /* ips of the BRK instructions seen so far */
   int BrkCount;    /* number of valid entries in Brks */
   int BrkReserved; /* capacity tracked by memory_pool_array_reserve() */

   int *Conts;       /* ips of the CONT instructions seen so far */
   int ContCount;    /* number of valid entries in Conts */
   int ContReserved; /* capacity tracked by memory_pool_array_reserve() */
};
51 
/**
 * State carried across flow-control instructions while emitting a program.
 */
struct emit_state {
   struct radeon_compiler *C;               /* compiler used for errors and pool allocation */
   struct r500_fragment_program_code *Code; /* hardware code being written */

   /* Stack of currently open IF constructs. */
   struct branch_info *Branches;
   unsigned CurrentBranchDepth;
   unsigned BranchesReserved;

   /* Stack of currently open loops. */
   struct r500_loop_info *Loops;
   unsigned CurrentLoopDepth;
   unsigned LoopsReserved;

   /* Deepest IF nesting reached so far. */
   unsigned MaxBranchDepth;
};
66 
67 static unsigned int
translate_rgb_op(struct r300_fragment_program_compiler * c,rc_opcode opcode)68 translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
69 {
70    switch (opcode) {
71    case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
72    case RC_OPCODE_CND: return R500_ALU_RGBA_OP_CND;
73    case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
74    case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
75    case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
76    case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
77    case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
78    default:
79       error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
80       FALLTHROUGH;
81    case RC_OPCODE_NOP: FALLTHROUGH;
82    case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
83    case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
84    case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
85    case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
86    }
87 }
88 
89 static unsigned int
translate_alpha_op(struct r300_fragment_program_compiler * c,rc_opcode opcode)90 translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
91 {
92    switch (opcode) {
93    case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
94    case RC_OPCODE_CND: return R500_ALPHA_OP_CND;
95    case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
96    case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
97    case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
98    case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
99    case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
100    case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
101    case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
102    case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
103    default:
104       error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
105       FALLTHROUGH;
106    case RC_OPCODE_NOP: FALLTHROUGH;
107    case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
108    case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
109    case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
110    case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
111    case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
112    case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
113    }
114 }
115 
116 static unsigned int
fix_hw_swizzle(unsigned int swz)117 fix_hw_swizzle(unsigned int swz)
118 {
119    switch (swz) {
120    case RC_SWIZZLE_ZERO:
121    case RC_SWIZZLE_UNUSED: swz = 4; break;
122    case RC_SWIZZLE_HALF: swz = 5; break;
123    case RC_SWIZZLE_ONE: swz = 6; break;
124    }
125 
126    return swz;
127 }
128 
129 static unsigned int
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)130 translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
131 {
132    unsigned int t = inst->RGB.Arg[arg].Source;
133    int comp;
134    t |= inst->RGB.Arg[arg].Negate << 11;
135    t |= inst->RGB.Arg[arg].Abs << 12;
136 
137    for (comp = 0; comp < 3; ++comp)
138       t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3 * comp + 2);
139 
140    return t;
141 }
142 
143 static unsigned int
translate_arg_alpha(struct rc_pair_instruction * inst,int i)144 translate_arg_alpha(struct rc_pair_instruction *inst, int i)
145 {
146    unsigned int t = inst->Alpha.Arg[i].Source;
147    t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
148    t |= inst->Alpha.Arg[i].Negate << 5;
149    t |= inst->Alpha.Arg[i].Abs << 6;
150    return t;
151 }
152 
153 static uint32_t
translate_alu_result_op(struct r300_fragment_program_compiler * c,rc_compare_func func)154 translate_alu_result_op(struct r300_fragment_program_compiler *c, rc_compare_func func)
155 {
156    switch (func) {
157    case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
158    case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
159    case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
160    case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
161    default:
162       rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func);
163       return 0;
164    }
165 }
166 
167 static void
use_temporary(struct r500_fragment_program_code * code,unsigned int index)168 use_temporary(struct r500_fragment_program_code *code, unsigned int index)
169 {
170    if (index > code->max_temp_idx)
171       code->max_temp_idx = index;
172 }
173 
174 static unsigned int
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)175 use_source(struct r500_fragment_program_code *code, struct rc_pair_instruction_source src)
176 {
177    /* From docs:
178     *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
179     * MSB = 1 << 7 */
180    if (!src.Used)
181       return 1 << 7;
182 
183    if (src.File == RC_FILE_CONSTANT) {
184       return src.Index | R500_RGB_ADDR0_CONST;
185    } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
186       use_temporary(code, src.Index);
187       return src.Index;
188    } else if (src.File == RC_FILE_INLINE) {
189       return src.Index | (1 << 7);
190    }
191 
192    return 0;
193 }
194 
195 /**
196  * NOP the specified instruction if it is not a texture lookup.
197  */
198 static void
alu_nop(struct r300_fragment_program_compiler * c,int ip)199 alu_nop(struct r300_fragment_program_compiler *c, int ip)
200 {
201    PROG_CODE;
202 
203    if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
204       code->inst[ip].inst0 |= R500_INST_NOP;
205    }
206 }
207 
208 /**
209  * Emit a paired ALU instruction.
210  */
211 static void
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)212 emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
213 {
214    int ip;
215    PROG_CODE;
216 
217    if (code->inst_end >= c->Base.max_alu_insts - 1) {
218       error("emit_alu: Too many instructions");
219       return;
220    }
221 
222    ip = ++code->inst_end;
223 
224    /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
225    if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
226        inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
227       if (ip > 0) {
228          alu_nop(c, ip - 1);
229       }
230    }
231 
232    code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
233    code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
234 
235    if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
236       code->inst[ip].inst0 = R500_INST_TYPE_OUT;
237       if (inst->WriteALUResult) {
238          error("Cannot write output and ALU result at the same time");
239          return;
240       }
241    } else {
242       code->inst[ip].inst0 = R500_INST_TYPE_ALU;
243    }
244    code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
245 
246    code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
247    code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
248    code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
249    if (inst->Nop) {
250       code->inst[ip].inst0 |= R500_INST_NOP;
251    }
252    if (inst->Alpha.DepthWriteMask) {
253       code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
254       c->code->writes_depth = 1;
255    }
256 
257    code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
258    code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
259    if (inst->Alpha.WriteMask)
260       use_temporary(code, inst->Alpha.DestIndex);
261    if (inst->RGB.WriteMask)
262       use_temporary(code, inst->RGB.DestIndex);
263 
264    if (inst->RGB.Saturate)
265       code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
266    if (inst->Alpha.Saturate)
267       code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
268 
269    /* Set the presubtract operation. */
270    switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
271    case RC_PRESUB_BIAS:
272       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
273       break;
274    case RC_PRESUB_SUB:
275       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
276       break;
277    case RC_PRESUB_ADD:
278       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
279       break;
280    case RC_PRESUB_INV:
281       code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
282       break;
283    default:
284       break;
285    }
286    switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
287    case RC_PRESUB_BIAS:
288       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
289       break;
290    case RC_PRESUB_SUB:
291       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
292       break;
293    case RC_PRESUB_ADD:
294       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
295       break;
296    case RC_PRESUB_INV:
297       code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
298       break;
299    default:
300       break;
301    }
302 
303    /* Set the output modifier */
304    code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
305    code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
306 
307    code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
308    code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
309    code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
310 
311    code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
312    code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
313    code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
314 
315    code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
316    code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
317    code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
318 
319    code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
320    code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
321    code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
322 
323    code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
324    code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
325 
326    if (inst->WriteALUResult) {
327       code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
328 
329       if (inst->WriteALUResult == RC_ALURESULT_X)
330          code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
331       else
332          code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
333 
334       code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
335    }
336 }
337 
/**
 * Repack a four-component swizzle into 2 bits per component, keeping only the
 * low two bits of each component selector. Component i ends up at bits
 * [2i+1:2i] of the result.
 */
static unsigned int
translate_strq_swizzle(unsigned int swizzle)
{
   unsigned int packed = 0;

   for (int chan = 3; chan >= 0; chan--) {
      packed |= (GET_SWZ(swizzle, chan) & 0x3) << (2 * chan);
   }

   return packed;
}
347 
348 /**
349  * Emit a single TEX instruction
350  */
351 static int
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)352 emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
353 {
354    int ip;
355    PROG_CODE;
356 
357    if (code->inst_end >= c->Base.max_alu_insts - 1) {
358       error("emit_tex: Too many instructions");
359       return 0;
360    }
361 
362    ip = ++code->inst_end;
363 
364    code->inst[ip].inst0 = R500_INST_TYPE_TEX | (inst->DstReg.WriteMask << 11) |
365                           (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
366    code->inst[ip].inst1 =
367       R500_TEX_ID(inst->TexSrcUnit) | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
368 
369    if (inst->TexSrcTarget == RC_TEXTURE_RECT)
370       code->inst[ip].inst1 |= R500_TEX_UNSCALED;
371 
372    switch (inst->Opcode) {
373    case RC_OPCODE_KIL:
374       code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
375       break;
376    case RC_OPCODE_TEX:
377       code->inst[ip].inst1 |= R500_TEX_INST_LD;
378       break;
379    case RC_OPCODE_TXB:
380       code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
381       break;
382    case RC_OPCODE_TXP:
383       code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
384       break;
385    case RC_OPCODE_TXD:
386       code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
387       break;
388    case RC_OPCODE_TXL:
389       code->inst[ip].inst1 |= R500_TEX_INST_LOD;
390       break;
391    default:
392       error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
393    }
394 
395    use_temporary(code, inst->SrcReg[0].Index);
396    if (inst->Opcode != RC_OPCODE_KIL)
397       use_temporary(code, inst->DstReg.Index);
398 
399    code->inst[ip].inst2 =
400       R500_TEX_SRC_ADDR(inst->SrcReg[0].Index) |
401       (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8) |
402       R500_TEX_DST_ADDR(inst->DstReg.Index) | (GET_SWZ(inst->TexSwizzle, 0) << 24) |
403       (GET_SWZ(inst->TexSwizzle, 1) << 26) | (GET_SWZ(inst->TexSwizzle, 2) << 28) |
404       (GET_SWZ(inst->TexSwizzle, 3) << 30);
405 
406    if (inst->Opcode == RC_OPCODE_TXD) {
407       use_temporary(code, inst->SrcReg[1].Index);
408       use_temporary(code, inst->SrcReg[2].Index);
409 
410       /* DX and DY parameters are specified in a separate register. */
411       code->inst[ip].inst3 = R500_DX_ADDR(inst->SrcReg[1].Index) |
412                              (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
413                              R500_DY_ADDR(inst->SrcReg[2].Index) |
414                              (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
415    }
416 
417    return 1;
418 }
419 
420 static void
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)421 emit_flowcontrol(struct emit_state *s, struct rc_instruction *inst)
422 {
423    unsigned int newip;
424 
425    if (s->Code->inst_end >= s->C->max_alu_insts - 1) {
426       rc_error(s->C, "emit_tex: Too many instructions");
427       return;
428    }
429 
430    newip = ++s->Code->inst_end;
431 
432    /* Currently all loops use the same integer constant to initialize
433     * the loop variables. */
434    if (!s->Code->int_constants[0]) {
435       s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
436       s->Code->int_constant_count = 1;
437    }
438    s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
439    s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
440 
441    switch (inst->U.I.Opcode) {
442       struct branch_info *branch;
443       struct r500_loop_info *loop;
444    case RC_OPCODE_BGNLOOP:
445       memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info, s->Loops, s->CurrentLoopDepth,
446                                 s->LoopsReserved, 1);
447 
448       loop = &s->Loops[s->CurrentLoopDepth++];
449       memset(loop, 0, sizeof(struct r500_loop_info));
450       loop->BranchDepth = s->CurrentBranchDepth;
451       loop->BgnLoop = newip;
452 
453       s->Code->inst[newip].inst2 =
454          R500_FC_OP_LOOP | R500_FC_JUMP_FUNC(0x00) | R500_FC_IGNORE_UNCOVERED;
455       break;
456    case RC_OPCODE_BRK:
457       loop = &s->Loops[s->CurrentLoopDepth - 1];
458       memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, loop->BrkCount, loop->BrkReserved, 1);
459 
460       loop->Brks[loop->BrkCount++] = newip;
461       s->Code->inst[newip].inst2 =
462          R500_FC_OP_BREAKLOOP | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR |
463          R500_FC_B_POP_CNT(s->CurrentBranchDepth - loop->BranchDepth) | R500_FC_IGNORE_UNCOVERED;
464       break;
465 
466    case RC_OPCODE_CONT:
467       loop = &s->Loops[s->CurrentLoopDepth - 1];
468       memory_pool_array_reserve(&s->C->Pool, int, loop->Conts, loop->ContCount, loop->ContReserved,
469                                 1);
470       loop->Conts[loop->ContCount++] = newip;
471       s->Code->inst[newip].inst2 =
472          R500_FC_OP_CONTINUE | R500_FC_JUMP_FUNC(0xff) | R500_FC_B_OP1_DECR |
473          R500_FC_B_POP_CNT(s->CurrentBranchDepth - loop->BranchDepth) | R500_FC_IGNORE_UNCOVERED;
474       break;
475 
476    case RC_OPCODE_ENDLOOP: {
477       loop = &s->Loops[s->CurrentLoopDepth - 1];
478       /* Emit ENDLOOP */
479       s->Code->inst[newip].inst2 =
480          R500_FC_OP_ENDLOOP | R500_FC_JUMP_FUNC(0xff) | R500_FC_JUMP_ANY | R500_FC_IGNORE_UNCOVERED;
481       /* The constant integer at index 0 is used by all loops. */
482       s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) | R500_FC_JUMP_ADDR(loop->BgnLoop + 1);
483 
484       /* Set jump address and int constant for BGNLOOP */
485       s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) | R500_FC_JUMP_ADDR(newip);
486 
487       /* Set jump address for the BRK instructions. */
488       while (loop->BrkCount--) {
489          s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = R500_FC_JUMP_ADDR(newip + 1);
490       }
491 
492       /* Set jump address for CONT instructions. */
493       while (loop->ContCount--) {
494          s->Code->inst[loop->Conts[loop->ContCount]].inst3 = R500_FC_JUMP_ADDR(newip);
495       }
496       s->CurrentLoopDepth--;
497       break;
498    }
499    case RC_OPCODE_IF:
500       if (s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
501          rc_error(s->C, "Branch depth exceeds hardware limit");
502          return;
503       }
504       memory_pool_array_reserve(&s->C->Pool, struct branch_info, s->Branches, s->CurrentBranchDepth,
505                                 s->BranchesReserved, 1);
506 
507       branch = &s->Branches[s->CurrentBranchDepth++];
508       branch->If = newip;
509       branch->Else = -1;
510       branch->Endif = -1;
511 
512       if (s->CurrentBranchDepth > s->MaxBranchDepth)
513          s->MaxBranchDepth = s->CurrentBranchDepth;
514 
515       /* actual instruction is filled in at ENDIF time */
516       break;
517 
518    case RC_OPCODE_ELSE:
519       if (!s->CurrentBranchDepth) {
520          rc_error(s->C, "%s: got ELSE outside a branch", __func__);
521          return;
522       }
523 
524       branch = &s->Branches[s->CurrentBranchDepth - 1];
525       branch->Else = newip;
526 
527       /* actual instruction is filled in at ENDIF time */
528       break;
529 
530    case RC_OPCODE_ENDIF:
531       if (!s->CurrentBranchDepth) {
532          rc_error(s->C, "%s: got ELSE outside a branch", __func__);
533          return;
534       }
535 
536       branch = &s->Branches[s->CurrentBranchDepth - 1];
537       branch->Endif = newip;
538 
539       s->Code->inst[branch->Endif].inst2 =
540          R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */
541          | R500_FC_JUMP_ANY                  /* docs says set this, but I don't understand why */
542          | R500_FC_B_OP0_DECR                /* decrement branch counter if stay */
543          | R500_FC_B_OP1_NONE                /* no branch counter if stay */
544          | R500_FC_B_POP_CNT(1);
545       s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
546       s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */
547                                         | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
548                                         | R500_FC_B_OP0_INCR /* increment branch counter if stay */
549                                         | R500_FC_IGNORE_UNCOVERED;
550 
551       if (branch->Else >= 0) {
552          /* increment branch counter also if jump */
553          s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
554          s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
555 
556          s->Code->inst[branch->Else].inst2 =
557             R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */
558             | R500_FC_B_ELSE                    /* all active pixels want to jump */
559             | R500_FC_B_OP0_NONE                /* no counter op if stay */
560             | R500_FC_B_OP1_DECR                /* decrement branch counter if jump */
561             | R500_FC_B_POP_CNT(1);
562          s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
563       } else {
564          /* don't touch branch counter on jump */
565          s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
566          s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
567       }
568 
569       s->CurrentBranchDepth--;
570       break;
571    default:
572       rc_error(s->C, "%s: unknown opcode %s\n", __func__,
573                rc_get_opcode_info(inst->U.I.Opcode)->Name);
574    }
575 }
576 
577 void
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)578 r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
579 {
580    struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
581    struct emit_state s;
582    struct r500_fragment_program_code *code = &compiler->code->code.r500;
583 
584    memset(&s, 0, sizeof(s));
585    s.C = &compiler->Base;
586    s.Code = code;
587 
588    memset(code, 0, sizeof(*code));
589    code->max_temp_idx = 1;
590    code->inst_end = -1;
591 
592    for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next;
593         inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) {
594       if (inst->Type == RC_INSTRUCTION_NORMAL) {
595          const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
596 
597          if (opcode->IsFlowControl) {
598             emit_flowcontrol(&s, inst);
599          } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
600             continue;
601          } else {
602             emit_tex(compiler, &inst->U.I);
603          }
604       } else {
605          emit_paired(compiler, &inst->U.P);
606       }
607    }
608 
609    if (code->max_temp_idx >= compiler->Base.max_temp_regs)
610       rc_error(&compiler->Base, "Too many hardware temporaries used\n");
611 
612    if (compiler->Base.Error)
613       return;
614 
615    if (code->inst_end == -1 ||
616        (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
617       int ip;
618 
619       /* This may happen when dead-code elimination is disabled or
620        * when most of the fragment program logic is leading to a KIL */
621       if (code->inst_end >= compiler->Base.max_alu_insts - 1) {
622          rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
623          return;
624       }
625 
626       ip = ++code->inst_end;
627       code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
628    }
629 
630    /* Make sure TEX_SEM_WAIT is set on the last instruction */
631    code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
632 
633    /* Enable full flow control mode if we are using loops or have if
634     * statements nested at least four deep. */
635    if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
636       if (code->max_temp_idx < 1)
637          code->max_temp_idx = 1;
638 
639       code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
640    }
641 }
642