1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6 *
7 * All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining
10 * a copy of this software and associated documentation files (the
11 * "Software"), to deal in the Software without restriction, including
12 * without limitation the rights to use, copy, modify, merge, publish,
13 * distribute, sublicense, and/or sell copies of the Software, and to
14 * permit persons to whom the Software is furnished to do so, subject to
15 * the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the
18 * next paragraph) shall be included in all copies or substantial
19 * portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 *
29 */
30
31 /**
32 * \file
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 *
38 * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39 *
40 */
41
42 #include "r500_fragprog.h"
43
44 #include "r300_reg.h"
45
46 #include "radeon_program_pair.h"
47
48 #include "util/compiler.h"
49
/*
 * Declares a local `code` pointer to the R500 program code of the compiler.
 * A compiler pointer named `c` must be in scope where this is expanded.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500
52
/*
 * Report a compile error through rc_error().  The message is prefixed with
 * the source file and the calling function, and a newline is appended.
 * A compiler pointer named `c` must be in scope where this is expanded.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __func__, ##__VA_ARGS__); \
	} while (0)
57
58
/**
 * Bookkeeping for one open IF / ELSE / ENDIF construct.
 *
 * Each member holds the index of the flow-control instruction slot emitted
 * for that keyword.  Else and Endif are set to -1 when the IF is opened and
 * are overwritten once the corresponding keyword is emitted.
 */
struct branch_info {
	int If;    /* slot of the IF instruction */
	int Else;  /* slot of the ELSE instruction, or -1 */
	int Endif; /* slot of the ENDIF instruction, or -1 */
};
64
/**
 * Bookkeeping for one open BGNLOOP / ENDLOOP construct.
 *
 * The BRK and CONT instruction slots are collected while the loop body is
 * emitted so that their jump addresses can be filled in at ENDLOOP time.
 */
struct r500_loop_info {
	/* Slot of the BGNLOOP instruction. */
	int BgnLoop;

	/* Branch nesting depth that was current when the loop was opened. */
	int BranchDepth;

	/* Growable list of BRK instruction slots. */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Growable list of CONT instruction slots. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
77
/**
 * State carried across flow-control instructions while a program is emitted.
 */
struct emit_state {
	/* Compiler and the hardware program being written. */
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Deepest IF nesting reached so far. */
	unsigned int MaxBranchDepth;
};
93
/**
 * Map a compiler opcode to the R500 RGB ALU opcode.
 *
 * RC_OPCODE_NOP is encoded as MAD.  An opcode without an RGB encoding is
 * reported through error() and is likewise encoded as MAD.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* Unknown opcode: flag the error, then fall back to MAD. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
115
/**
 * Map a compiler opcode to the R500 alpha ALU opcode.
 *
 * RC_OPCODE_NOP is encoded as MAD.  An opcode without an alpha encoding is
 * reported through error() and is likewise encoded as MAD.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	/* Unknown opcode: flag the error, then fall back to MAD. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
142
fix_hw_swizzle(unsigned int swz)143 static unsigned int fix_hw_swizzle(unsigned int swz)
144 {
145 switch (swz) {
146 case RC_SWIZZLE_ZERO:
147 case RC_SWIZZLE_UNUSED:
148 swz = 4;
149 break;
150 case RC_SWIZZLE_HALF:
151 swz = 5;
152 break;
153 case RC_SWIZZLE_ONE:
154 swz = 6;
155 break;
156 }
157
158 return swz;
159 }
160
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)161 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
162 {
163 unsigned int t = inst->RGB.Arg[arg].Source;
164 int comp;
165 t |= inst->RGB.Arg[arg].Negate << 11;
166 t |= inst->RGB.Arg[arg].Abs << 12;
167
168 for(comp = 0; comp < 3; ++comp)
169 t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
170
171 return t;
172 }
173
translate_arg_alpha(struct rc_pair_instruction * inst,int i)174 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
175 {
176 unsigned int t = inst->Alpha.Arg[i].Source;
177 t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
178 t |= inst->Alpha.Arg[i].Negate << 5;
179 t |= inst->Alpha.Arg[i].Abs << 6;
180 return t;
181 }
182
/**
 * Map a compare function to the ALU result operation bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are supported; anything else is
 * reported through rc_error() and yields 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	if (func == RC_COMPARE_FUNC_EQUAL)
		return R500_INST_ALU_RESULT_OP_EQ;
	if (func == RC_COMPARE_FUNC_LESS)
		return R500_INST_ALU_RESULT_OP_LT;
	if (func == RC_COMPARE_FUNC_GEQUAL)
		return R500_INST_ALU_RESULT_OP_GE;
	if (func == RC_COMPARE_FUNC_NOTEQUAL)
		return R500_INST_ALU_RESULT_OP_NE;

	rc_error(&c->Base, "%s: unsupported compare func %i\n", __func__, func);
	return 0;
}
195
use_temporary(struct r500_fragment_program_code * code,unsigned int index)196 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
197 {
198 if (index > code->max_temp_idx)
199 code->max_temp_idx = index;
200 }
201
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)202 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
203 {
204 /* From docs:
205 * Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
206 * MSB = 1 << 7 */
207 if (!src.Used)
208 return 1 << 7;
209
210 if (src.File == RC_FILE_CONSTANT) {
211 return src.Index | R500_RGB_ADDR0_CONST;
212 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
213 use_temporary(code, src.Index);
214 return src.Index;
215 } else if (src.File == RC_FILE_INLINE) {
216 return src.Index | (1 << 7);
217 }
218
219 return 0;
220 }
221
222 /**
223 * NOP the specified instruction if it is not a texture lookup.
224 */
alu_nop(struct r300_fragment_program_compiler * c,int ip)225 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
226 {
227 PROG_CODE;
228
229 if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
230 code->inst[ip].inst0 |= R500_INST_NOP;
231 }
232 }
233
234 /**
235 * Emit a paired ALU instruction.
236 */
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)237 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
238 {
239 int ip;
240 PROG_CODE;
241
242 if (code->inst_end >= c->Base.max_alu_insts-1) {
243 error("emit_alu: Too many instructions");
244 return;
245 }
246
247 ip = ++code->inst_end;
248
249 /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
250 if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
251 inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
252 if (ip > 0) {
253 alu_nop(c, ip - 1);
254 }
255 }
256
257 code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
258 code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
259
260 if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
261 code->inst[ip].inst0 = R500_INST_TYPE_OUT;
262 if (inst->WriteALUResult) {
263 error("Cannot write output and ALU result at the same time");
264 return;
265 }
266 } else {
267 code->inst[ip].inst0 = R500_INST_TYPE_ALU;
268 }
269 code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
270
271 code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
272 code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
273 code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
274 if (inst->Nop) {
275 code->inst[ip].inst0 |= R500_INST_NOP;
276 }
277 if (inst->Alpha.DepthWriteMask) {
278 code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
279 c->code->writes_depth = 1;
280 }
281
282 code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
283 code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
284 if (inst->Alpha.WriteMask)
285 use_temporary(code, inst->Alpha.DestIndex);
286 if (inst->RGB.WriteMask)
287 use_temporary(code, inst->RGB.DestIndex);
288
289 if (inst->RGB.Saturate)
290 code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
291 if (inst->Alpha.Saturate)
292 code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
293
294 /* Set the presubtract operation. */
295 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
296 case RC_PRESUB_BIAS:
297 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
298 break;
299 case RC_PRESUB_SUB:
300 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
301 break;
302 case RC_PRESUB_ADD:
303 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
304 break;
305 case RC_PRESUB_INV:
306 code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
307 break;
308 default:
309 break;
310 }
311 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
312 case RC_PRESUB_BIAS:
313 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
314 break;
315 case RC_PRESUB_SUB:
316 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
317 break;
318 case RC_PRESUB_ADD:
319 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
320 break;
321 case RC_PRESUB_INV:
322 code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
323 break;
324 default:
325 break;
326 }
327
328 /* Set the output modifier */
329 code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
330 code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
331
332 code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
333 code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
334 code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
335
336 code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
337 code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
338 code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
339
340 code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
341 code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
342 code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
343
344 code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
345 code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
346 code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
347
348 code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
349 code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
350
351 if (inst->WriteALUResult) {
352 code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
353
354 if (inst->WriteALUResult == RC_ALURESULT_X)
355 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
356 else
357 code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
358
359 code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
360 }
361 }
362
/**
 * Repack a four-component swizzle into two bits per component, keeping only
 * the low two bits of each selector.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan = 4;

	while (chan-- > 0)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << chan*2;

	return packed;
}
371
372 /**
373 * Emit a single TEX instruction
374 */
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)375 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
376 {
377 int ip;
378 PROG_CODE;
379
380 if (code->inst_end >= c->Base.max_alu_insts-1) {
381 error("emit_tex: Too many instructions");
382 return 0;
383 }
384
385 ip = ++code->inst_end;
386
387 code->inst[ip].inst0 = R500_INST_TYPE_TEX
388 | (inst->DstReg.WriteMask << 11)
389 | (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
390 code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
391 | (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
392
393 if (inst->TexSrcTarget == RC_TEXTURE_RECT)
394 code->inst[ip].inst1 |= R500_TEX_UNSCALED;
395
396 switch (inst->Opcode) {
397 case RC_OPCODE_KIL:
398 code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
399 break;
400 case RC_OPCODE_TEX:
401 code->inst[ip].inst1 |= R500_TEX_INST_LD;
402 break;
403 case RC_OPCODE_TXB:
404 code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
405 break;
406 case RC_OPCODE_TXP:
407 code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
408 break;
409 case RC_OPCODE_TXD:
410 code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
411 break;
412 case RC_OPCODE_TXL:
413 code->inst[ip].inst1 |= R500_TEX_INST_LOD;
414 break;
415 default:
416 error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
417 }
418
419 use_temporary(code, inst->SrcReg[0].Index);
420 if (inst->Opcode != RC_OPCODE_KIL)
421 use_temporary(code, inst->DstReg.Index);
422
423 code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
424 | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
425 | R500_TEX_DST_ADDR(inst->DstReg.Index)
426 | (GET_SWZ(inst->TexSwizzle, 0) << 24)
427 | (GET_SWZ(inst->TexSwizzle, 1) << 26)
428 | (GET_SWZ(inst->TexSwizzle, 2) << 28)
429 | (GET_SWZ(inst->TexSwizzle, 3) << 30)
430 ;
431
432 if (inst->Opcode == RC_OPCODE_TXD) {
433 use_temporary(code, inst->SrcReg[1].Index);
434 use_temporary(code, inst->SrcReg[2].Index);
435
436 /* DX and DY parameters are specified in a separate register. */
437 code->inst[ip].inst3 =
438 R500_DX_ADDR(inst->SrcReg[1].Index) |
439 (translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
440 R500_DY_ADDR(inst->SrcReg[2].Index) |
441 (translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
442 }
443
444 return 1;
445 }
446
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)447 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
448 {
449 unsigned int newip;
450
451 if (s->Code->inst_end >= s->C->max_alu_insts-1) {
452 rc_error(s->C, "emit_tex: Too many instructions");
453 return;
454 }
455
456 newip = ++s->Code->inst_end;
457
458 /* Currently all loops use the same integer constant to initialize
459 * the loop variables. */
460 if(!s->Code->int_constants[0]) {
461 s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
462 s->Code->int_constant_count = 1;
463 }
464 s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
465 s->Code->inst[newip].inst0 |= (inst->U.I.TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
466
467 switch(inst->U.I.Opcode){
468 struct branch_info * branch;
469 struct r500_loop_info * loop;
470 case RC_OPCODE_BGNLOOP:
471 memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
472 s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
473
474 loop = &s->Loops[s->CurrentLoopDepth++];
475 memset(loop, 0, sizeof(struct r500_loop_info));
476 loop->BranchDepth = s->CurrentBranchDepth;
477 loop->BgnLoop = newip;
478
479 s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
480 | R500_FC_JUMP_FUNC(0x00)
481 | R500_FC_IGNORE_UNCOVERED
482 ;
483 break;
484 case RC_OPCODE_BRK:
485 loop = &s->Loops[s->CurrentLoopDepth - 1];
486 memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
487 loop->BrkCount, loop->BrkReserved, 1);
488
489 loop->Brks[loop->BrkCount++] = newip;
490 s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
491 | R500_FC_JUMP_FUNC(0xff)
492 | R500_FC_B_OP1_DECR
493 | R500_FC_B_POP_CNT(
494 s->CurrentBranchDepth - loop->BranchDepth)
495 | R500_FC_IGNORE_UNCOVERED
496 ;
497 break;
498
499 case RC_OPCODE_CONT:
500 loop = &s->Loops[s->CurrentLoopDepth - 1];
501 memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
502 loop->ContCount, loop->ContReserved, 1);
503 loop->Conts[loop->ContCount++] = newip;
504 s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
505 | R500_FC_JUMP_FUNC(0xff)
506 | R500_FC_B_OP1_DECR
507 | R500_FC_B_POP_CNT(
508 s->CurrentBranchDepth - loop->BranchDepth)
509 | R500_FC_IGNORE_UNCOVERED
510 ;
511 break;
512
513 case RC_OPCODE_ENDLOOP:
514 {
515 loop = &s->Loops[s->CurrentLoopDepth - 1];
516 /* Emit ENDLOOP */
517 s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
518 | R500_FC_JUMP_FUNC(0xff)
519 | R500_FC_JUMP_ANY
520 | R500_FC_IGNORE_UNCOVERED
521 ;
522 /* The constant integer at index 0 is used by all loops. */
523 s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
524 | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
525 ;
526
527 /* Set jump address and int constant for BGNLOOP */
528 s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
529 | R500_FC_JUMP_ADDR(newip)
530 ;
531
532 /* Set jump address for the BRK instructions. */
533 while(loop->BrkCount--) {
534 s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
535 R500_FC_JUMP_ADDR(newip + 1);
536 }
537
538 /* Set jump address for CONT instructions. */
539 while(loop->ContCount--) {
540 s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
541 R500_FC_JUMP_ADDR(newip);
542 }
543 s->CurrentLoopDepth--;
544 break;
545 }
546 case RC_OPCODE_IF:
547 if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
548 rc_error(s->C, "Branch depth exceeds hardware limit");
549 return;
550 }
551 memory_pool_array_reserve(&s->C->Pool, struct branch_info,
552 s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
553
554 branch = &s->Branches[s->CurrentBranchDepth++];
555 branch->If = newip;
556 branch->Else = -1;
557 branch->Endif = -1;
558
559 if (s->CurrentBranchDepth > s->MaxBranchDepth)
560 s->MaxBranchDepth = s->CurrentBranchDepth;
561
562 /* actual instruction is filled in at ENDIF time */
563 break;
564
565 case RC_OPCODE_ELSE:
566 if (!s->CurrentBranchDepth) {
567 rc_error(s->C, "%s: got ELSE outside a branch", __func__);
568 return;
569 }
570
571 branch = &s->Branches[s->CurrentBranchDepth - 1];
572 branch->Else = newip;
573
574 /* actual instruction is filled in at ENDIF time */
575 break;
576
577 case RC_OPCODE_ENDIF:
578 if (!s->CurrentBranchDepth) {
579 rc_error(s->C, "%s: got ELSE outside a branch", __func__);
580 return;
581 }
582
583 branch = &s->Branches[s->CurrentBranchDepth - 1];
584 branch->Endif = newip;
585
586 s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
587 | R500_FC_A_OP_NONE /* no address stack */
588 | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
589 | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
590 | R500_FC_B_OP1_NONE /* no branch counter if stay */
591 | R500_FC_B_POP_CNT(1)
592 ;
593 s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
594 s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
595 | R500_FC_A_OP_NONE /* no address stack */
596 | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
597 | R500_FC_B_OP0_INCR /* increment branch counter if stay */
598 | R500_FC_IGNORE_UNCOVERED
599 ;
600
601 if (branch->Else >= 0) {
602 /* increment branch counter also if jump */
603 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
604 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
605
606 s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
607 | R500_FC_A_OP_NONE /* no address stack */
608 | R500_FC_B_ELSE /* all active pixels want to jump */
609 | R500_FC_B_OP0_NONE /* no counter op if stay */
610 | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
611 | R500_FC_B_POP_CNT(1)
612 ;
613 s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
614 } else {
615 /* don't touch branch counter on jump */
616 s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
617 s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
618 }
619
620
621 s->CurrentBranchDepth--;
622 break;
623 default:
624 rc_error(s->C, "%s: unknown opcode %s\n", __func__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
625 }
626 }
627
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)628 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
629 {
630 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
631 struct emit_state s;
632 struct r500_fragment_program_code *code = &compiler->code->code.r500;
633
634 memset(&s, 0, sizeof(s));
635 s.C = &compiler->Base;
636 s.Code = code;
637
638 memset(code, 0, sizeof(*code));
639 code->max_temp_idx = 1;
640 code->inst_end = -1;
641
642 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
643 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
644 inst = inst->Next) {
645 if (inst->Type == RC_INSTRUCTION_NORMAL) {
646 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
647
648 if (opcode->IsFlowControl) {
649 emit_flowcontrol(&s, inst);
650 } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
651 continue;
652 } else {
653 emit_tex(compiler, &inst->U.I);
654 }
655 } else {
656 emit_paired(compiler, &inst->U.P);
657 }
658 }
659
660 if (code->max_temp_idx >= compiler->Base.max_temp_regs)
661 rc_error(&compiler->Base, "Too many hardware temporaries used\n");
662
663 if (compiler->Base.Error)
664 return;
665
666 if (code->inst_end == -1 ||
667 (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
668 int ip;
669
670 /* This may happen when dead-code elimination is disabled or
671 * when most of the fragment program logic is leading to a KIL */
672 if (code->inst_end >= compiler->Base.max_alu_insts-1) {
673 rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
674 return;
675 }
676
677 ip = ++code->inst_end;
678 code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
679 }
680
681 /* Make sure TEX_SEM_WAIT is set on the last instruction */
682 code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
683
684 /* Enable full flow control mode if we are using loops or have if
685 * statements nested at least four deep. */
686 if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
687 if (code->max_temp_idx < 1)
688 code->max_temp_idx = 1;
689
690 code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
691 }
692 }
693