/*
 * Copyright 2005 Ben Skeggs.
 * SPDX-License-Identifier: MIT
 */

/**
 * \file
 *
 * Emit the r300_fragment_program_code that can be understood by the hardware.
 * Input is a pre-transformed radeon_program.
 *
 * \author Ben Skeggs <darktama@iinet.net.au>
 *
 * \author Jerome Glisse <j.glisse@gmail.com>
 */

#include "r300_fragprog.h"

#include <stdint.h>
#include <string.h>

#include "r300_reg.h"

#include "r300_fragprog_swizzle.h"
#include "radeon_program_pair.h"

#include "util/compiler.h"

/**
 * Running state of the emission pass.
 *
 * The program is emitted as a sequence of nodes; current_node is a 2-bit
 * index, so at most four nodes (0..3) can be addressed.
 *
 * node_first_tex / node_first_alu hold the TEX and ALU instruction counts at
 * the moment the current node was opened, i.e. the offset of the node's first
 * instruction in each stream.  They are full-width integers on purpose:
 * finish_node() extracts bits 6-8 of the ALU offset and the bits above bit 4
 * of the TEX offset for the r400 extended fields, and an 8-bit bitfield would
 * silently truncate any offset of 256 or more.
 *
 * node_flags accumulates the per-node output flags that are OR'ed into the
 * node's code_addr word; it is reset whenever a new node is started.
 */
struct r300_emit_state {
   struct r300_fragment_program_compiler *compiler;

   unsigned current_node : 2;
   unsigned int node_first_tex;
   unsigned int node_first_alu;
   uint32_t node_flags;
};

/*
 * Declares the two locals most emit helpers need: `c`, the fragment program
 * compiler, and `code`, the r300 hardware code being filled in.  Expects a
 * variable named `emit` (struct r300_emit_state *) to be in scope.  The
 * expansion deliberately has no trailing semicolon; the user supplies it.
 */
#define PROG_CODE                                                                                  \
   struct r300_fragment_program_compiler *c = emit->compiler;                                      \
   struct r300_fragment_program_code *code = &c->code->code.r300

/*
 * Report a compile error through rc_error(), prefixed with the source file
 * and the calling function's name; a newline is appended to the message.
 * Expects a variable named `c` with a `Base` member to be in scope.
 */
#define error(fmt, ...)                                                                            \
   do {                                                                                            \
      rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##__VA_ARGS__);                \
   } while (0)

/**
 * Extract the three bits above the low six bits of an ALU offset or size
 * (bits 6..8 of \p bits), as used for the r400 extended ALU fields.
 *
 * @param bits The full ALU offset or size
 * @return Bits 6..8 of \p bits, shifted down to bit 0
 */
static unsigned int
get_msbs_alu(unsigned int bits)
{
   return (bits >> 6) & 0x7;
}

/**
 * Extract the bits of a TEX offset or size that lie above its low \p lsbs
 * bits, for the r400 extended TEX fields.
 *
 * The result is masked to four contiguous bits.  A mask of 0x15 (binary
 * 10101) must not be used here: it discards bits 1 and 3 of the result and
 * lets bit 4 through, which would spill into the neighbouring register field
 * once shifted into place.
 *
 * NOTE(review): assumes the R400 TEX start/size MSB fields are 4 bits wide;
 * confirm against the register definitions.
 *
 * @param bits The full TEX offset or size
 * @param lsbs The number of least significant bits
 * @return The four bits of \p bits starting at bit \p lsbs
 */
static unsigned int
get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
   return (bits >> lsbs) & 0xf;
}

/* Shift x right by lsbs bits and keep the bits selected by mask.  Every
 * argument is parenthesized so that expression arguments expand safely. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))

61 /**
62  * Mark a temporary register as used.
63  */
64 static void
use_temporary(struct r300_fragment_program_code * code,unsigned int index)65 use_temporary(struct r300_fragment_program_code *code, unsigned int index)
66 {
67    if (index > code->pixsize)
68       code->pixsize = index;
69 }
70 
71 static unsigned int
use_source(struct r300_fragment_program_code * code,struct rc_pair_instruction_source src)72 use_source(struct r300_fragment_program_code *code, struct rc_pair_instruction_source src)
73 {
74    if (!src.Used)
75       return 0;
76 
77    if (src.File == RC_FILE_CONSTANT) {
78       return src.Index | (1 << 5);
79    } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
80       use_temporary(code, src.Index);
81       return src.Index & 0x1f;
82    }
83 
84    return 0;
85 }
86 
87 static unsigned int
translate_rgb_opcode(struct r300_fragment_program_compiler * c,rc_opcode opcode)88 translate_rgb_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode)
89 {
90    switch (opcode) {
91    case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
92    case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
93    case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
94    case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
95    case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
96    default:
97       error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
98       FALLTHROUGH;
99    case RC_OPCODE_NOP: FALLTHROUGH;
100    case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
101    case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
102    case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
103    case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
104    }
105 }
106 
107 static unsigned int
translate_alpha_opcode(struct r300_fragment_program_compiler * c,rc_opcode opcode)108 translate_alpha_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode)
109 {
110    switch (opcode) {
111    case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
112    case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
113    case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
114    case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
115    case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
116    case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
117    case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
118    default:
119       error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
120       FALLTHROUGH;
121    case RC_OPCODE_NOP: FALLTHROUGH;
122    case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
123    case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
124    case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
125    case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
126    case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
127    }
128 }
129 
130 /**
131  * Emit one paired ALU instruction.
132  */
133 static int
emit_alu(struct r300_emit_state * emit,struct rc_pair_instruction * inst)134 emit_alu(struct r300_emit_state *emit, struct rc_pair_instruction *inst)
135 {
136    int ip;
137    int j;
138    PROG_CODE;
139 
140    if (code->alu.length >= c->Base.max_alu_insts) {
141       /* rc_recompute_ips does not give an exact count, because it counts extra stuff
142        * like BEGINTEX, but here it is intended to be only approximative anyway,
143        * just to give some idea how close to the limit we are. */
144       rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n",
145                rc_recompute_ips(&c->Base), c->Base.max_alu_insts);
146       return 0;
147    }
148 
149    ip = code->alu.length++;
150 
151    code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
152    code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
153 
154    for (j = 0; j < 3; ++j) {
155       /* Set the RGB address */
156       unsigned int src = use_source(code, inst->RGB.Src[j]);
157       unsigned int arg;
158       if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
159          code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
160 
161       code->alu.inst[ip].rgb_addr |= src << (6 * j);
162 
163       /* Set the Alpha address */
164       src = use_source(code, inst->Alpha.Src[j]);
165       if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
166          code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
167 
168       code->alu.inst[ip].alpha_addr |= src << (6 * j);
169 
170       arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
171       arg |= inst->RGB.Arg[j].Abs << 6;
172       arg |= inst->RGB.Arg[j].Negate << 5;
173       code->alu.inst[ip].rgb_inst |= arg << (7 * j);
174 
175       arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
176       arg |= inst->Alpha.Arg[j].Abs << 6;
177       arg |= inst->Alpha.Arg[j].Negate << 5;
178       code->alu.inst[ip].alpha_inst |= arg << (7 * j);
179    }
180 
181    /* Presubtract */
182    if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
183       switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
184       case RC_PRESUB_BIAS:
185          code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0;
186          break;
187       case RC_PRESUB_ADD:
188          code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0;
189          break;
190       case RC_PRESUB_SUB:
191          code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0;
192          break;
193       case RC_PRESUB_INV:
194          code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_SRC0;
195          break;
196       default:
197          break;
198       }
199    }
200 
201    if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
202       switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
203       case RC_PRESUB_BIAS:
204          code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0;
205          break;
206       case RC_PRESUB_ADD:
207          code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0;
208          break;
209       case RC_PRESUB_SUB:
210          code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0;
211          break;
212       case RC_PRESUB_INV:
213          code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_SRC0;
214          break;
215       default:
216          break;
217       }
218    }
219 
220    if (inst->RGB.Saturate)
221       code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
222    if (inst->Alpha.Saturate)
223       code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
224 
225    if (inst->RGB.WriteMask) {
226       use_temporary(code, inst->RGB.DestIndex);
227       if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
228          code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
229       code->alu.inst[ip].rgb_addr |= ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
230                                      (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
231    }
232    if (inst->RGB.OutputWriteMask) {
233       code->alu.inst[ip].rgb_addr |=
234          (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
235          R300_RGB_TARGET(inst->RGB.Target);
236       emit->node_flags |= R300_RGBA_OUT;
237    }
238 
239    if (inst->Alpha.WriteMask) {
240       use_temporary(code, inst->Alpha.DestIndex);
241       if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
242          code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
243       code->alu.inst[ip].alpha_addr |=
244          ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG;
245    }
246    if (inst->Alpha.OutputWriteMask) {
247       code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | R300_ALPHA_TARGET(inst->Alpha.Target);
248       emit->node_flags |= R300_RGBA_OUT;
249    }
250    if (inst->Alpha.DepthWriteMask) {
251       code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
252       emit->node_flags |= R300_W_OUT;
253       c->code->writes_depth = 1;
254    }
255    if (inst->Nop)
256       code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
257 
258    /* Handle Output Modifier
259     * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
260    if (inst->RGB.Omod) {
261       if (inst->RGB.Omod == RC_OMOD_DISABLE) {
262          rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
263       }
264       code->alu.inst[ip].rgb_inst |= (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
265    }
266    if (inst->Alpha.Omod) {
267       if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
268          rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
269       }
270       code->alu.inst[ip].alpha_inst |= (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
271    }
272    return 1;
273 }
274 
275 /**
276  * Finish the current node without advancing to the next one.
277  */
278 static int
finish_node(struct r300_emit_state * emit)279 finish_node(struct r300_emit_state *emit)
280 {
281    struct r300_fragment_program_compiler *c = emit->compiler;
282    struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
283    unsigned alu_offset;
284    unsigned alu_end;
285    unsigned tex_offset;
286    unsigned tex_end;
287 
288    unsigned int alu_offset_msbs, alu_end_msbs;
289 
290    if (code->alu.length == emit->node_first_alu) {
291       /* Generate a single NOP for this node */
292       struct rc_pair_instruction inst;
293       memset(&inst, 0, sizeof(inst));
294       if (!emit_alu(emit, &inst))
295          return 0;
296    }
297 
298    alu_offset = emit->node_first_alu;
299    alu_end = code->alu.length - alu_offset - 1;
300    tex_offset = emit->node_first_tex;
301    tex_end = code->tex.length - tex_offset - 1;
302 
303    if (code->tex.length == emit->node_first_tex) {
304       if (emit->current_node > 0) {
305          error("Node %i has no TEX instructions", emit->current_node);
306          return 0;
307       }
308 
309       tex_end = 0;
310    } else {
311       if (emit->current_node == 0)
312          code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
313    }
314 
315    /* Write the config register.
316     * Note: The order in which the words for each node are written
317     * is not correct here and needs to be fixed up once we're entirely
318     * done
319     *
320     * Also note that the register specification from AMD is slightly
321     * incorrect in its description of this register. */
322    code->code_addr[emit->current_node] =
323       ((alu_offset << R300_ALU_START_SHIFT) & R300_ALU_START_MASK) |
324       ((alu_end << R300_ALU_SIZE_SHIFT) & R300_ALU_SIZE_MASK) |
325       ((tex_offset << R300_TEX_START_SHIFT) & R300_TEX_START_MASK) |
326       ((tex_end << R300_TEX_SIZE_SHIFT) & R300_TEX_SIZE_MASK) | emit->node_flags |
327       (get_msbs_tex(tex_offset, 5) << R400_TEX_START_MSB_SHIFT) |
328       (get_msbs_tex(tex_end, 5) << R400_TEX_SIZE_MSB_SHIFT);
329 
330    /* Write r400 extended instruction fields.  These will be ignored on
331     * r300 cards.  */
332    alu_offset_msbs = get_msbs_alu(alu_offset);
333    alu_end_msbs = get_msbs_alu(alu_end);
334    switch (emit->current_node) {
335    case 0:
336       code->r400_code_offset_ext |=
337          alu_offset_msbs << R400_ALU_START3_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
338       break;
339    case 1:
340       code->r400_code_offset_ext |=
341          alu_offset_msbs << R400_ALU_START2_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
342       break;
343    case 2:
344       code->r400_code_offset_ext |=
345          alu_offset_msbs << R400_ALU_START1_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
346       break;
347    case 3:
348       code->r400_code_offset_ext |=
349          alu_offset_msbs << R400_ALU_START0_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
350       break;
351    }
352    return 1;
353 }
354 
355 /**
356  * Begin a block of texture instructions.
357  * Create the necessary indirection.
358  */
359 static int
begin_tex(struct r300_emit_state * emit)360 begin_tex(struct r300_emit_state *emit)
361 {
362    PROG_CODE;
363 
364    if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) {
365       return 1;
366    }
367 
368    if (emit->current_node == 3) {
369       error("Too many texture indirections");
370       return 0;
371    }
372 
373    if (!finish_node(emit))
374       return 0;
375 
376    emit->current_node++;
377    emit->node_first_tex = code->tex.length;
378    emit->node_first_alu = code->alu.length;
379    emit->node_flags = 0;
380    return 1;
381 }
382 
383 static int
emit_tex(struct r300_emit_state * emit,struct rc_instruction * inst)384 emit_tex(struct r300_emit_state *emit, struct rc_instruction *inst)
385 {
386    unsigned int unit;
387    unsigned int dest;
388    unsigned int opcode;
389    PROG_CODE;
390 
391    if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
392       error("Too many TEX instructions");
393       return 0;
394    }
395 
396    unit = inst->U.I.TexSrcUnit;
397    dest = inst->U.I.DstReg.Index;
398 
399    switch (inst->U.I.Opcode) {
400    case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
401    case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
402    case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
403    case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
404    default:
405       error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
406       return 0;
407    }
408 
409    if (inst->U.I.Opcode == RC_OPCODE_KIL) {
410       unit = 0;
411       dest = 0;
412    } else {
413       use_temporary(code, dest);
414    }
415 
416    use_temporary(code, inst->U.I.SrcReg[0].Index);
417 
418    code->tex.inst[code->tex.length++] =
419       ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) & R300_SRC_ADDR_MASK) |
420       ((dest << R300_DST_ADDR_SHIFT) & R300_DST_ADDR_MASK) | (unit << R300_TEX_ID_SHIFT) |
421       (opcode << R300_TEX_INST_SHIFT) |
422       (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? R400_SRC_ADDR_EXT_BIT : 0) |
423       (dest >= R300_PFS_NUM_TEMP_REGS ? R400_DST_ADDR_EXT_BIT : 0);
424    return 1;
425 }
426 
427 /**
428  * Final compilation step: Turn the intermediate radeon_program into
429  * machine-readable instructions.
430  */
431 void
r300BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)432 r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
433 {
434    struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
435    struct r300_emit_state emit;
436    struct r300_fragment_program_code *code = &compiler->code->code.r300;
437    unsigned int tex_end;
438 
439    memset(&emit, 0, sizeof(emit));
440    emit.compiler = compiler;
441 
442    memset(code, 0, sizeof(struct r300_fragment_program_code));
443 
444    for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next;
445         inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) {
446       if (inst->Type == RC_INSTRUCTION_NORMAL) {
447          if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
448             begin_tex(&emit);
449             continue;
450          }
451 
452          emit_tex(&emit, inst);
453       } else {
454          emit_alu(&emit, &inst->U.P);
455       }
456    }
457 
458    if (code->pixsize >= compiler->Base.max_temp_regs)
459       rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
460 
461    if (compiler->Base.Error)
462       return;
463 
464    /* Finish the program */
465    finish_node(&emit);
466 
467    code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
468 
469    /* Set r400 extended instruction fields.  These values will be ignored
470     * on r300 cards. */
471    code->r400_code_offset_ext |= (get_msbs_alu(0) << R400_ALU_OFFSET_MSB_SHIFT) |
472                                  (get_msbs_alu(code->alu.length - 1) << R400_ALU_SIZE_MSB_SHIFT);
473 
474    tex_end = code->tex.length ? code->tex.length - 1 : 0;
475    code->code_offset =
476       ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) & R300_PFS_CNTL_ALU_OFFSET_MASK) |
477       (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) & R300_PFS_CNTL_ALU_END_MASK) |
478       ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) & R300_PFS_CNTL_TEX_OFFSET_MASK) |
479       ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) & R300_PFS_CNTL_TEX_END_MASK) |
480       (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) |
481       (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT);
482 
483    if (emit.current_node < 3) {
484       int shift = 3 - emit.current_node;
485       int i;
486       for (i = emit.current_node; i >= 0; --i)
487          code->code_addr[shift + i] = code->code_addr[i];
488       for (i = 0; i < shift; ++i)
489          code->code_addr[i] = 0;
490    }
491 
492    if (code->pixsize >= R300_PFS_NUM_TEMP_REGS || code->alu.length > R300_PFS_MAX_ALU_INST ||
493        code->tex.length > R300_PFS_MAX_TEX_INST) {
494 
495       code->r390_mode = 1;
496    }
497 }
498