1 /*
2 * Copyright 2005 Ben Skeggs.
3 * SPDX-License-Identifier: MIT
4 */
5
6 /**
7 * \file
8 *
9 * Emit the r300_fragment_program_code that can be understood by the hardware.
10 * Input is a pre-transformed radeon_program.
11 *
12 * \author Ben Skeggs <darktama@iinet.net.au>
13 *
14 * \author Jerome Glisse <j.glisse@gmail.com>
15 */
16
17 #include "r300_fragprog.h"
18
19 #include "r300_reg.h"
20
21 #include "r300_fragprog_swizzle.h"
22 #include "radeon_program_pair.h"
23
24 #include "util/compiler.h"
25
/**
 * Bookkeeping carried across the emission of one fragment program.
 */
struct r300_emit_state {
   struct r300_fragment_program_compiler *compiler;

   /* Index of the node currently being filled. begin_tex() refuses to
    * advance past node 3, so two bits are sufficient. */
   unsigned current_node : 2;

   /* Values of tex.length / alu.length at the moment the current node was
    * started. These are kept full-width: get_msbs_alu() extracts bits 6..8
    * of the ALU offset, which an 8-bit field would silently truncate. */
   unsigned int node_first_tex;
   unsigned int node_first_alu;

   /* R300_RGBA_OUT / R300_W_OUT bits accumulated for the current node;
    * merged into the node's code_addr word by finish_node(). */
   uint32_t node_flags;
};
34
/* Declares two locals for the enclosing function: `c`, the fragment program
 * compiler taken from emit->compiler, and `code`, a pointer to that
 * compiler's r300 code section. Requires a variable named `emit` in scope;
 * the terminating semicolon is supplied at the point of use. */
#define PROG_CODE \
   struct r300_fragment_program_compiler *c = emit->compiler; \
   struct r300_fragment_program_code *code = &c->code->code.r300
38
/* Reports a compile error through rc_error(), prefixing the message with
 * the source file and function name and appending a newline. Requires a
 * compiler pointer named `c` in scope at the point of use. */
#define error(fmt, args...) \
   do { \
      rc_error(&c->Base, "%s::%s(): " fmt "\n", __FILE__, __func__, ##args); \
   } while (0)
43
/**
 * Extract the three bits located directly above the six low bits of an
 * ALU offset or size value (i.e. bits 6..8 of \p bits).
 */
static unsigned int
get_msbs_alu(unsigned int bits)
{
   const unsigned int upper = bits >> 6;

   return upper & 0x7;
}
49
/**
 * Extract the most significant bits of a TEX offset or size value.
 *
 * The result is a contiguous 4-bit field. The previous mask, 0x15 (binary
 * 10101), dropped bits 1 and 3 of the upper part and let bit 4 through.
 * NOTE(review): assumes the R400 TEX MSB register fields are 4 bits wide;
 * confirm against the register definitions.
 *
 * @param bits The full offset or size value
 * @param lsbs The number of least significant bits to discard
 * @return Bits [lsbs, lsbs + 3] of \p bits, shifted down to bit 0
 */
static unsigned int
get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
   return (bits >> lsbs) & 0xf;
}
58
/* Shift x right by lsbs bits and apply mask. Every argument is parenthesized
 * so that compound expressions (e.g. a mask built with `|`) expand with the
 * intended precedence. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
60
61 /**
62 * Mark a temporary register as used.
63 */
64 static void
use_temporary(struct r300_fragment_program_code * code,unsigned int index)65 use_temporary(struct r300_fragment_program_code *code, unsigned int index)
66 {
67 if (index > code->pixsize)
68 code->pixsize = index;
69 }
70
71 static unsigned int
use_source(struct r300_fragment_program_code * code,struct rc_pair_instruction_source src)72 use_source(struct r300_fragment_program_code *code, struct rc_pair_instruction_source src)
73 {
74 if (!src.Used)
75 return 0;
76
77 if (src.File == RC_FILE_CONSTANT) {
78 return src.Index | (1 << 5);
79 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
80 use_temporary(code, src.Index);
81 return src.Index & 0x1f;
82 }
83
84 return 0;
85 }
86
87 static unsigned int
translate_rgb_opcode(struct r300_fragment_program_compiler * c,rc_opcode opcode)88 translate_rgb_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode)
89 {
90 switch (opcode) {
91 case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
92 case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
93 case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
94 case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
95 case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
96 default:
97 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
98 FALLTHROUGH;
99 case RC_OPCODE_NOP: FALLTHROUGH;
100 case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
101 case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
102 case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
103 case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
104 }
105 }
106
107 static unsigned int
translate_alpha_opcode(struct r300_fragment_program_compiler * c,rc_opcode opcode)108 translate_alpha_opcode(struct r300_fragment_program_compiler *c, rc_opcode opcode)
109 {
110 switch (opcode) {
111 case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
112 case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
113 case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
114 case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
115 case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
116 case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
117 case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
118 default:
119 error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
120 FALLTHROUGH;
121 case RC_OPCODE_NOP: FALLTHROUGH;
122 case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
123 case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
124 case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
125 case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
126 case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
127 }
128 }
129
130 /**
131 * Emit one paired ALU instruction.
132 */
133 static int
emit_alu(struct r300_emit_state * emit,struct rc_pair_instruction * inst)134 emit_alu(struct r300_emit_state *emit, struct rc_pair_instruction *inst)
135 {
136 int ip;
137 int j;
138 PROG_CODE;
139
140 if (code->alu.length >= c->Base.max_alu_insts) {
141 /* rc_recompute_ips does not give an exact count, because it counts extra stuff
142 * like BEGINTEX, but here it is intended to be only approximative anyway,
143 * just to give some idea how close to the limit we are. */
144 rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n",
145 rc_recompute_ips(&c->Base), c->Base.max_alu_insts);
146 return 0;
147 }
148
149 ip = code->alu.length++;
150
151 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
152 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
153
154 for (j = 0; j < 3; ++j) {
155 /* Set the RGB address */
156 unsigned int src = use_source(code, inst->RGB.Src[j]);
157 unsigned int arg;
158 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
159 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
160
161 code->alu.inst[ip].rgb_addr |= src << (6 * j);
162
163 /* Set the Alpha address */
164 src = use_source(code, inst->Alpha.Src[j]);
165 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
166 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
167
168 code->alu.inst[ip].alpha_addr |= src << (6 * j);
169
170 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
171 arg |= inst->RGB.Arg[j].Abs << 6;
172 arg |= inst->RGB.Arg[j].Negate << 5;
173 code->alu.inst[ip].rgb_inst |= arg << (7 * j);
174
175 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
176 arg |= inst->Alpha.Arg[j].Abs << 6;
177 arg |= inst->Alpha.Arg[j].Negate << 5;
178 code->alu.inst[ip].alpha_inst |= arg << (7 * j);
179 }
180
181 /* Presubtract */
182 if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
183 switch (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
184 case RC_PRESUB_BIAS:
185 code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0;
186 break;
187 case RC_PRESUB_ADD:
188 code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0;
189 break;
190 case RC_PRESUB_SUB:
191 code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0;
192 break;
193 case RC_PRESUB_INV:
194 code->alu.inst[ip].rgb_inst |= R300_ALU_SRCP_1_MINUS_SRC0;
195 break;
196 default:
197 break;
198 }
199 }
200
201 if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
202 switch (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
203 case RC_PRESUB_BIAS:
204 code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_2_SRC0;
205 break;
206 case RC_PRESUB_ADD:
207 code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_PLUS_SRC0;
208 break;
209 case RC_PRESUB_SUB:
210 code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_SRC1_MINUS_SRC0;
211 break;
212 case RC_PRESUB_INV:
213 code->alu.inst[ip].alpha_inst |= R300_ALU_SRCP_1_MINUS_SRC0;
214 break;
215 default:
216 break;
217 }
218 }
219
220 if (inst->RGB.Saturate)
221 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
222 if (inst->Alpha.Saturate)
223 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
224
225 if (inst->RGB.WriteMask) {
226 use_temporary(code, inst->RGB.DestIndex);
227 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
228 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
229 code->alu.inst[ip].rgb_addr |= ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
230 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
231 }
232 if (inst->RGB.OutputWriteMask) {
233 code->alu.inst[ip].rgb_addr |=
234 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
235 R300_RGB_TARGET(inst->RGB.Target);
236 emit->node_flags |= R300_RGBA_OUT;
237 }
238
239 if (inst->Alpha.WriteMask) {
240 use_temporary(code, inst->Alpha.DestIndex);
241 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
242 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
243 code->alu.inst[ip].alpha_addr |=
244 ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG;
245 }
246 if (inst->Alpha.OutputWriteMask) {
247 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT | R300_ALPHA_TARGET(inst->Alpha.Target);
248 emit->node_flags |= R300_RGBA_OUT;
249 }
250 if (inst->Alpha.DepthWriteMask) {
251 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
252 emit->node_flags |= R300_W_OUT;
253 c->code->writes_depth = 1;
254 }
255 if (inst->Nop)
256 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
257
258 /* Handle Output Modifier
259 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
260 if (inst->RGB.Omod) {
261 if (inst->RGB.Omod == RC_OMOD_DISABLE) {
262 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
263 }
264 code->alu.inst[ip].rgb_inst |= (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
265 }
266 if (inst->Alpha.Omod) {
267 if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
268 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
269 }
270 code->alu.inst[ip].alpha_inst |= (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
271 }
272 return 1;
273 }
274
275 /**
276 * Finish the current node without advancing to the next one.
277 */
278 static int
finish_node(struct r300_emit_state * emit)279 finish_node(struct r300_emit_state *emit)
280 {
281 struct r300_fragment_program_compiler *c = emit->compiler;
282 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
283 unsigned alu_offset;
284 unsigned alu_end;
285 unsigned tex_offset;
286 unsigned tex_end;
287
288 unsigned int alu_offset_msbs, alu_end_msbs;
289
290 if (code->alu.length == emit->node_first_alu) {
291 /* Generate a single NOP for this node */
292 struct rc_pair_instruction inst;
293 memset(&inst, 0, sizeof(inst));
294 if (!emit_alu(emit, &inst))
295 return 0;
296 }
297
298 alu_offset = emit->node_first_alu;
299 alu_end = code->alu.length - alu_offset - 1;
300 tex_offset = emit->node_first_tex;
301 tex_end = code->tex.length - tex_offset - 1;
302
303 if (code->tex.length == emit->node_first_tex) {
304 if (emit->current_node > 0) {
305 error("Node %i has no TEX instructions", emit->current_node);
306 return 0;
307 }
308
309 tex_end = 0;
310 } else {
311 if (emit->current_node == 0)
312 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
313 }
314
315 /* Write the config register.
316 * Note: The order in which the words for each node are written
317 * is not correct here and needs to be fixed up once we're entirely
318 * done
319 *
320 * Also note that the register specification from AMD is slightly
321 * incorrect in its description of this register. */
322 code->code_addr[emit->current_node] =
323 ((alu_offset << R300_ALU_START_SHIFT) & R300_ALU_START_MASK) |
324 ((alu_end << R300_ALU_SIZE_SHIFT) & R300_ALU_SIZE_MASK) |
325 ((tex_offset << R300_TEX_START_SHIFT) & R300_TEX_START_MASK) |
326 ((tex_end << R300_TEX_SIZE_SHIFT) & R300_TEX_SIZE_MASK) | emit->node_flags |
327 (get_msbs_tex(tex_offset, 5) << R400_TEX_START_MSB_SHIFT) |
328 (get_msbs_tex(tex_end, 5) << R400_TEX_SIZE_MSB_SHIFT);
329
330 /* Write r400 extended instruction fields. These will be ignored on
331 * r300 cards. */
332 alu_offset_msbs = get_msbs_alu(alu_offset);
333 alu_end_msbs = get_msbs_alu(alu_end);
334 switch (emit->current_node) {
335 case 0:
336 code->r400_code_offset_ext |=
337 alu_offset_msbs << R400_ALU_START3_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
338 break;
339 case 1:
340 code->r400_code_offset_ext |=
341 alu_offset_msbs << R400_ALU_START2_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
342 break;
343 case 2:
344 code->r400_code_offset_ext |=
345 alu_offset_msbs << R400_ALU_START1_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
346 break;
347 case 3:
348 code->r400_code_offset_ext |=
349 alu_offset_msbs << R400_ALU_START0_MSB_SHIFT | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
350 break;
351 }
352 return 1;
353 }
354
355 /**
356 * Begin a block of texture instructions.
357 * Create the necessary indirection.
358 */
359 static int
begin_tex(struct r300_emit_state * emit)360 begin_tex(struct r300_emit_state *emit)
361 {
362 PROG_CODE;
363
364 if (code->alu.length == emit->node_first_alu && code->tex.length == emit->node_first_tex) {
365 return 1;
366 }
367
368 if (emit->current_node == 3) {
369 error("Too many texture indirections");
370 return 0;
371 }
372
373 if (!finish_node(emit))
374 return 0;
375
376 emit->current_node++;
377 emit->node_first_tex = code->tex.length;
378 emit->node_first_alu = code->alu.length;
379 emit->node_flags = 0;
380 return 1;
381 }
382
383 static int
emit_tex(struct r300_emit_state * emit,struct rc_instruction * inst)384 emit_tex(struct r300_emit_state *emit, struct rc_instruction *inst)
385 {
386 unsigned int unit;
387 unsigned int dest;
388 unsigned int opcode;
389 PROG_CODE;
390
391 if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
392 error("Too many TEX instructions");
393 return 0;
394 }
395
396 unit = inst->U.I.TexSrcUnit;
397 dest = inst->U.I.DstReg.Index;
398
399 switch (inst->U.I.Opcode) {
400 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
401 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
402 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
403 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
404 default:
405 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
406 return 0;
407 }
408
409 if (inst->U.I.Opcode == RC_OPCODE_KIL) {
410 unit = 0;
411 dest = 0;
412 } else {
413 use_temporary(code, dest);
414 }
415
416 use_temporary(code, inst->U.I.SrcReg[0].Index);
417
418 code->tex.inst[code->tex.length++] =
419 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) & R300_SRC_ADDR_MASK) |
420 ((dest << R300_DST_ADDR_SHIFT) & R300_DST_ADDR_MASK) | (unit << R300_TEX_ID_SHIFT) |
421 (opcode << R300_TEX_INST_SHIFT) |
422 (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? R400_SRC_ADDR_EXT_BIT : 0) |
423 (dest >= R300_PFS_NUM_TEMP_REGS ? R400_DST_ADDR_EXT_BIT : 0);
424 return 1;
425 }
426
427 /**
428 * Final compilation step: Turn the intermediate radeon_program into
429 * machine-readable instructions.
430 */
431 void
r300BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)432 r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
433 {
434 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler *)c;
435 struct r300_emit_state emit;
436 struct r300_fragment_program_code *code = &compiler->code->code.r300;
437 unsigned int tex_end;
438
439 memset(&emit, 0, sizeof(emit));
440 emit.compiler = compiler;
441
442 memset(code, 0, sizeof(struct r300_fragment_program_code));
443
444 for (struct rc_instruction *inst = compiler->Base.Program.Instructions.Next;
445 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error; inst = inst->Next) {
446 if (inst->Type == RC_INSTRUCTION_NORMAL) {
447 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
448 begin_tex(&emit);
449 continue;
450 }
451
452 emit_tex(&emit, inst);
453 } else {
454 emit_alu(&emit, &inst->U.P);
455 }
456 }
457
458 if (code->pixsize >= compiler->Base.max_temp_regs)
459 rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
460
461 if (compiler->Base.Error)
462 return;
463
464 /* Finish the program */
465 finish_node(&emit);
466
467 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
468
469 /* Set r400 extended instruction fields. These values will be ignored
470 * on r300 cards. */
471 code->r400_code_offset_ext |= (get_msbs_alu(0) << R400_ALU_OFFSET_MSB_SHIFT) |
472 (get_msbs_alu(code->alu.length - 1) << R400_ALU_SIZE_MSB_SHIFT);
473
474 tex_end = code->tex.length ? code->tex.length - 1 : 0;
475 code->code_offset =
476 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) & R300_PFS_CNTL_ALU_OFFSET_MASK) |
477 (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) & R300_PFS_CNTL_ALU_END_MASK) |
478 ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) & R300_PFS_CNTL_TEX_OFFSET_MASK) |
479 ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) & R300_PFS_CNTL_TEX_END_MASK) |
480 (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) |
481 (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT);
482
483 if (emit.current_node < 3) {
484 int shift = 3 - emit.current_node;
485 int i;
486 for (i = emit.current_node; i >= 0; --i)
487 code->code_addr[shift + i] = code->code_addr[i];
488 for (i = 0; i < shift; ++i)
489 code->code_addr[i] = 0;
490 }
491
492 if (code->pixsize >= R300_PFS_NUM_TEMP_REGS || code->alu.length > R300_PFS_MAX_ALU_INST ||
493 code->tex.length > R300_PFS_MAX_TEX_INST) {
494
495 code->r390_mode = 1;
496 }
497 }
498