1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 */
38
39 #include "r300_fragprog.h"
40
41 #include "r300_reg.h"
42
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
45
46 #include "util/compiler.h"
47
48
/**
 * Bookkeeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
    /** Compiler whose program is being emitted. */
    struct r300_fragment_program_compiler *compiler;

    /** Index of the node currently being filled (0..3). */
    unsigned current_node : 2;

    /**
     * Values of tex.length and alu.length at the point where the current
     * node was started.
     *
     * These are deliberately full-width: finish_node() feeds them to
     * get_msbs_tex()/get_msbs_alu(), which read bits above bit 7, and they
     * are compared against the running instruction counts. An 8-bit field
     * would silently truncate offsets of 256 and above.
     */
    unsigned int node_first_tex;
    unsigned int node_first_alu;

    /** Flags (R300_RGBA_OUT, R300_W_OUT) collected for the current node. */
    uint32_t node_flags;
};
57
/**
 * Declare the local aliases `c` (the fragment program compiler) and `code`
 * (the r300 hardware code being built). Expects a variable named `emit`
 * in the enclosing scope.
 */
#define PROG_CODE \
    struct r300_fragment_program_compiler *c = emit->compiler; \
    struct r300_fragment_program_code *code = &c->code->code.r300

/**
 * Report an error through rc_error(), prefixed with the source file and
 * the name of the calling function. Expects `c` to be in scope.
 */
#define error(fmt, ...) \
    do { \
        rc_error(&c->Base, "%s::%s(): " fmt "\n", \
                 __FILE__, __func__, ##__VA_ARGS__); \
    } while (0)
66
/**
 * Extract bits 6..8 of an ALU instruction offset or size, i.e. the three
 * bits directly above the low six.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
    const unsigned int lsb_count = 6;
    const unsigned int msb_mask = 0x7;

    return (bits >> lsb_count) & msb_mask;
}
71
/**
 * Extract the most significant bits of a TEX instruction offset or size.
 *
 * @param bits The full offset or size value.
 * @param lsbs The number of least significant bits to discard.
 * @return The four bits of \p bits directly above the low \p lsbs bits.
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
    /* The mask must be contiguous: a mask with holes (such as 0x15) would
     * silently drop bits 1 and 3 of the extension. */
    return (bits >> lsbs) & 0xf;
}
79
/**
 * Extract the bits of x above its low `lsbs` bits, limited by `mask`.
 * Every argument is parenthesized so that expression arguments bind
 * as a unit.
 */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81
82 /**
83 * Mark a temporary register as used.
84 */
use_temporary(struct r300_fragment_program_code * code,unsigned int index)85 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
86 {
87 if (index > code->pixsize)
88 code->pixsize = index;
89 }
90
use_source(struct r300_fragment_program_code * code,struct rc_pair_instruction_source src)91 static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
92 {
93 if (!src.Used)
94 return 0;
95
96 if (src.File == RC_FILE_CONSTANT) {
97 return src.Index | (1 << 5);
98 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
99 use_temporary(code, src.Index);
100 return src.Index & 0x1f;
101 }
102
103 return 0;
104 }
105
106
/**
 * Map an opcode to the hardware opcode of the RGB half of an ALU
 * instruction.
 *
 * NOP is emitted as MAD. An opcode with no RGB encoding is reported as an
 * error and also emitted as MAD.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
    switch (opcode) {
    case RC_OPCODE_NOP:
    case RC_OPCODE_MAD:
        return R300_ALU_OUTC_MAD;
    case RC_OPCODE_CMP:
        return R300_ALU_OUTC_CMP;
    case RC_OPCODE_CND:
        return R300_ALU_OUTC_CND;
    case RC_OPCODE_DP3:
        return R300_ALU_OUTC_DP3;
    case RC_OPCODE_DP4:
        return R300_ALU_OUTC_DP4;
    case RC_OPCODE_FRC:
        return R300_ALU_OUTC_FRC;
    case RC_OPCODE_MAX:
        return R300_ALU_OUTC_MAX;
    case RC_OPCODE_MIN:
        return R300_ALU_OUTC_MIN;
    case RC_OPCODE_REPL_ALPHA:
        return R300_ALU_OUTC_REPL_ALPHA;
    default:
        error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
        return R300_ALU_OUTC_MAD;
    }
}
126
/**
 * Map an opcode to the hardware opcode of the alpha half of an ALU
 * instruction.
 *
 * NOP is emitted as MAD. An opcode with no alpha encoding is reported as
 * an error and also emitted as MAD.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
    switch (opcode) {
    case RC_OPCODE_NOP:
    case RC_OPCODE_MAD:
        return R300_ALU_OUTA_MAD;
    case RC_OPCODE_CMP:
        return R300_ALU_OUTA_CMP;
    case RC_OPCODE_CND:
        return R300_ALU_OUTA_CND;
    /* NOTE(review): DP3 is emitted with the DP4 alpha opcode; presumably
     * there is no separate alpha DP3 encoding - confirm in r300_reg.h. */
    case RC_OPCODE_DP3:
    case RC_OPCODE_DP4:
        return R300_ALU_OUTA_DP4;
    case RC_OPCODE_EX2:
        return R300_ALU_OUTA_EX2;
    case RC_OPCODE_FRC:
        return R300_ALU_OUTA_FRC;
    case RC_OPCODE_LG2:
        return R300_ALU_OUTA_LG2;
    case RC_OPCODE_MAX:
        return R300_ALU_OUTA_MAX;
    case RC_OPCODE_MIN:
        return R300_ALU_OUTA_MIN;
    case RC_OPCODE_RCP:
        return R300_ALU_OUTA_RCP;
    case RC_OPCODE_RSQ:
        return R300_ALU_OUTA_RSQ;
    default:
        /* Name this function, not its RGB sibling, in the diagnostic. */
        error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
        return R300_ALU_OUTA_MAD;
    }
}
149
150 /**
151 * Emit one paired ALU instruction.
152 */
emit_alu(struct r300_emit_state * emit,struct rc_pair_instruction * inst)153 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
154 {
155 int ip;
156 int j;
157 PROG_CODE;
158
159 if (code->alu.length >= c->Base.max_alu_insts) {
160 /* rc_recompute_ips does not give an exact count, because it counts extra stuff
161 * like BEGINTEX, but here it is intended to be only approximative anyway,
162 * just to give some idea how close to the limit we are. */
163 rc_error(&c->Base, "Too many ALU instructions used: %u, max: %u.\n",
164 rc_recompute_ips(&c->Base), c->Base.max_alu_insts);
165 return 0;
166 }
167
168 ip = code->alu.length++;
169
170 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
171 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
172
173 for(j = 0; j < 3; ++j) {
174 /* Set the RGB address */
175 unsigned int src = use_source(code, inst->RGB.Src[j]);
176 unsigned int arg;
177 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
178 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
179
180 code->alu.inst[ip].rgb_addr |= src << (6*j);
181
182 /* Set the Alpha address */
183 src = use_source(code, inst->Alpha.Src[j]);
184 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
185 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
186
187 code->alu.inst[ip].alpha_addr |= src << (6*j);
188
189 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
190 arg |= inst->RGB.Arg[j].Abs << 6;
191 arg |= inst->RGB.Arg[j].Negate << 5;
192 code->alu.inst[ip].rgb_inst |= arg << (7*j);
193
194 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
195 arg |= inst->Alpha.Arg[j].Abs << 6;
196 arg |= inst->Alpha.Arg[j].Negate << 5;
197 code->alu.inst[ip].alpha_inst |= arg << (7*j);
198 }
199
200 /* Presubtract */
201 if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
202 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
203 case RC_PRESUB_BIAS:
204 code->alu.inst[ip].rgb_inst |=
205 R300_ALU_SRCP_1_MINUS_2_SRC0;
206 break;
207 case RC_PRESUB_ADD:
208 code->alu.inst[ip].rgb_inst |=
209 R300_ALU_SRCP_SRC1_PLUS_SRC0;
210 break;
211 case RC_PRESUB_SUB:
212 code->alu.inst[ip].rgb_inst |=
213 R300_ALU_SRCP_SRC1_MINUS_SRC0;
214 break;
215 case RC_PRESUB_INV:
216 code->alu.inst[ip].rgb_inst |=
217 R300_ALU_SRCP_1_MINUS_SRC0;
218 break;
219 default:
220 break;
221 }
222 }
223
224 if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
225 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
226 case RC_PRESUB_BIAS:
227 code->alu.inst[ip].alpha_inst |=
228 R300_ALU_SRCP_1_MINUS_2_SRC0;
229 break;
230 case RC_PRESUB_ADD:
231 code->alu.inst[ip].alpha_inst |=
232 R300_ALU_SRCP_SRC1_PLUS_SRC0;
233 break;
234 case RC_PRESUB_SUB:
235 code->alu.inst[ip].alpha_inst |=
236 R300_ALU_SRCP_SRC1_MINUS_SRC0;
237 break;
238 case RC_PRESUB_INV:
239 code->alu.inst[ip].alpha_inst |=
240 R300_ALU_SRCP_1_MINUS_SRC0;
241 break;
242 default:
243 break;
244 }
245 }
246
247 if (inst->RGB.Saturate)
248 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
249 if (inst->Alpha.Saturate)
250 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
251
252 if (inst->RGB.WriteMask) {
253 use_temporary(code, inst->RGB.DestIndex);
254 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
255 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
256 code->alu.inst[ip].rgb_addr |=
257 ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
258 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
259 }
260 if (inst->RGB.OutputWriteMask) {
261 code->alu.inst[ip].rgb_addr |=
262 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
263 R300_RGB_TARGET(inst->RGB.Target);
264 emit->node_flags |= R300_RGBA_OUT;
265 }
266
267 if (inst->Alpha.WriteMask) {
268 use_temporary(code, inst->Alpha.DestIndex);
269 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
270 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
271 code->alu.inst[ip].alpha_addr |=
272 ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
273 R300_ALU_DSTA_REG;
274 }
275 if (inst->Alpha.OutputWriteMask) {
276 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
277 R300_ALPHA_TARGET(inst->Alpha.Target);
278 emit->node_flags |= R300_RGBA_OUT;
279 }
280 if (inst->Alpha.DepthWriteMask) {
281 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
282 emit->node_flags |= R300_W_OUT;
283 c->code->writes_depth = 1;
284 }
285 if (inst->Nop)
286 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
287
288 /* Handle Output Modifier
289 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
290 if (inst->RGB.Omod) {
291 if (inst->RGB.Omod == RC_OMOD_DISABLE) {
292 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
293 }
294 code->alu.inst[ip].rgb_inst |=
295 (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
296 }
297 if (inst->Alpha.Omod) {
298 if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
299 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
300 }
301 code->alu.inst[ip].alpha_inst |=
302 (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
303 }
304 return 1;
305 }
306
307
308 /**
309 * Finish the current node without advancing to the next one.
310 */
finish_node(struct r300_emit_state * emit)311 static int finish_node(struct r300_emit_state * emit)
312 {
313 struct r300_fragment_program_compiler * c = emit->compiler;
314 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
315 unsigned alu_offset;
316 unsigned alu_end;
317 unsigned tex_offset;
318 unsigned tex_end;
319
320 unsigned int alu_offset_msbs, alu_end_msbs;
321
322 if (code->alu.length == emit->node_first_alu) {
323 /* Generate a single NOP for this node */
324 struct rc_pair_instruction inst;
325 memset(&inst, 0, sizeof(inst));
326 if (!emit_alu(emit, &inst))
327 return 0;
328 }
329
330 alu_offset = emit->node_first_alu;
331 alu_end = code->alu.length - alu_offset - 1;
332 tex_offset = emit->node_first_tex;
333 tex_end = code->tex.length - tex_offset - 1;
334
335 if (code->tex.length == emit->node_first_tex) {
336 if (emit->current_node > 0) {
337 error("Node %i has no TEX instructions", emit->current_node);
338 return 0;
339 }
340
341 tex_end = 0;
342 } else {
343 if (emit->current_node == 0)
344 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
345 }
346
347 /* Write the config register.
348 * Note: The order in which the words for each node are written
349 * is not correct here and needs to be fixed up once we're entirely
350 * done
351 *
352 * Also note that the register specification from AMD is slightly
353 * incorrect in its description of this register. */
354 code->code_addr[emit->current_node] =
355 ((alu_offset << R300_ALU_START_SHIFT)
356 & R300_ALU_START_MASK)
357 | ((alu_end << R300_ALU_SIZE_SHIFT)
358 & R300_ALU_SIZE_MASK)
359 | ((tex_offset << R300_TEX_START_SHIFT)
360 & R300_TEX_START_MASK)
361 | ((tex_end << R300_TEX_SIZE_SHIFT)
362 & R300_TEX_SIZE_MASK)
363 | emit->node_flags
364 | (get_msbs_tex(tex_offset, 5)
365 << R400_TEX_START_MSB_SHIFT)
366 | (get_msbs_tex(tex_end, 5)
367 << R400_TEX_SIZE_MSB_SHIFT)
368 ;
369
370 /* Write r400 extended instruction fields. These will be ignored on
371 * r300 cards. */
372 alu_offset_msbs = get_msbs_alu(alu_offset);
373 alu_end_msbs = get_msbs_alu(alu_end);
374 switch(emit->current_node) {
375 case 0:
376 code->r400_code_offset_ext |=
377 alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
378 | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
379 break;
380 case 1:
381 code->r400_code_offset_ext |=
382 alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
383 | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
384 break;
385 case 2:
386 code->r400_code_offset_ext |=
387 alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
388 | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
389 break;
390 case 3:
391 code->r400_code_offset_ext |=
392 alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
393 | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
394 break;
395 }
396 return 1;
397 }
398
399
400 /**
401 * Begin a block of texture instructions.
402 * Create the necessary indirection.
403 */
begin_tex(struct r300_emit_state * emit)404 static int begin_tex(struct r300_emit_state * emit)
405 {
406 PROG_CODE;
407
408 if (code->alu.length == emit->node_first_alu &&
409 code->tex.length == emit->node_first_tex) {
410 return 1;
411 }
412
413 if (emit->current_node == 3) {
414 error("Too many texture indirections");
415 return 0;
416 }
417
418 if (!finish_node(emit))
419 return 0;
420
421 emit->current_node++;
422 emit->node_first_tex = code->tex.length;
423 emit->node_first_alu = code->alu.length;
424 emit->node_flags = 0;
425 return 1;
426 }
427
428
emit_tex(struct r300_emit_state * emit,struct rc_instruction * inst)429 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
430 {
431 unsigned int unit;
432 unsigned int dest;
433 unsigned int opcode;
434 PROG_CODE;
435
436 if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
437 error("Too many TEX instructions");
438 return 0;
439 }
440
441 unit = inst->U.I.TexSrcUnit;
442 dest = inst->U.I.DstReg.Index;
443
444 switch(inst->U.I.Opcode) {
445 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
446 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
447 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
448 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
449 default:
450 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
451 return 0;
452 }
453
454 if (inst->U.I.Opcode == RC_OPCODE_KIL) {
455 unit = 0;
456 dest = 0;
457 } else {
458 use_temporary(code, dest);
459 }
460
461 use_temporary(code, inst->U.I.SrcReg[0].Index);
462
463 code->tex.inst[code->tex.length++] =
464 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
465 & R300_SRC_ADDR_MASK)
466 | ((dest << R300_DST_ADDR_SHIFT)
467 & R300_DST_ADDR_MASK)
468 | (unit << R300_TEX_ID_SHIFT)
469 | (opcode << R300_TEX_INST_SHIFT)
470 | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
471 R400_SRC_ADDR_EXT_BIT : 0)
472 | (dest >= R300_PFS_NUM_TEMP_REGS ?
473 R400_DST_ADDR_EXT_BIT : 0)
474 ;
475 return 1;
476 }
477
478
479 /**
480 * Final compilation step: Turn the intermediate radeon_program into
481 * machine-readable instructions.
482 */
r300BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)483 void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
484 {
485 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
486 struct r300_emit_state emit;
487 struct r300_fragment_program_code *code = &compiler->code->code.r300;
488 unsigned int tex_end;
489
490 memset(&emit, 0, sizeof(emit));
491 emit.compiler = compiler;
492
493 memset(code, 0, sizeof(struct r300_fragment_program_code));
494
495 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
496 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
497 inst = inst->Next) {
498 if (inst->Type == RC_INSTRUCTION_NORMAL) {
499 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
500 begin_tex(&emit);
501 continue;
502 }
503
504 emit_tex(&emit, inst);
505 } else {
506 emit_alu(&emit, &inst->U.P);
507 }
508 }
509
510 if (code->pixsize >= compiler->Base.max_temp_regs)
511 rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
512
513 if (compiler->Base.Error)
514 return;
515
516 /* Finish the program */
517 finish_node(&emit);
518
519 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
520
521 /* Set r400 extended instruction fields. These values will be ignored
522 * on r300 cards. */
523 code->r400_code_offset_ext |=
524 (get_msbs_alu(0)
525 << R400_ALU_OFFSET_MSB_SHIFT)
526 | (get_msbs_alu(code->alu.length - 1)
527 << R400_ALU_SIZE_MSB_SHIFT);
528
529 tex_end = code->tex.length ? code->tex.length - 1 : 0;
530 code->code_offset =
531 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
532 & R300_PFS_CNTL_ALU_OFFSET_MASK)
533 | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
534 & R300_PFS_CNTL_ALU_END_MASK)
535 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
536 & R300_PFS_CNTL_TEX_OFFSET_MASK)
537 | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
538 & R300_PFS_CNTL_TEX_END_MASK)
539 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
540 | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
541 ;
542
543 if (emit.current_node < 3) {
544 int shift = 3 - emit.current_node;
545 int i;
546 for(i = emit.current_node; i >= 0; --i)
547 code->code_addr[shift + i] = code->code_addr[i];
548 for(i = 0; i < shift; ++i)
549 code->code_addr[i] = 0;
550 }
551
552 if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
553 || code->alu.length > R300_PFS_MAX_ALU_INST
554 || code->tex.length > R300_PFS_MAX_TEX_INST) {
555
556 code->r390_mode = 1;
557 }
558 }
559