1 /*
2 * Copyright (C) 2005 Ben Skeggs.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 /**
29 * \file
30 *
31 * Emit the r300_fragment_program_code that can be understood by the hardware.
32 * Input is a pre-transformed radeon_program.
33 *
34 * \author Ben Skeggs <darktama@iinet.net.au>
35 *
36 * \author Jerome Glisse <j.glisse@gmail.com>
37 */
38
39 #include "r300_fragprog.h"
40
41 #include "r300_reg.h"
42
43 #include "radeon_program_pair.h"
44 #include "r300_fragprog_swizzle.h"
45
46 #include "util/compiler.h"
47
48
/**
 * Book-keeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
	struct r300_fragment_program_compiler * compiler;

	/* Index of the node currently being filled (0..3). */
	unsigned current_node : 2;

	/* tex.length / alu.length at the moment the current node was opened.
	 *
	 * These are full-width on purpose: finish_node() feeds them to
	 * get_msbs_tex()/get_msbs_alu(), which extract bits above bit 5/6,
	 * so the offsets are expected to be able to exceed 255.  An 8-bit
	 * bitfield would silently truncate such offsets. */
	unsigned node_first_tex;
	unsigned node_first_alu;

	/* Flags OR-ed into the code_addr word of the current node. */
	uint32_t node_flags;
};
57
/*
 * Declares the two locals most emitters need: `c`, the fragment program
 * compiler, and `code`, the r300 hardware code being filled in.
 * Requires a variable named `emit` in the enclosing scope.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &c->code->code.r300

/*
 * Report an error through rc_error(), prefixed with the source file and the
 * name of the calling function.  Requires `c` in the enclosing scope.
 */
#define error(fmt, ...) do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			__FILE__, __func__, ##__VA_ARGS__); \
	} while(0)
66
/**
 * Return bits 6..8 of \p bits, moved down to bit positions 0..2.
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int upper = bits >> 6;

	return upper & 0x7;
}
71
/**
 * Return the four bits of \p bits that sit directly above its \p lsbs
 * least significant bits.
 *
 * @param bits The value to split (a TEX offset or size)
 * @param lsbs The number of least significant bits
 * @return Bits lsbs..lsbs+3 of \p bits, moved down to bit positions 0..3
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	/* The mask must be contiguous: a sparse mask such as 0x15 (binary
	 * 10101) would drop bits 1 and 3 of the result, e.g. turn an offset
	 * of 64 into an MSB value of 0 instead of 2.
	 * NOTE(review): assumes the R400 TEX MSB fields are four bits wide;
	 * confirm against the register definitions. */
	return (bits >> lsbs) & 0xf;
}
79
/* Bits of x above its lowest `lsbs` bits, limited by `mask`.  Every argument
 * is parenthesized so that expression arguments (e.g. `A | B` as the mask)
 * are evaluated as a unit. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81
82 /**
83 * Mark a temporary register as used.
84 */
use_temporary(struct r300_fragment_program_code * code,unsigned int index)85 static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
86 {
87 if (index > code->pixsize)
88 code->pixsize = index;
89 }
90
use_source(struct r300_fragment_program_code * code,struct rc_pair_instruction_source src)91 static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
92 {
93 if (!src.Used)
94 return 0;
95
96 if (src.File == RC_FILE_CONSTANT) {
97 return src.Index | (1 << 5);
98 } else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
99 use_temporary(code, src.Index);
100 return src.Index & 0x1f;
101 }
102
103 return 0;
104 }
105
106
/**
 * Map an rc_opcode to the R300_ALU_OUTC_* opcode of the RGB unit.
 *
 * NOP is emitted as MAD.  Any opcode without a mapping is reported through
 * error() and is then emitted as MAD as well.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
126
/**
 * Map an rc_opcode to the R300_ALU_OUTA_* opcode of the alpha unit.
 *
 * Both DP3 and DP4 are emitted with the DP4 alpha opcode.  NOP is emitted
 * as MAD.  Any opcode without a mapping is reported through error() and is
 * then emitted as MAD as well.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		/* The message names this function, not its RGB sibling. */
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		FALLTHROUGH;
	case RC_OPCODE_NOP:
		FALLTHROUGH;
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
149
150 /**
151 * Emit one paired ALU instruction.
152 */
emit_alu(struct r300_emit_state * emit,struct rc_pair_instruction * inst)153 static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
154 {
155 int ip;
156 int j;
157 PROG_CODE;
158
159 if (code->alu.length >= c->Base.max_alu_insts) {
160 error("Too many ALU instructions");
161 return 0;
162 }
163
164 ip = code->alu.length++;
165
166 code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
167 code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
168
169 for(j = 0; j < 3; ++j) {
170 /* Set the RGB address */
171 unsigned int src = use_source(code, inst->RGB.Src[j]);
172 unsigned int arg;
173 if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
174 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
175
176 code->alu.inst[ip].rgb_addr |= src << (6*j);
177
178 /* Set the Alpha address */
179 src = use_source(code, inst->Alpha.Src[j]);
180 if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
181 code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
182
183 code->alu.inst[ip].alpha_addr |= src << (6*j);
184
185 arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
186 arg |= inst->RGB.Arg[j].Abs << 6;
187 arg |= inst->RGB.Arg[j].Negate << 5;
188 code->alu.inst[ip].rgb_inst |= arg << (7*j);
189
190 arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
191 arg |= inst->Alpha.Arg[j].Abs << 6;
192 arg |= inst->Alpha.Arg[j].Negate << 5;
193 code->alu.inst[ip].alpha_inst |= arg << (7*j);
194 }
195
196 /* Presubtract */
197 if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
198 switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
199 case RC_PRESUB_BIAS:
200 code->alu.inst[ip].rgb_inst |=
201 R300_ALU_SRCP_1_MINUS_2_SRC0;
202 break;
203 case RC_PRESUB_ADD:
204 code->alu.inst[ip].rgb_inst |=
205 R300_ALU_SRCP_SRC1_PLUS_SRC0;
206 break;
207 case RC_PRESUB_SUB:
208 code->alu.inst[ip].rgb_inst |=
209 R300_ALU_SRCP_SRC1_MINUS_SRC0;
210 break;
211 case RC_PRESUB_INV:
212 code->alu.inst[ip].rgb_inst |=
213 R300_ALU_SRCP_1_MINUS_SRC0;
214 break;
215 default:
216 break;
217 }
218 }
219
220 if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
221 switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
222 case RC_PRESUB_BIAS:
223 code->alu.inst[ip].alpha_inst |=
224 R300_ALU_SRCP_1_MINUS_2_SRC0;
225 break;
226 case RC_PRESUB_ADD:
227 code->alu.inst[ip].alpha_inst |=
228 R300_ALU_SRCP_SRC1_PLUS_SRC0;
229 break;
230 case RC_PRESUB_SUB:
231 code->alu.inst[ip].alpha_inst |=
232 R300_ALU_SRCP_SRC1_MINUS_SRC0;
233 break;
234 case RC_PRESUB_INV:
235 code->alu.inst[ip].alpha_inst |=
236 R300_ALU_SRCP_1_MINUS_SRC0;
237 break;
238 default:
239 break;
240 }
241 }
242
243 if (inst->RGB.Saturate)
244 code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
245 if (inst->Alpha.Saturate)
246 code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
247
248 if (inst->RGB.WriteMask) {
249 use_temporary(code, inst->RGB.DestIndex);
250 if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
251 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
252 code->alu.inst[ip].rgb_addr |=
253 ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
254 (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
255 }
256 if (inst->RGB.OutputWriteMask) {
257 code->alu.inst[ip].rgb_addr |=
258 (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
259 R300_RGB_TARGET(inst->RGB.Target);
260 emit->node_flags |= R300_RGBA_OUT;
261 }
262
263 if (inst->Alpha.WriteMask) {
264 use_temporary(code, inst->Alpha.DestIndex);
265 if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
266 code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
267 code->alu.inst[ip].alpha_addr |=
268 ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
269 R300_ALU_DSTA_REG;
270 }
271 if (inst->Alpha.OutputWriteMask) {
272 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
273 R300_ALPHA_TARGET(inst->Alpha.Target);
274 emit->node_flags |= R300_RGBA_OUT;
275 }
276 if (inst->Alpha.DepthWriteMask) {
277 code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
278 emit->node_flags |= R300_W_OUT;
279 c->code->writes_depth = 1;
280 }
281 if (inst->Nop)
282 code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
283
284 /* Handle Output Modifier
285 * According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
286 if (inst->RGB.Omod) {
287 if (inst->RGB.Omod == RC_OMOD_DISABLE) {
288 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
289 }
290 code->alu.inst[ip].rgb_inst |=
291 (inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
292 }
293 if (inst->Alpha.Omod) {
294 if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
295 rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
296 }
297 code->alu.inst[ip].alpha_inst |=
298 (inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
299 }
300 return 1;
301 }
302
303
304 /**
305 * Finish the current node without advancing to the next one.
306 */
finish_node(struct r300_emit_state * emit)307 static int finish_node(struct r300_emit_state * emit)
308 {
309 struct r300_fragment_program_compiler * c = emit->compiler;
310 struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
311 unsigned alu_offset;
312 unsigned alu_end;
313 unsigned tex_offset;
314 unsigned tex_end;
315
316 unsigned int alu_offset_msbs, alu_end_msbs;
317
318 if (code->alu.length == emit->node_first_alu) {
319 /* Generate a single NOP for this node */
320 struct rc_pair_instruction inst;
321 memset(&inst, 0, sizeof(inst));
322 if (!emit_alu(emit, &inst))
323 return 0;
324 }
325
326 alu_offset = emit->node_first_alu;
327 alu_end = code->alu.length - alu_offset - 1;
328 tex_offset = emit->node_first_tex;
329 tex_end = code->tex.length - tex_offset - 1;
330
331 if (code->tex.length == emit->node_first_tex) {
332 if (emit->current_node > 0) {
333 error("Node %i has no TEX instructions", emit->current_node);
334 return 0;
335 }
336
337 tex_end = 0;
338 } else {
339 if (emit->current_node == 0)
340 code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
341 }
342
343 /* Write the config register.
344 * Note: The order in which the words for each node are written
345 * is not correct here and needs to be fixed up once we're entirely
346 * done
347 *
348 * Also note that the register specification from AMD is slightly
349 * incorrect in its description of this register. */
350 code->code_addr[emit->current_node] =
351 ((alu_offset << R300_ALU_START_SHIFT)
352 & R300_ALU_START_MASK)
353 | ((alu_end << R300_ALU_SIZE_SHIFT)
354 & R300_ALU_SIZE_MASK)
355 | ((tex_offset << R300_TEX_START_SHIFT)
356 & R300_TEX_START_MASK)
357 | ((tex_end << R300_TEX_SIZE_SHIFT)
358 & R300_TEX_SIZE_MASK)
359 | emit->node_flags
360 | (get_msbs_tex(tex_offset, 5)
361 << R400_TEX_START_MSB_SHIFT)
362 | (get_msbs_tex(tex_end, 5)
363 << R400_TEX_SIZE_MSB_SHIFT)
364 ;
365
366 /* Write r400 extended instruction fields. These will be ignored on
367 * r300 cards. */
368 alu_offset_msbs = get_msbs_alu(alu_offset);
369 alu_end_msbs = get_msbs_alu(alu_end);
370 switch(emit->current_node) {
371 case 0:
372 code->r400_code_offset_ext |=
373 alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
374 | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
375 break;
376 case 1:
377 code->r400_code_offset_ext |=
378 alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
379 | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
380 break;
381 case 2:
382 code->r400_code_offset_ext |=
383 alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
384 | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
385 break;
386 case 3:
387 code->r400_code_offset_ext |=
388 alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
389 | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
390 break;
391 }
392 return 1;
393 }
394
395
396 /**
397 * Begin a block of texture instructions.
398 * Create the necessary indirection.
399 */
begin_tex(struct r300_emit_state * emit)400 static int begin_tex(struct r300_emit_state * emit)
401 {
402 PROG_CODE;
403
404 if (code->alu.length == emit->node_first_alu &&
405 code->tex.length == emit->node_first_tex) {
406 return 1;
407 }
408
409 if (emit->current_node == 3) {
410 error("Too many texture indirections");
411 return 0;
412 }
413
414 if (!finish_node(emit))
415 return 0;
416
417 emit->current_node++;
418 emit->node_first_tex = code->tex.length;
419 emit->node_first_alu = code->alu.length;
420 emit->node_flags = 0;
421 return 1;
422 }
423
424
emit_tex(struct r300_emit_state * emit,struct rc_instruction * inst)425 static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
426 {
427 unsigned int unit;
428 unsigned int dest;
429 unsigned int opcode;
430 PROG_CODE;
431
432 if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
433 error("Too many TEX instructions");
434 return 0;
435 }
436
437 unit = inst->U.I.TexSrcUnit;
438 dest = inst->U.I.DstReg.Index;
439
440 switch(inst->U.I.Opcode) {
441 case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
442 case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
443 case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
444 case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
445 default:
446 error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
447 return 0;
448 }
449
450 if (inst->U.I.Opcode == RC_OPCODE_KIL) {
451 unit = 0;
452 dest = 0;
453 } else {
454 use_temporary(code, dest);
455 }
456
457 use_temporary(code, inst->U.I.SrcReg[0].Index);
458
459 code->tex.inst[code->tex.length++] =
460 ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
461 & R300_SRC_ADDR_MASK)
462 | ((dest << R300_DST_ADDR_SHIFT)
463 & R300_DST_ADDR_MASK)
464 | (unit << R300_TEX_ID_SHIFT)
465 | (opcode << R300_TEX_INST_SHIFT)
466 | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
467 R400_SRC_ADDR_EXT_BIT : 0)
468 | (dest >= R300_PFS_NUM_TEMP_REGS ?
469 R400_DST_ADDR_EXT_BIT : 0)
470 ;
471 return 1;
472 }
473
474
475 /**
476 * Final compilation step: Turn the intermediate radeon_program into
477 * machine-readable instructions.
478 */
r300BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)479 void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
480 {
481 struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
482 struct r300_emit_state emit;
483 struct r300_fragment_program_code *code = &compiler->code->code.r300;
484 unsigned int tex_end;
485
486 memset(&emit, 0, sizeof(emit));
487 emit.compiler = compiler;
488
489 memset(code, 0, sizeof(struct r300_fragment_program_code));
490
491 for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
492 inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
493 inst = inst->Next) {
494 if (inst->Type == RC_INSTRUCTION_NORMAL) {
495 if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
496 begin_tex(&emit);
497 continue;
498 }
499
500 emit_tex(&emit, inst);
501 } else {
502 emit_alu(&emit, &inst->U.P);
503 }
504 }
505
506 if (code->pixsize >= compiler->Base.max_temp_regs)
507 rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
508
509 if (compiler->Base.Error)
510 return;
511
512 /* Finish the program */
513 finish_node(&emit);
514
515 code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
516
517 /* Set r400 extended instruction fields. These values will be ignored
518 * on r300 cards. */
519 code->r400_code_offset_ext |=
520 (get_msbs_alu(0)
521 << R400_ALU_OFFSET_MSB_SHIFT)
522 | (get_msbs_alu(code->alu.length - 1)
523 << R400_ALU_SIZE_MSB_SHIFT);
524
525 tex_end = code->tex.length ? code->tex.length - 1 : 0;
526 code->code_offset =
527 ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
528 & R300_PFS_CNTL_ALU_OFFSET_MASK)
529 | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
530 & R300_PFS_CNTL_ALU_END_MASK)
531 | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
532 & R300_PFS_CNTL_TEX_OFFSET_MASK)
533 | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
534 & R300_PFS_CNTL_TEX_END_MASK)
535 | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
536 | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
537 ;
538
539 if (emit.current_node < 3) {
540 int shift = 3 - emit.current_node;
541 int i;
542 for(i = emit.current_node; i >= 0; --i)
543 code->code_addr[shift + i] = code->code_addr[i];
544 for(i = 0; i < shift; ++i)
545 code->code_addr[i] = 0;
546 }
547
548 if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
549 || code->alu.length > R300_PFS_MAX_ALU_INST
550 || code->tex.length > R300_PFS_MAX_TEX_INST) {
551
552 code->r390_mode = 1;
553 }
554 }
555