1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdarg.h>
29
30 #include "i915_context.h"
31 #include "i915_debug.h"
32 #include "i915_debug_private.h"
33 #include "i915_fpc.h"
34 #include "i915_reg.h"
35
36 #include "pipe/p_shader_tokens.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "util/log.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_string.h"
45
46 #include "draw/draw_vertex.h"
47
48 #ifndef M_PI
49 #define M_PI 3.14159265358979323846
50 #endif
51
52 /**
53 * Simple pass-through fragment shader to use when we don't have
54 * a real shader (or it fails to compile for some reason).
55 */
56 static unsigned passthrough_program[] = {
57 _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
58 /* move to output color:
59 */
60 (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
61 (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
62 ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
63 (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
64 (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
65 (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
66 0};
67
68 /**
69 * component-wise negation of ureg
70 */
71 static inline int
negate(int reg,int x,int y,int z,int w)72 negate(int reg, int x, int y, int z, int w)
73 {
74 /* Another neat thing about the UREG representation */
75 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
76 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
77 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
78 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
79 }
80
81 /**
82 * In the event of a translation failure, we'll generate a simple color
83 * pass-through program.
84 */
85 static void
i915_use_passthrough_shader(struct i915_fragment_shader * fs)86 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
87 {
88 fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
89 if (fs->program) {
90 memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
91 fs->program_len = ARRAY_SIZE(passthrough_program);
92 }
93 fs->num_constants = 0;
94 }
95
96 void
i915_program_error(struct i915_fp_compile * p,const char * msg,...)97 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
98 {
99 if (p->log_program_errors) {
100 va_list args;
101
102 va_start(args, msg);
103 mesa_loge_v(msg, args);
104 va_end(args);
105 }
106
107 p->error = 1;
108 }
109
110 static uint32_t
get_mapping(struct i915_fragment_shader * fs,enum tgsi_semantic semantic,int index)111 get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
112 int index)
113 {
114 int i;
115 for (i = 0; i < I915_TEX_UNITS; i++) {
116 if (fs->texcoords[i].semantic == -1) {
117 fs->texcoords[i].semantic = semantic;
118 fs->texcoords[i].index = index;
119 return i;
120 }
121 if (fs->texcoords[i].semantic == semantic &&
122 fs->texcoords[i].index == index)
123 return i;
124 }
125 debug_printf("Exceeded max generics\n");
126 return 0;
127 }
128
/**
 * Construct a ureg for the given source register.  Will emit
 * constants, apply swizzling and negation as needed.
 *
 * \param p       compile state; receives declarations, utemps and any
 *                instruction needed for the absolute-value emulation
 * \param source  TGSI source operand (file, index, swizzle, abs, negate)
 * \param fs      fragment shader whose texcoord mapping and reads_pntc
 *                flag are updated for input registers
 * \return the ureg for the operand, or 0 after i915_program_error() has
 *         been called for an unsupported file/semantic or an out-of-range
 *         temporary
 */
static uint32_t
src_vector(struct i915_fp_compile *p,
           const struct i915_full_src_register *source,
           struct i915_fragment_shader *fs)
{
   uint32_t index = source->Register.Index;
   uint32_t src = 0, sem_name, sem_ind;

   switch (source->Register.File) {
   case TGSI_FILE_TEMPORARY:
      if (source->Register.Index >= I915_MAX_TEMPORARY) {
         i915_program_error(p, "Exceeded max temporary reg");
         return 0;
      }
      src = UREG(REG_TYPE_R, index);
      break;
   case TGSI_FILE_INPUT:
      /* XXX: Packing COL1, FOGC into a single attribute works for
       * texenv programs, but will fail for real fragment programs
       * that use these attributes and expect them to be a full 4
       * components wide.  Could use a texcoord to pass these
       * attributes if necessary, but that won't work in the general
       * case.
       *
       * We also use a texture coordinate to pass wpos when possible.
       */

      sem_name = p->shader->info.input_semantic_name[index];
      sem_ind = p->shader->info.input_semantic_index[index];

      switch (sem_name) {
      case TGSI_SEMANTIC_GENERIC:
      case TGSI_SEMANTIC_TEXCOORD:
      case TGSI_SEMANTIC_PCOORD:
      case TGSI_SEMANTIC_POSITION: {
         /* All of these are routed through a texcoord slot chosen by
          * get_mapping(); point coord reads are additionally recorded on
          * the shader.
          */
         if (sem_name == TGSI_SEMANTIC_PCOORD)
            fs->reads_pntc = true;

         int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
         src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
                              D0_CHANNEL_ALL);
         break;
      }
      case TGSI_SEMANTIC_COLOR:
         if (sem_ind == 0) {
            src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
         } else {
            /* secondary color: only XYZ are declared, W reads as ONE */
            assert(sem_ind == 1);
            src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
            src = swizzle(src, X, Y, Z, ONE);
         }
         break;
      case TGSI_SEMANTIC_FOG:
         /* Only the W channel is declared; replicate it to all channels. */
         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
         src = swizzle(src, W, W, W, W);
         break;
      case TGSI_SEMANTIC_FACE: {
         /* for back/front faces */
         int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
         src =
            i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
         break;
      }
      default:
         i915_program_error(p, "Bad source->Index");
         return 0;
      }
      break;

   case TGSI_FILE_IMMEDIATE: {
      assert(index < p->num_immediates);

      uint8_t swiz[4] = {
         source->Register.SwizzleX,
         source->Register.SwizzleY,
         source->Register.SwizzleZ,
         source->Register.SwizzleW
      };

      uint8_t neg[4] = {
         source->Register.Negate,
         source->Register.Negate,
         source->Register.Negate,
         source->Register.Negate
      };

      unsigned i;

      /* Try to express every selected component as a ZERO/ONE swizzle
       * (with an optional negate for -1.0).  The loop stops early at the
       * first component whose value is none of 0.0, 1.0 or -1.0.
       */
      for (i = 0; i < 4; i++) {
         if (swiz[i] == TGSI_SWIZZLE_ZERO || swiz[i] == TGSI_SWIZZLE_ONE) {
            continue;
         } else if (p->immediates[index][swiz[i]] == 0.0) {
            swiz[i] = TGSI_SWIZZLE_ZERO;
         } else if (p->immediates[index][swiz[i]] == 1.0) {
            swiz[i] = TGSI_SWIZZLE_ONE;
         } else if (p->immediates[index][swiz[i]] == -1.0) {
            swiz[i] = TGSI_SWIZZLE_ONE;
            neg[i] ^= 1;
         } else {
            break;
         }
      }

      /* All four components were trivial: no constant slot is needed.
       * NOTE(review): this early return skips the common tail below, so
       * source->Register.Absolute is not consulted on this path; the
       * swizzle and negate have already been folded in here.
       */
      if (i == 4) {
         return negate(swizzle(UREG(REG_TYPE_R, 0),
                               swiz[0], swiz[1], swiz[2], swiz[3]),
                       neg[0], neg[1], neg[2], neg[3]);
      }

      /* Otherwise read the immediate from the constant slot recorded in
       * immediates_map and share the TGSI_FILE_CONSTANT handling.
       */
      index = p->immediates_map[index];
      FALLTHROUGH;
   }

   case TGSI_FILE_CONSTANT:
      src = UREG(REG_TYPE_CONST, index);
      break;

   default:
      i915_program_error(p, "Bad source->File");
      return 0;
   }

   src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
                 source->Register.SwizzleZ, source->Register.SwizzleW);

   /* No HW abs flag, so we have to max with the negation. */
   if (source->Register.Absolute) {
      uint32_t tmp = i915_get_utemp(p);
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
                      negate(src, 1, 1, 1, 1), 0);
      src = tmp;
   }

   /* The TGSI operand carries a single Negate flag; apply it to all four
    * channels.  It is applied after the absolute value above.
    */
   {
      int n = source->Register.Negate;
      src = negate(src, n, n, n, n);
   }

   return src;
}
277
278 /**
279 * Construct a ureg for a destination register.
280 */
281 static uint32_t
get_result_vector(struct i915_fp_compile * p,const struct i915_full_dst_register * dest)282 get_result_vector(struct i915_fp_compile *p,
283 const struct i915_full_dst_register *dest)
284 {
285 switch (dest->Register.File) {
286 case TGSI_FILE_OUTPUT: {
287 uint32_t sem_name =
288 p->shader->info.output_semantic_name[dest->Register.Index];
289 switch (sem_name) {
290 case TGSI_SEMANTIC_POSITION:
291 return UREG(REG_TYPE_OD, 0);
292 case TGSI_SEMANTIC_COLOR:
293 return UREG(REG_TYPE_OC, 0);
294 default:
295 i915_program_error(p, "Bad inst->DstReg.Index/semantics");
296 return 0;
297 }
298 }
299 case TGSI_FILE_TEMPORARY:
300 return UREG(REG_TYPE_R, dest->Register.Index);
301 default:
302 i915_program_error(p, "Bad inst->DstReg.File");
303 return 0;
304 }
305 }
306
307 /**
308 * Compute flags for saturation and writemask.
309 */
310 static uint32_t
get_result_flags(const struct i915_full_instruction * inst)311 get_result_flags(const struct i915_full_instruction *inst)
312 {
313 const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
314 uint32_t flags = 0x0;
315
316 if (inst->Instruction.Saturate)
317 flags |= A0_DEST_SATURATE;
318
319 if (writeMask & TGSI_WRITEMASK_X)
320 flags |= A0_DEST_CHANNEL_X;
321 if (writeMask & TGSI_WRITEMASK_Y)
322 flags |= A0_DEST_CHANNEL_Y;
323 if (writeMask & TGSI_WRITEMASK_Z)
324 flags |= A0_DEST_CHANNEL_Z;
325 if (writeMask & TGSI_WRITEMASK_W)
326 flags |= A0_DEST_CHANNEL_W;
327
328 return flags;
329 }
330
331 /**
332 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
333 */
334 static uint32_t
translate_tex_src_target(struct i915_fp_compile * p,uint32_t tex)335 translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
336 {
337 switch (tex) {
338 case TGSI_TEXTURE_SHADOW1D:
339 FALLTHROUGH;
340 case TGSI_TEXTURE_1D:
341 return D0_SAMPLE_TYPE_2D;
342
343 case TGSI_TEXTURE_SHADOW2D:
344 FALLTHROUGH;
345 case TGSI_TEXTURE_2D:
346 return D0_SAMPLE_TYPE_2D;
347
348 case TGSI_TEXTURE_SHADOWRECT:
349 FALLTHROUGH;
350 case TGSI_TEXTURE_RECT:
351 return D0_SAMPLE_TYPE_2D;
352
353 case TGSI_TEXTURE_3D:
354 return D0_SAMPLE_TYPE_VOLUME;
355
356 case TGSI_TEXTURE_CUBE:
357 return D0_SAMPLE_TYPE_CUBE;
358
359 default:
360 i915_program_error(p, "TexSrc type");
361 return 0;
362 }
363 }
364
365 /**
366 * Return the number of coords needed to access a given TGSI_TEXTURE_*
367 */
368 uint32_t
i915_coord_mask(enum tgsi_opcode opcode,enum tgsi_texture_type tex)369 i915_coord_mask(enum tgsi_opcode opcode, enum tgsi_texture_type tex)
370 {
371 uint32_t coord_mask = 0;
372
373 if (opcode == TGSI_OPCODE_TXP || opcode == TGSI_OPCODE_TXB)
374 coord_mask |= TGSI_WRITEMASK_W;
375
376 switch (tex) {
377 case TGSI_TEXTURE_1D: /* See the 1D coord swizzle below. */
378 case TGSI_TEXTURE_2D:
379 case TGSI_TEXTURE_RECT:
380 return coord_mask | TGSI_WRITEMASK_XY;
381
382 case TGSI_TEXTURE_SHADOW1D:
383 case TGSI_TEXTURE_SHADOW2D:
384 case TGSI_TEXTURE_SHADOWRECT:
385 case TGSI_TEXTURE_3D:
386 case TGSI_TEXTURE_CUBE:
387 return coord_mask | TGSI_WRITEMASK_XYZ;
388
389 default:
390 unreachable("bad texture target");
391 }
392 }
393
394 /**
395 * Generate texel lookup instruction.
396 */
397 static void
emit_tex(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,struct i915_fragment_shader * fs)398 emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
399 uint32_t opcode, struct i915_fragment_shader *fs)
400 {
401 uint32_t texture = inst->Texture.Texture;
402 uint32_t unit = inst->Src[1].Register.Index;
403 uint32_t tex = translate_tex_src_target(p, texture);
404 uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
405 uint32_t coord = src_vector(p, &inst->Src[0], fs);
406
407 /* For 1D textures, set the Y coord to the same as X. Otherwise, we could
408 * select the wrong LOD based on the uninitialized Y coord when we sample our
409 * 1D textures as 2D.
410 */
411 if (texture == TGSI_TEXTURE_1D || texture == TGSI_TEXTURE_SHADOW1D)
412 coord = swizzle(coord, X, X, Z, W);
413
414 i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
415 get_result_flags(inst), sampler, coord, opcode,
416 i915_coord_mask(inst->Instruction.Opcode, texture));
417 }
418
419 /**
420 * Generate a simple arithmetic instruction
421 * \param opcode the i915 opcode
422 * \param numArgs the number of input/src arguments
423 */
424 static void
emit_simple_arith(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)425 emit_simple_arith(struct i915_fp_compile *p,
426 const struct i915_full_instruction *inst, uint32_t opcode,
427 uint32_t numArgs, struct i915_fragment_shader *fs)
428 {
429 uint32_t arg1, arg2, arg3;
430
431 assert(numArgs <= 3);
432
433 arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
434 arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
435 arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
436
437 i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
438 get_result_flags(inst), 0, arg1, arg2, arg3);
439 }
440
441 /** As above, but swap the first two src regs */
442 static void
emit_simple_arith_swap2(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)443 emit_simple_arith_swap2(struct i915_fp_compile *p,
444 const struct i915_full_instruction *inst,
445 uint32_t opcode, uint32_t numArgs,
446 struct i915_fragment_shader *fs)
447 {
448 struct i915_full_instruction inst2;
449
450 assert(numArgs == 2);
451
452 /* transpose first two registers */
453 inst2 = *inst;
454 inst2.Src[0] = inst->Src[1];
455 inst2.Src[1] = inst->Src[0];
456
457 emit_simple_arith(p, &inst2, opcode, numArgs, fs);
458 }
459
/*
 * Translate TGSI instruction to i915 instruction.
 *
 * \param p     compile state that receives the emitted instructions
 * \param inst  the TGSI instruction to translate
 * \param fs    fragment shader passed through to src_vector()
 *
 * Unsupported opcodes are reported through i915_program_error().  On every
 * other path the utemps taken during translation are released before
 * returning.
 *
 * Possible concerns:
 *
 * DDX, DDY -- return 0
 * SIN, COS -- could use another taylor step?
 * LIT -- results seem a little different to sw mesa
 * LOG -- different to mesa on negative numbers, but this is conformant.
 */
static void
i915_translate_instruction(struct i915_fp_compile *p,
                           const struct i915_full_instruction *inst,
                           struct i915_fragment_shader *fs)
{
   uint32_t src0, src1, src2, flags;
   uint32_t tmp = 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
      emit_simple_arith(p, inst, A0_ADD, 2, fs);
      break;

   case TGSI_OPCODE_CEIL:
      /* ceil(x) is computed as -floor(-x) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);
      i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1), 0, 0);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, src0, src2,
                      src1); /* NOTE: order of src2, src1 */
      break;

   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      /* XXX We just output 0 here */
      debug_printf("Punting DDX/DDY\n");
      /* The destination ureg is reused as the source with an all-ZERO
       * swizzle, so the MOV writes zeros.
       */
      src0 = get_result_vector(p, &inst->Dst[0]);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
      break;

   case TGSI_OPCODE_DP2:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* DP3 with the Z (and W) of the first operand forced to ZERO */
      i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
      break;

   case TGSI_OPCODE_DP3:
      emit_simple_arith(p, inst, A0_DP3, 2, fs);
      break;

   case TGSI_OPCODE_DP4:
      emit_simple_arith(p, inst, A0_DP4, 2, fs);
      break;

   case TGSI_OPCODE_DST:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
                      swizzle(src1, ONE, Y, ONE, W), 0);
      break;

   case TGSI_OPCODE_END:
      /* no-op */
      break;

   case TGSI_OPCODE_EX2:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the X component of the source is replicated */
      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_FLR:
      emit_simple_arith(p, inst, A0_FLR, 1, fs);
      break;

   case TGSI_OPCODE_FRC:
      emit_simple_arith(p, inst, A0_FRC, 1, fs);
      break;

   case TGSI_OPCODE_KILL_IF:
      /* kill if src[0].x < 0 || src[0].y < 0 ... */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      src0,                /* coord*/
                      T0_TEXKILL,          /* opcode */
                      TGSI_WRITEMASK_XYZW);/* coord_mask */
      break;

   case TGSI_OPCODE_KILL:
      /* unconditional kill: TEXKILL on a coordinate built from negated
       * ONE swizzles
       */
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                             1, 1, 1, 1), /* coord */
                      T0_TEXKILL,         /* opcode */
                      TGSI_WRITEMASK_X);  /* coord_mask */
      break;

   case TGSI_OPCODE_LG2:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the X component of the source is replicated */
      i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_LIT:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = max( a.xyzw, a.00zw )
       * XXX: Clamp tmp.w to -128..128
       * tmp.y = log(tmp.y)
       * tmp.y = tmp.w * tmp.y
       * tmp.y = exp(tmp.y)
       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
       */
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, Z, W), 0);

      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(
         p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
         swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));

      break;

   case TGSI_OPCODE_LRP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      flags = get_result_flags(inst);
      tmp = i915_get_utemp(p);

      /* b*a + c*(1-a)
       *
       * b*a + c - ca
       *
       * tmp = b*a + c,
       * result = (-c)*a + tmp
       */
      i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
                      src0, src2);

      i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(src2, 1, 1, 1, 1), src0, tmp);
      break;

   case TGSI_OPCODE_MAD:
      emit_simple_arith(p, inst, A0_MAD, 3, fs);
      break;

   case TGSI_OPCODE_MAX:
      emit_simple_arith(p, inst, A0_MAX, 2, fs);
      break;

   case TGSI_OPCODE_MIN:
      emit_simple_arith(p, inst, A0_MIN, 2, fs);
      break;

   case TGSI_OPCODE_MOV:
      emit_simple_arith(p, inst, A0_MOV, 1, fs);
      break;

   case TGSI_OPCODE_MUL:
      emit_simple_arith(p, inst, A0_MUL, 2, fs);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_POW:
      /* pow(a, b) is computed as exp(log(a.x) * b), using tmp.x */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      /* XXX: masking on intermediate values, here and elsewhere.
       */
      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
                      swizzle(src0, X, X, X, X), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);

      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      swizzle(tmp, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RET:
      /* XXX: no-op? */
      break;

   case TGSI_OPCODE_RCP:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the X component of the source is replicated */
      i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_RSQ:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* scalar op: the X component of the source is replicated */
      i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_SEQ: {
      /* ureg value that src_vector() yields for an all-zero operand;
       * used below to detect comparisons against zero
       */
      const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
                                    SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);

      /* if we're both >= and <= then we're == */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      if (src0 == zero || src1 == zero) {
         /* keep the non-zero operand in src0 */
         if (src0 == zero)
            src0 = src1;

         /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only
          * two instructions instead of three.
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                         negate(src0, 1, 1, 1, 1), 0);
         i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         negate(tmp, 1, 1, 1, 1), zero, 0);
      } else {
         /* tmp = (a >= b), dst = (b >= a), dst = dst * tmp */
         i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

         i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0, src1, src0, 0);

         i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         get_result_vector(p, &inst->Dst[0]), tmp, 0);
      }

      break;
   }

   case TGSI_OPCODE_SGE:
      emit_simple_arith(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLE:
      /* like SGE, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLT:
      emit_simple_arith(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SGT:
      /* like SLT, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SNE: {
      /* ureg value that src_vector() yields for an all-zero operand;
       * used below to detect comparisons against zero
       */
      const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
                                    SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);

      /* if we're < or > then we're != */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      if (src0 == zero || src1 == zero) {
         /* keep the non-zero operand in src0 */
         if (src0 == zero)
            src0 = src1;

         /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only
          * two instructions instead of three.
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                         negate(src0, 1, 1, 1, 1), 0);
         i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         negate(tmp, 1, 1, 1, 1), zero, 0);
      } else {
         /* tmp = (a < b), dst = (b < a), dst = dst + tmp */
         i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

         i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0, src1, src0, 0);

         i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         get_result_vector(p, &inst->Dst[0]), tmp, 0);
      }
      break;
   }

   case TGSI_OPCODE_SSG:
      /* compute (src>0) - (src<0) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = (src < 0) */
      i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);

      /* dst = (0 < src) */
      i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);

      /* dst = dst - tmp */
      i915_emit_arith(
         p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_TEX:
      emit_tex(p, inst, T0_TEXLD, fs);
      break;

   case TGSI_OPCODE_TRUNC:
      emit_simple_arith(p, inst, A0_TRC, 1, fs);
      break;

   case TGSI_OPCODE_TXB:
      emit_tex(p, inst, T0_TEXLDB, fs);
      break;

   case TGSI_OPCODE_TXP:
      emit_tex(p, inst, T0_TEXLDP, fs);
      break;

   default:
      /* Unsupported opcode: flag the error and return without reaching
       * the utemp release below.
       */
      i915_program_error(p, "bad opcode %s (%d)",
                         tgsi_get_opcode_name(inst->Instruction.Opcode),
                         inst->Instruction.Opcode);
      return;
   }

   i915_release_utemps(p);
}
838
839 static void
i915_translate_token(struct i915_fp_compile * p,const union i915_full_token * token,struct i915_fragment_shader * fs)840 i915_translate_token(struct i915_fp_compile *p,
841 const union i915_full_token *token,
842 struct i915_fragment_shader *fs)
843 {
844 struct i915_fragment_shader *ifs = p->shader;
845 switch (token->Token.Type) {
846 case TGSI_TOKEN_TYPE_PROPERTY:
847 /* Ignore properties where we only support one value. */
848 assert(token->FullProperty.Property.PropertyName ==
849 TGSI_PROPERTY_FS_COORD_ORIGIN ||
850 token->FullProperty.Property.PropertyName ==
851 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
852 token->FullProperty.Property.PropertyName ==
853 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS ||
854 token->FullProperty.Property.PropertyName ==
855 TGSI_PROPERTY_SEPARABLE_PROGRAM);
856 break;
857
858 case TGSI_TOKEN_TYPE_DECLARATION:
859 if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
860 if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
861 i915_program_error(p, "Exceeded %d max uniforms",
862 I915_MAX_CONSTANT);
863 } else {
864 uint32_t i;
865 for (i = token->FullDeclaration.Range.First;
866 i <= token->FullDeclaration.Range.Last; i++) {
867 ifs->constant_flags[i] = I915_CONSTFLAG_USER;
868 ifs->num_constants = MAX2(ifs->num_constants, i + 1);
869 }
870 }
871 } else if (token->FullDeclaration.Declaration.File ==
872 TGSI_FILE_TEMPORARY) {
873 if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
874 i915_program_error(p, "Exceeded %d max TGSI temps",
875 I915_MAX_TEMPORARY);
876 } else {
877 uint32_t i;
878 for (i = token->FullDeclaration.Range.First;
879 i <= token->FullDeclaration.Range.Last; i++) {
880 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
881 p->temp_flag |= (1 << i); /* mark temp as used */
882 }
883 }
884 }
885 break;
886
887 case TGSI_TOKEN_TYPE_IMMEDIATE: {
888 const struct tgsi_full_immediate *imm = &token->FullImmediate;
889 const uint32_t pos = p->num_immediates++;
890 uint32_t j;
891 assert(imm->Immediate.NrTokens <= 4 + 1);
892 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
893 p->immediates[pos][j] = imm->u[j].Float;
894 }
895 } break;
896
897 case TGSI_TOKEN_TYPE_INSTRUCTION:
898 if (p->first_instruction) {
899 /* resolve location of immediates */
900 uint32_t i, j;
901 for (i = 0; i < p->num_immediates; i++) {
902 /* find constant slot for this immediate */
903 for (j = 0; j < I915_MAX_CONSTANT; j++) {
904 if (ifs->constant_flags[j] == 0x0) {
905 memcpy(ifs->constants[j], p->immediates[i],
906 4 * sizeof(float));
907 /*printf("immediate %d maps to const %d\n", i, j);*/
908 ifs->constant_flags[j] = 0xf; /* all four comps used */
909 p->immediates_map[i] = j;
910 ifs->num_constants = MAX2(ifs->num_constants, j + 1);
911 break;
912 }
913 }
914 if (j == I915_MAX_CONSTANT) {
915 i915_program_error(p, "Exceeded %d max uniforms and immediates.",
916 I915_MAX_CONSTANT);
917 }
918 }
919
920 p->first_instruction = false;
921 }
922
923 i915_translate_instruction(p, &token->FullInstruction, fs);
924 break;
925
926 default:
927 assert(0);
928 }
929 }
930
931 /**
932 * Translate TGSI fragment shader into i915 hardware instructions.
933 * \param p the translation state
934 * \param tokens the TGSI token array
935 */
936 static void
i915_translate_instructions(struct i915_fp_compile * p,const struct i915_token_list * tokens,struct i915_fragment_shader * fs)937 i915_translate_instructions(struct i915_fp_compile *p,
938 const struct i915_token_list *tokens,
939 struct i915_fragment_shader *fs)
940 {
941 int i;
942 for (i = 0; i < tokens->NumTokens && !p->error; i++) {
943 i915_translate_token(p, &tokens->Tokens[i], fs);
944 }
945 }
946
947 static struct i915_fp_compile *
i915_init_compile(struct i915_context * i915,struct i915_fragment_shader * ifs)948 i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)
949 {
950 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
951 int i;
952
953 p->shader = ifs;
954
955 /* Put new constants at end of const buffer, growing downward.
956 * The problem is we don't know how many user-defined constants might
957 * be specified with pipe->set_constant_buffer().
958 * Should pre-scan the user's program to determine the highest-numbered
959 * constant referenced.
960 */
961 ifs->num_constants = 0;
962 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
963
964 memset(&p->register_phases, 0, sizeof(p->register_phases));
965
966 for (i = 0; i < I915_TEX_UNITS; i++)
967 ifs->texcoords[i].semantic = -1;
968
969 p->log_program_errors = !i915->no_log_program_errors;
970
971 p->first_instruction = true;
972
973 p->nr_tex_indirect = 1; /* correct? */
974 p->nr_tex_insn = 0;
975 p->nr_alu_insn = 0;
976 p->nr_decl_insn = 0;
977
978 p->csr = p->program;
979 p->decl = p->declarations;
980 p->decl_s = 0;
981 p->decl_t = 0;
982 p->temp_flag = ~0x0U << I915_MAX_TEMPORARY;
983 p->utemp_flag = ~0x7;
984
985 /* initialize the first program word */
986 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
987
988 return p;
989 }
990
991 /* Copy compile results to the fragment program struct and destroy the
992 * compilation context.
993 */
994 static void
i915_fini_compile(struct i915_context * i915,struct i915_fp_compile * p)995 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
996 {
997 struct i915_fragment_shader *ifs = p->shader;
998 unsigned long program_size = (unsigned long)(p->csr - p->program);
999 unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
1000
1001 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
1002 debug_printf("Exceeded max nr indirect texture lookups\n");
1003
1004 if (p->nr_tex_insn > I915_MAX_TEX_INSN)
1005 i915_program_error(p, "Exceeded max TEX instructions");
1006
1007 if (p->nr_alu_insn > I915_MAX_ALU_INSN)
1008 i915_program_error(p, "Exceeded max ALU instructions");
1009
1010 if (p->nr_decl_insn > I915_MAX_DECL_INSN)
1011 i915_program_error(p, "Exceeded max DECL instructions");
1012
1013 /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
1014 * TGSI_END), even when the depth write fixup gets emitted below - maybe that
1015 * one is fishy, too?
1016 */
1017 if (ifs->info.num_instructions == 1)
1018 i915_program_error(p, "Empty fragment shader");
1019
1020 if (p->error) {
1021 p->NumNativeInstructions = 0;
1022 p->NumNativeAluInstructions = 0;
1023 p->NumNativeTexInstructions = 0;
1024 p->NumNativeTexIndirections = 0;
1025
1026 i915_use_passthrough_shader(ifs);
1027 } else {
1028 p->NumNativeInstructions =
1029 p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1030 p->NumNativeAluInstructions = p->nr_alu_insn;
1031 p->NumNativeTexInstructions = p->nr_tex_insn;
1032 p->NumNativeTexIndirections = p->nr_tex_indirect;
1033
1034 /* patch in the program length */
1035 p->declarations[0] |= program_size + decl_size - 2;
1036
1037 /* Copy compilation results to fragment program struct:
1038 */
1039 assert(!ifs->program);
1040
1041 ifs->program_len = decl_size + program_size;
1042 ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
1043 memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
1044 memcpy(&ifs->program[decl_size], p->program,
1045 program_size * sizeof(uint32_t));
1046
1047 util_debug_message(
1048 &i915->debug, SHADER_INFO,
1049 "%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
1050 _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), (int)program_size,
1051 p->nr_tex_insn, p->nr_tex_indirect,
1052 p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
1053 ifs->num_constants);
1054 }
1055
1056 /* Release the compilation struct:
1057 */
1058 FREE(p);
1059 }
1060
1061 /**
1062 * Rather than trying to intercept and jiggle depth writes during
1063 * emit, just move the value into its correct position at the end of
1064 * the program:
1065 */
1066 static void
i915_fixup_depth_write(struct i915_fp_compile * p)1067 i915_fixup_depth_write(struct i915_fp_compile *p)
1068 {
1069 for (int i = 0; i < p->shader->info.num_outputs; i++) {
1070 if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
1071 continue;
1072
1073 const uint32_t depth = UREG(REG_TYPE_OD, 0);
1074
1075 i915_emit_arith(p, A0_MOV, /* opcode */
1076 depth, /* dest reg */
1077 A0_DEST_CHANNEL_W, /* write mask */
1078 0, /* saturate? */
1079 swizzle(depth, X, Y, Z, Z), /* src0 */
1080 0, 0 /* src1, src2 */);
1081 }
1082 }
1083
1084 void
i915_translate_fragment_program(struct i915_context * i915,struct i915_fragment_shader * fs)1085 i915_translate_fragment_program(struct i915_context *i915,
1086 struct i915_fragment_shader *fs)
1087 {
1088 struct i915_fp_compile *p;
1089 const struct tgsi_token *tokens = fs->state.tokens;
1090 struct i915_token_list *i_tokens;
1091
1092 if (I915_DBG_ON(DBG_FS)) {
1093 mesa_logi("TGSI fragment shader:");
1094 tgsi_dump(tokens, 0);
1095 }
1096
1097 p = i915_init_compile(i915, fs);
1098
1099 i_tokens = i915_optimize(tokens);
1100 i915_translate_instructions(p, i_tokens, fs);
1101 i915_fixup_depth_write(p);
1102
1103 i915_fini_compile(i915, p);
1104 i915_optimize_free(i_tokens);
1105
1106 if (I915_DBG_ON(DBG_FS)) {
1107 mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1108 fs->num_constants ? ":" : "");
1109
1110 for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1111 if (fs->constant_flags[i] &&
1112 fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1113 mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1114 fs->constants[i][1], fs->constants[i][2],
1115 fs->constants[i][3]);
1116 }
1117 }
1118 i915_disassemble_program(fs->program, fs->program_len);
1119 }
1120 }
1121