• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include <stdarg.h>
29 
30 #include "i915_context.h"
31 #include "i915_debug.h"
32 #include "i915_debug_private.h"
33 #include "i915_fpc.h"
34 #include "i915_reg.h"
35 
36 #include "pipe/p_shader_tokens.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "tgsi/tgsi_from_mesa.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "util/log.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "util/u_string.h"
45 
46 #include "draw/draw_vertex.h"
47 
48 #ifndef M_PI
49 #define M_PI 3.14159265358979323846
50 #endif
51 
/**
 * Simple pass-through fragment shader to use when we don't have
 * a real shader (or it fails to compile for some reason).
 *
 * The table is a complete packet: one header dword followed by the three
 * dwords of a single arithmetic instruction.  i915_use_passthrough_shader()
 * copies it verbatim into a freshly allocated buffer.
 */
static unsigned passthrough_program[] = {
   /* Packet header; the low bits carry (number of dwords that follow) - 1,
    * i.e. one instruction of three dwords.
    */
   _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
   /* move to output color:
    */
   (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
    (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
   /* Per-channel source selectors for src0: X=ONE, Y=ZERO, Z=ZERO, W=ONE.
    * NOTE(review): presumably these are the constant-one/constant-zero
    * swizzles, so the contents of R0 are never actually read -- confirm
    * against i915_reg.h.
    */
   ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
    (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
    (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
    (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
   /* Third instruction dword: no further operands. */
   0};
67 
68 /**
69  * component-wise negation of ureg
70  */
71 static inline int
negate(int reg,int x,int y,int z,int w)72 negate(int reg, int x, int y, int z, int w)
73 {
74    /* Another neat thing about the UREG representation */
75    return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
76                  ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
77                  ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
78                  ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
79 }
80 
81 /**
82  * In the event of a translation failure, we'll generate a simple color
83  * pass-through program.
84  */
85 static void
i915_use_passthrough_shader(struct i915_fragment_shader * fs)86 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
87 {
88    fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
89    if (fs->program) {
90       memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
91       fs->program_len = ARRAY_SIZE(passthrough_program);
92    }
93    fs->num_constants = 0;
94 }
95 
96 void
i915_program_error(struct i915_fp_compile * p,const char * msg,...)97 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
98 {
99    if (p->log_program_errors) {
100       va_list args;
101 
102       va_start(args, msg);
103       mesa_loge_v(msg, args);
104       va_end(args);
105    }
106 
107    p->error = 1;
108 }
109 
110 static uint32_t
get_mapping(struct i915_fragment_shader * fs,enum tgsi_semantic semantic,int index)111 get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
112             int index)
113 {
114    int i;
115    for (i = 0; i < I915_TEX_UNITS; i++) {
116       if (fs->texcoords[i].semantic == -1) {
117          fs->texcoords[i].semantic = semantic;
118          fs->texcoords[i].index = index;
119          return i;
120       }
121       if (fs->texcoords[i].semantic == semantic &&
122           fs->texcoords[i].index == index)
123          return i;
124    }
125    debug_printf("Exceeded max generics\n");
126    return 0;
127 }
128 
/**
 * Construct a ureg for the given source register.  Will emit
 * constants, apply swizzling and negation as needed.
 *
 * \param p       compile context; declarations and (for Absolute sources)
 *                an extra MAX instruction may be emitted into it
 * \param source  the TGSI source operand to translate
 * \param fs      fragment shader, used to map input semantics to
 *                texcoord slots and to record point-coord usage
 * \return the ureg for the operand, or 0 after reporting an error through
 *         i915_program_error()
 */
static uint32_t
src_vector(struct i915_fp_compile *p,
           const struct i915_full_src_register *source,
           struct i915_fragment_shader *fs)
{
   uint32_t index = source->Register.Index;
   uint32_t src = 0, sem_name, sem_ind;

   switch (source->Register.File) {
   case TGSI_FILE_TEMPORARY:
      if (source->Register.Index >= I915_MAX_TEMPORARY) {
         i915_program_error(p, "Exceeded max temporary reg");
         return 0;
      }
      src = UREG(REG_TYPE_R, index);
      break;
   case TGSI_FILE_INPUT:
      /* XXX: Packing COL1, FOGC into a single attribute works for
       * texenv programs, but will fail for real fragment programs
       * that use these attributes and expect them to be a full 4
       * components wide.  Could use a texcoord to pass these
       * attributes if necessary, but that won't work in the general
       * case.
       *
       * We also use a texture coordinate to pass wpos when possible.
       */

      sem_name = p->shader->info.input_semantic_name[index];
      sem_ind = p->shader->info.input_semantic_index[index];

      switch (sem_name) {
      case TGSI_SEMANTIC_GENERIC:
      case TGSI_SEMANTIC_TEXCOORD:
      case TGSI_SEMANTIC_PCOORD:
      case TGSI_SEMANTIC_POSITION: {
         /* All of these travel in a texcoord slot picked by get_mapping(). */
         if (sem_name == TGSI_SEMANTIC_PCOORD)
            fs->reads_pntc = true;

         int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
         src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
                              D0_CHANNEL_ALL);
         break;
      }
      case TGSI_SEMANTIC_COLOR:
         if (sem_ind == 0) {
            src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
         } else {
            /* secondary color */
            assert(sem_ind == 1);
            /* Only XYZ is declared; W is forced to the constant ONE. */
            src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
            src = swizzle(src, X, Y, Z, ONE);
         }
         break;
      case TGSI_SEMANTIC_FOG:
         /* Fog lives in the W channel of T_FOG_W; replicate it to all
          * channels.
          */
         src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
         src = swizzle(src, W, W, W, W);
         break;
      case TGSI_SEMANTIC_FACE: {
         /* for back/front faces */
         int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
         src =
            i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
         break;
      }
      default:
         i915_program_error(p, "Bad source->Index");
         return 0;
      }
      break;

   case TGSI_FILE_IMMEDIATE: {
      assert(index < p->num_immediates);

      /* Work on private copies of the swizzle and negate state so each
       * component can be rewritten independently below.
       */
      uint8_t swiz[4] = {
         source->Register.SwizzleX,
         source->Register.SwizzleY,
         source->Register.SwizzleZ,
         source->Register.SwizzleW
      };

      uint8_t neg[4] = {
         source->Register.Negate,
         source->Register.Negate,
         source->Register.Negate,
         source->Register.Negate
      };

      unsigned i;

      /* Try to express every selected component as a ZERO/ONE swizzle
       * (with a sign flip for -1.0).  The loop stops at the first
       * component holding any other value, leaving i < 4.
       */
      for (i = 0; i < 4; i++) {
         if (swiz[i] == TGSI_SWIZZLE_ZERO || swiz[i] == TGSI_SWIZZLE_ONE) {
            continue;
         } else if (p->immediates[index][swiz[i]] == 0.0) {
            swiz[i] = TGSI_SWIZZLE_ZERO;
         } else if (p->immediates[index][swiz[i]] == 1.0) {
            swiz[i] = TGSI_SWIZZLE_ONE;
         } else if (p->immediates[index][swiz[i]] == -1.0) {
            swiz[i] = TGSI_SWIZZLE_ONE;
            neg[i] ^= 1;
         } else {
            break;
         }
      }

      /* All four components were representable: return a swizzled R0 and
       * skip the constant lookup entirely.
       * NOTE(review): this early return also bypasses the
       * Register.Absolute handling further down -- confirm that is
       * intended.
       */
      if (i == 4) {
         return negate(swizzle(UREG(REG_TYPE_R, 0),
                               swiz[0], swiz[1], swiz[2], swiz[3]),
                       neg[0], neg[1], neg[2], neg[3]);
      }

      /* Otherwise read the immediate from the constant slot recorded in
       * immediates_map and continue as for a regular constant.
       */
      index = p->immediates_map[index];
      FALLTHROUGH;
   }

   case TGSI_FILE_CONSTANT:
      src = UREG(REG_TYPE_CONST, index);
      break;

   default:
      i915_program_error(p, "Bad source->File");
      return 0;
   }

   src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
                 source->Register.SwizzleZ, source->Register.SwizzleW);

   /* No HW abs flag, so we have to max with the negation. */
   if (source->Register.Absolute) {
      uint32_t tmp = i915_get_utemp(p);
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
                      negate(src, 1, 1, 1, 1), 0);
      src = tmp;
   }

   /* There's both negate-all-components and per-component negation.
    * Try to handle both here.
    */
   {
      int n = source->Register.Negate;
      src = negate(src, n, n, n, n);
   }

   return src;
}
277 
278 /**
279  * Construct a ureg for a destination register.
280  */
281 static uint32_t
get_result_vector(struct i915_fp_compile * p,const struct i915_full_dst_register * dest)282 get_result_vector(struct i915_fp_compile *p,
283                   const struct i915_full_dst_register *dest)
284 {
285    switch (dest->Register.File) {
286    case TGSI_FILE_OUTPUT: {
287       uint32_t sem_name =
288          p->shader->info.output_semantic_name[dest->Register.Index];
289       switch (sem_name) {
290       case TGSI_SEMANTIC_POSITION:
291          return UREG(REG_TYPE_OD, 0);
292       case TGSI_SEMANTIC_COLOR:
293          return UREG(REG_TYPE_OC, 0);
294       default:
295          i915_program_error(p, "Bad inst->DstReg.Index/semantics");
296          return 0;
297       }
298    }
299    case TGSI_FILE_TEMPORARY:
300       return UREG(REG_TYPE_R, dest->Register.Index);
301    default:
302       i915_program_error(p, "Bad inst->DstReg.File");
303       return 0;
304    }
305 }
306 
307 /**
308  * Compute flags for saturation and writemask.
309  */
310 static uint32_t
get_result_flags(const struct i915_full_instruction * inst)311 get_result_flags(const struct i915_full_instruction *inst)
312 {
313    const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
314    uint32_t flags = 0x0;
315 
316    if (inst->Instruction.Saturate)
317       flags |= A0_DEST_SATURATE;
318 
319    if (writeMask & TGSI_WRITEMASK_X)
320       flags |= A0_DEST_CHANNEL_X;
321    if (writeMask & TGSI_WRITEMASK_Y)
322       flags |= A0_DEST_CHANNEL_Y;
323    if (writeMask & TGSI_WRITEMASK_Z)
324       flags |= A0_DEST_CHANNEL_Z;
325    if (writeMask & TGSI_WRITEMASK_W)
326       flags |= A0_DEST_CHANNEL_W;
327 
328    return flags;
329 }
330 
331 /**
332  * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
333  */
334 static uint32_t
translate_tex_src_target(struct i915_fp_compile * p,uint32_t tex)335 translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
336 {
337    switch (tex) {
338    case TGSI_TEXTURE_SHADOW1D:
339       FALLTHROUGH;
340    case TGSI_TEXTURE_1D:
341       return D0_SAMPLE_TYPE_2D;
342 
343    case TGSI_TEXTURE_SHADOW2D:
344       FALLTHROUGH;
345    case TGSI_TEXTURE_2D:
346       return D0_SAMPLE_TYPE_2D;
347 
348    case TGSI_TEXTURE_SHADOWRECT:
349       FALLTHROUGH;
350    case TGSI_TEXTURE_RECT:
351       return D0_SAMPLE_TYPE_2D;
352 
353    case TGSI_TEXTURE_3D:
354       return D0_SAMPLE_TYPE_VOLUME;
355 
356    case TGSI_TEXTURE_CUBE:
357       return D0_SAMPLE_TYPE_CUBE;
358 
359    default:
360       i915_program_error(p, "TexSrc type");
361       return 0;
362    }
363 }
364 
365 /**
366  * Return the number of coords needed to access a given TGSI_TEXTURE_*
367  */
368 uint32_t
i915_coord_mask(enum tgsi_opcode opcode,enum tgsi_texture_type tex)369 i915_coord_mask(enum tgsi_opcode opcode, enum tgsi_texture_type tex)
370 {
371    uint32_t coord_mask = 0;
372 
373    if (opcode == TGSI_OPCODE_TXP || opcode == TGSI_OPCODE_TXB)
374       coord_mask |= TGSI_WRITEMASK_W;
375 
376    switch (tex) {
377    case TGSI_TEXTURE_1D: /* See the 1D coord swizzle below. */
378    case TGSI_TEXTURE_2D:
379    case TGSI_TEXTURE_RECT:
380       return coord_mask | TGSI_WRITEMASK_XY;
381 
382    case TGSI_TEXTURE_SHADOW1D:
383    case TGSI_TEXTURE_SHADOW2D:
384    case TGSI_TEXTURE_SHADOWRECT:
385    case TGSI_TEXTURE_3D:
386    case TGSI_TEXTURE_CUBE:
387       return coord_mask | TGSI_WRITEMASK_XYZ;
388 
389    default:
390       unreachable("bad texture target");
391    }
392 }
393 
394 /**
395  * Generate texel lookup instruction.
396  */
397 static void
emit_tex(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,struct i915_fragment_shader * fs)398 emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
399          uint32_t opcode, struct i915_fragment_shader *fs)
400 {
401    uint32_t texture = inst->Texture.Texture;
402    uint32_t unit = inst->Src[1].Register.Index;
403    uint32_t tex = translate_tex_src_target(p, texture);
404    uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
405    uint32_t coord = src_vector(p, &inst->Src[0], fs);
406 
407    /* For 1D textures, set the Y coord to the same as X.  Otherwise, we could
408     * select the wrong LOD based on the uninitialized Y coord when we sample our
409     * 1D textures as 2D.
410     */
411    if (texture == TGSI_TEXTURE_1D || texture == TGSI_TEXTURE_SHADOW1D)
412       coord = swizzle(coord, X, X, Z, W);
413 
414    i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
415                    get_result_flags(inst), sampler, coord, opcode,
416                    i915_coord_mask(inst->Instruction.Opcode, texture));
417 }
418 
419 /**
420  * Generate a simple arithmetic instruction
421  * \param opcode  the i915 opcode
422  * \param numArgs  the number of input/src arguments
423  */
424 static void
emit_simple_arith(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)425 emit_simple_arith(struct i915_fp_compile *p,
426                   const struct i915_full_instruction *inst, uint32_t opcode,
427                   uint32_t numArgs, struct i915_fragment_shader *fs)
428 {
429    uint32_t arg1, arg2, arg3;
430 
431    assert(numArgs <= 3);
432 
433    arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
434    arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
435    arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
436 
437    i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
438                    get_result_flags(inst), 0, arg1, arg2, arg3);
439 }
440 
441 /** As above, but swap the first two src regs */
442 static void
emit_simple_arith_swap2(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)443 emit_simple_arith_swap2(struct i915_fp_compile *p,
444                         const struct i915_full_instruction *inst,
445                         uint32_t opcode, uint32_t numArgs,
446                         struct i915_fragment_shader *fs)
447 {
448    struct i915_full_instruction inst2;
449 
450    assert(numArgs == 2);
451 
452    /* transpose first two registers */
453    inst2 = *inst;
454    inst2.Src[0] = inst->Src[1];
455    inst2.Src[1] = inst->Src[0];
456 
457    emit_simple_arith(p, &inst2, opcode, numArgs, fs);
458 }
459 
/*
 * Translate TGSI instruction to i915 instruction.
 *
 * p    - compile context the i915 instructions are emitted into
 * inst - the TGSI instruction to translate
 * fs   - fragment shader, forwarded to the source-operand helpers
 *
 * Each supported opcode expands to one or more i915_emit_arith() /
 * i915_emit_texld() calls.  Unsupported opcodes are reported through
 * i915_program_error().  Temporaries obtained with i915_get_utemp() are
 * released at the end of every successfully handled instruction.
 *
 * Possible concerns:
 *
 * DDX, DDY -- return 0
 * SIN, COS -- could use another taylor step?
 * LIT      -- results seem a little different to sw mesa
 * LOG      -- different to mesa on negative numbers, but this is conformant.
 */
static void
i915_translate_instruction(struct i915_fp_compile *p,
                           const struct i915_full_instruction *inst,
                           struct i915_fragment_shader *fs)
{
   uint32_t src0, src1, src2, flags;
   uint32_t tmp = 0;

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_ADD:
      emit_simple_arith(p, inst, A0_ADD, 2, fs);
      break;

   case TGSI_OPCODE_CEIL:
      /* ceil(x) is computed as -floor(-x). */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);
      /* The intermediate only takes the channel bits of the flags, so
       * saturation is applied on the final MOV alone.
       */
      i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
                      negate(src0, 1, 1, 1, 1), 0, 0);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(tmp, 1, 1, 1, 1), 0, 0);
      break;

   case TGSI_OPCODE_CMP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, src0, src2,
                      src1); /* NOTE: order of src2, src1 */
      break;

   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      /* XXX We just output 0 here */
      debug_printf("Punting DDX/DDY\n");
      /* The destination ureg is reused as the source, but every channel is
       * swizzled to ZERO so its contents do not matter.
       */
      src0 = get_result_vector(p, &inst->Dst[0]);
      i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
      break;

   case TGSI_OPCODE_DP2:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* Emitted as a DP3 with the Z term of the first operand zeroed. */
      i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
      break;

   case TGSI_OPCODE_DP3:
      emit_simple_arith(p, inst, A0_DP3, 2, fs);
      break;

   case TGSI_OPCODE_DP4:
      emit_simple_arith(p, inst, A0_DP4, 2, fs);
      break;

   case TGSI_OPCODE_DST:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
                      swizzle(src1, ONE, Y, ONE, W), 0);
      break;

   case TGSI_OPCODE_END:
      /* no-op */
      break;

   case TGSI_OPCODE_EX2:
      src0 = src_vector(p, &inst->Src[0], fs);

      /* Scalar op: the X component of the source feeds every channel. */
      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_FLR:
      emit_simple_arith(p, inst, A0_FLR, 1, fs);
      break;

   case TGSI_OPCODE_FRC:
      emit_simple_arith(p, inst, A0_FRC, 1, fs);
      break;

   case TGSI_OPCODE_KILL_IF:
      /* kill if src[0].x < 0 || src[0].y < 0 ... */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      src0,                /* coord*/
                      T0_TEXKILL,          /* opcode */
                      TGSI_WRITEMASK_XYZW);/* coord_mask */
      break;

   case TGSI_OPCODE_KILL:
      /* unconditional kill */
      tmp = i915_get_utemp(p);

      /* The coordinate is the constant ONE with the negate bits set on all
       * channels, so the kill condition (see KILL_IF above) always holds.
       */
      i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
                      A0_DEST_CHANNEL_ALL, /* dest writemask */
                      0,                   /* sampler */
                      negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
                             1, 1, 1, 1), /* coord */
                      T0_TEXKILL,         /* opcode */
                      TGSI_WRITEMASK_X);  /* coord_mask */
      break;

   case TGSI_OPCODE_LG2:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_LIT:
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = max( a.xyzw, a.00zw )
       * XXX: Clamp tmp.w to -128..128
       * tmp.y = log(tmp.y)
       * tmp.y = tmp.w * tmp.y
       * tmp.y = exp(tmp.y)
       * result = cmp (a.11-x1, a.1x01, a.1xy1 )
       */
      i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, Z, W), 0);

      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, ZERO, Y, ZERO, ZERO),
                      swizzle(tmp, ZERO, W, ZERO, ZERO), 0);

      i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
                      swizzle(tmp, Y, Y, Y, Y), 0, 0);

      i915_emit_arith(
         p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
         swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));

      break;

   case TGSI_OPCODE_LRP:
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      src2 = src_vector(p, &inst->Src[2], fs);
      flags = get_result_flags(inst);
      tmp = i915_get_utemp(p);

      /* b*a + c*(1-a)
       *
       * b*a + c - ca
       *
       * tmp = b*a + c,
       * result = (-c)*a + tmp
       */
      i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
                      src0, src2);

      i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      negate(src2, 1, 1, 1, 1), src0, tmp);
      break;

   case TGSI_OPCODE_MAD:
      emit_simple_arith(p, inst, A0_MAD, 3, fs);
      break;

   case TGSI_OPCODE_MAX:
      emit_simple_arith(p, inst, A0_MAX, 2, fs);
      break;

   case TGSI_OPCODE_MIN:
      emit_simple_arith(p, inst, A0_MIN, 2, fs);
      break;

   case TGSI_OPCODE_MOV:
      emit_simple_arith(p, inst, A0_MOV, 1, fs);
      break;

   case TGSI_OPCODE_MUL:
      emit_simple_arith(p, inst, A0_MUL, 2, fs);
      break;

   case TGSI_OPCODE_NOP:
      break;

   case TGSI_OPCODE_POW:
      /* Emitted as LOG of src0.x, MUL by src1, then EXP, all in tmp.x. */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);
      flags = get_result_flags(inst);

      /* XXX: masking on intermediate values, here and elsewhere.
       */
      i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
                      swizzle(src0, X, X, X, X), 0, 0);

      i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);

      i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
                      swizzle(tmp, X, X, X, X), 0, 0);
      break;

   case TGSI_OPCODE_RET:
      /* XXX: no-op? */
      break;

   case TGSI_OPCODE_RCP:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_RSQ:
      src0 = src_vector(p, &inst->Src[0], fs);

      i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
                      0);
      break;

   case TGSI_OPCODE_SEQ: {
      /* ureg value that src_vector() yields for an all-zero immediate;
       * used to detect comparisons against zero.
       */
      const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
                                    SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);

      /* if we're both >= and <= then we're == */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      if (src0 == zero || src1 == zero) {
         /* Make src0 the non-zero operand. */
         if (src0 == zero)
            src0 = src1;

         /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only
          * two instructions instead of three.
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                         negate(src0, 1, 1, 1, 1), 0);
         i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         negate(tmp, 1, 1, 1, 1), zero, 0);
      } else {
         i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

         i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0, src1, src0, 0);

         /* Multiply the two comparison results to combine them. */
         i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         get_result_vector(p, &inst->Dst[0]), tmp, 0);
      }

      break;
   }

   case TGSI_OPCODE_SGE:
      emit_simple_arith(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLE:
      /* like SGE, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
      break;

   case TGSI_OPCODE_SLT:
      emit_simple_arith(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SGT:
      /* like SLT, but swap reg0, reg1 */
      emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
      break;

   case TGSI_OPCODE_SNE: {
      /* Same zero-operand detection as in the SEQ case. */
      const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0),
                                    SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);

      /* if we're < or > then we're != */
      src0 = src_vector(p, &inst->Src[0], fs);
      src1 = src_vector(p, &inst->Src[1], fs);
      tmp = i915_get_utemp(p);

      if (src0 == zero || src1 == zero) {
         if (src0 == zero)
            src0 = src1;

         /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only
          * two instructions instead of three.
          */
         i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                         negate(src0, 1, 1, 1, 1), 0);
         i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         negate(tmp, 1, 1, 1, 1), zero, 0);
      } else {
         i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);

         i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0, src1, src0, 0);

         /* Add the two comparison results to combine them. */
         i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
                         get_result_flags(inst), 0,
                         get_result_vector(p, &inst->Dst[0]), tmp, 0);
      }
      break;
   }

   case TGSI_OPCODE_SSG:
      /* compute (src>0) - (src<0) */
      src0 = src_vector(p, &inst->Src[0], fs);
      tmp = i915_get_utemp(p);

      /* tmp = (src < 0) */
      i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);

      /* result = (0 < src) */
      i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
                      get_result_flags(inst), 0,
                      swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);

      /* result = result - tmp */
      i915_emit_arith(
         p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
         0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
      break;

   case TGSI_OPCODE_TEX:
      emit_tex(p, inst, T0_TEXLD, fs);
      break;

   case TGSI_OPCODE_TRUNC:
      emit_simple_arith(p, inst, A0_TRC, 1, fs);
      break;

   case TGSI_OPCODE_TXB:
      emit_tex(p, inst, T0_TEXLDB, fs);
      break;

   case TGSI_OPCODE_TXP:
      emit_tex(p, inst, T0_TEXLDP, fs);
      break;

   default:
      i915_program_error(p, "bad opcode %s (%d)",
                         tgsi_get_opcode_name(inst->Instruction.Opcode),
                         inst->Instruction.Opcode);
      /* Returns without reaching i915_release_utemps() below; this path
       * has not called i915_get_utemp().
       */
      return;
   }

   i915_release_utemps(p);
}
838 
839 static void
i915_translate_token(struct i915_fp_compile * p,const union i915_full_token * token,struct i915_fragment_shader * fs)840 i915_translate_token(struct i915_fp_compile *p,
841                      const union i915_full_token *token,
842                      struct i915_fragment_shader *fs)
843 {
844    struct i915_fragment_shader *ifs = p->shader;
845    switch (token->Token.Type) {
846    case TGSI_TOKEN_TYPE_PROPERTY:
847       /* Ignore properties where we only support one value. */
848       assert(token->FullProperty.Property.PropertyName ==
849                 TGSI_PROPERTY_FS_COORD_ORIGIN ||
850              token->FullProperty.Property.PropertyName ==
851                 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
852              token->FullProperty.Property.PropertyName ==
853                 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS ||
854              token->FullProperty.Property.PropertyName ==
855                 TGSI_PROPERTY_SEPARABLE_PROGRAM);
856       break;
857 
858    case TGSI_TOKEN_TYPE_DECLARATION:
859       if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
860          if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
861             i915_program_error(p, "Exceeded %d max uniforms",
862                                I915_MAX_CONSTANT);
863          } else {
864             uint32_t i;
865             for (i = token->FullDeclaration.Range.First;
866                  i <= token->FullDeclaration.Range.Last; i++) {
867                ifs->constant_flags[i] = I915_CONSTFLAG_USER;
868                ifs->num_constants = MAX2(ifs->num_constants, i + 1);
869             }
870          }
871       } else if (token->FullDeclaration.Declaration.File ==
872                  TGSI_FILE_TEMPORARY) {
873          if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
874             i915_program_error(p, "Exceeded %d max TGSI temps",
875                                I915_MAX_TEMPORARY);
876          } else {
877             uint32_t i;
878             for (i = token->FullDeclaration.Range.First;
879                  i <= token->FullDeclaration.Range.Last; i++) {
880                /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
881                p->temp_flag |= (1 << i); /* mark temp as used */
882             }
883          }
884       }
885       break;
886 
887    case TGSI_TOKEN_TYPE_IMMEDIATE: {
888       const struct tgsi_full_immediate *imm = &token->FullImmediate;
889       const uint32_t pos = p->num_immediates++;
890       uint32_t j;
891       assert(imm->Immediate.NrTokens <= 4 + 1);
892       for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
893          p->immediates[pos][j] = imm->u[j].Float;
894       }
895    } break;
896 
897    case TGSI_TOKEN_TYPE_INSTRUCTION:
898       if (p->first_instruction) {
899          /* resolve location of immediates */
900          uint32_t i, j;
901          for (i = 0; i < p->num_immediates; i++) {
902             /* find constant slot for this immediate */
903             for (j = 0; j < I915_MAX_CONSTANT; j++) {
904                if (ifs->constant_flags[j] == 0x0) {
905                   memcpy(ifs->constants[j], p->immediates[i],
906                          4 * sizeof(float));
907                   /*printf("immediate %d maps to const %d\n", i, j);*/
908                   ifs->constant_flags[j] = 0xf; /* all four comps used */
909                   p->immediates_map[i] = j;
910                   ifs->num_constants = MAX2(ifs->num_constants, j + 1);
911                   break;
912                }
913             }
914             if (j == I915_MAX_CONSTANT) {
915                i915_program_error(p, "Exceeded %d max uniforms and immediates.",
916                                   I915_MAX_CONSTANT);
917             }
918          }
919 
920          p->first_instruction = false;
921       }
922 
923       i915_translate_instruction(p, &token->FullInstruction, fs);
924       break;
925 
926    default:
927       assert(0);
928    }
929 }
930 
931 /**
932  * Translate TGSI fragment shader into i915 hardware instructions.
933  * \param p  the translation state
934  * \param tokens  the TGSI token array
935  */
936 static void
i915_translate_instructions(struct i915_fp_compile * p,const struct i915_token_list * tokens,struct i915_fragment_shader * fs)937 i915_translate_instructions(struct i915_fp_compile *p,
938                             const struct i915_token_list *tokens,
939                             struct i915_fragment_shader *fs)
940 {
941    int i;
942    for (i = 0; i < tokens->NumTokens && !p->error; i++) {
943       i915_translate_token(p, &tokens->Tokens[i], fs);
944    }
945 }
946 
947 static struct i915_fp_compile *
i915_init_compile(struct i915_context * i915,struct i915_fragment_shader * ifs)948 i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)
949 {
950    struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
951    int i;
952 
953    p->shader = ifs;
954 
955    /* Put new constants at end of const buffer, growing downward.
956     * The problem is we don't know how many user-defined constants might
957     * be specified with pipe->set_constant_buffer().
958     * Should pre-scan the user's program to determine the highest-numbered
959     * constant referenced.
960     */
961    ifs->num_constants = 0;
962    memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
963 
964    memset(&p->register_phases, 0, sizeof(p->register_phases));
965 
966    for (i = 0; i < I915_TEX_UNITS; i++)
967       ifs->texcoords[i].semantic = -1;
968 
969    p->log_program_errors = !i915->no_log_program_errors;
970 
971    p->first_instruction = true;
972 
973    p->nr_tex_indirect = 1; /* correct? */
974    p->nr_tex_insn = 0;
975    p->nr_alu_insn = 0;
976    p->nr_decl_insn = 0;
977 
978    p->csr = p->program;
979    p->decl = p->declarations;
980    p->decl_s = 0;
981    p->decl_t = 0;
982    p->temp_flag = ~0x0U << I915_MAX_TEMPORARY;
983    p->utemp_flag = ~0x7;
984 
985    /* initialize the first program word */
986    *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
987 
988    return p;
989 }
990 
991 /* Copy compile results to the fragment program struct and destroy the
992  * compilation context.
993  */
994 static void
i915_fini_compile(struct i915_context * i915,struct i915_fp_compile * p)995 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
996 {
997    struct i915_fragment_shader *ifs = p->shader;
998    unsigned long program_size = (unsigned long)(p->csr - p->program);
999    unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
1000 
1001    if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
1002       debug_printf("Exceeded max nr indirect texture lookups\n");
1003 
1004    if (p->nr_tex_insn > I915_MAX_TEX_INSN)
1005       i915_program_error(p, "Exceeded max TEX instructions");
1006 
1007    if (p->nr_alu_insn > I915_MAX_ALU_INSN)
1008       i915_program_error(p, "Exceeded max ALU instructions");
1009 
1010    if (p->nr_decl_insn > I915_MAX_DECL_INSN)
1011       i915_program_error(p, "Exceeded max DECL instructions");
1012 
1013    /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
1014     * TGSI_END), even when the depth write fixup gets emitted below - maybe that
1015     * one is fishy, too?
1016     */
1017    if (ifs->info.num_instructions == 1)
1018       i915_program_error(p, "Empty fragment shader");
1019 
1020    if (p->error) {
1021       p->NumNativeInstructions = 0;
1022       p->NumNativeAluInstructions = 0;
1023       p->NumNativeTexInstructions = 0;
1024       p->NumNativeTexIndirections = 0;
1025 
1026       i915_use_passthrough_shader(ifs);
1027    } else {
1028       p->NumNativeInstructions =
1029          p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1030       p->NumNativeAluInstructions = p->nr_alu_insn;
1031       p->NumNativeTexInstructions = p->nr_tex_insn;
1032       p->NumNativeTexIndirections = p->nr_tex_indirect;
1033 
1034       /* patch in the program length */
1035       p->declarations[0] |= program_size + decl_size - 2;
1036 
1037       /* Copy compilation results to fragment program struct:
1038        */
1039       assert(!ifs->program);
1040 
1041       ifs->program_len = decl_size + program_size;
1042       ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
1043       memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
1044       memcpy(&ifs->program[decl_size], p->program,
1045              program_size * sizeof(uint32_t));
1046 
1047       util_debug_message(
1048          &i915->debug, SHADER_INFO,
1049          "%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
1050          _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), (int)program_size,
1051          p->nr_tex_insn, p->nr_tex_indirect,
1052          p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
1053          ifs->num_constants);
1054    }
1055 
1056    /* Release the compilation struct:
1057     */
1058    FREE(p);
1059 }
1060 
1061 /**
1062  * Rather than trying to intercept and jiggle depth writes during
1063  * emit, just move the value into its correct position at the end of
1064  * the program:
1065  */
1066 static void
i915_fixup_depth_write(struct i915_fp_compile * p)1067 i915_fixup_depth_write(struct i915_fp_compile *p)
1068 {
1069    for (int i = 0; i < p->shader->info.num_outputs; i++) {
1070       if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
1071          continue;
1072 
1073       const uint32_t depth = UREG(REG_TYPE_OD, 0);
1074 
1075       i915_emit_arith(p, A0_MOV,                  /* opcode */
1076                       depth,                      /* dest reg */
1077                       A0_DEST_CHANNEL_W,          /* write mask */
1078                       0,                          /* saturate? */
1079                       swizzle(depth, X, Y, Z, Z), /* src0 */
1080                       0, 0 /* src1, src2 */);
1081    }
1082 }
1083 
1084 void
i915_translate_fragment_program(struct i915_context * i915,struct i915_fragment_shader * fs)1085 i915_translate_fragment_program(struct i915_context *i915,
1086                                 struct i915_fragment_shader *fs)
1087 {
1088    struct i915_fp_compile *p;
1089    const struct tgsi_token *tokens = fs->state.tokens;
1090    struct i915_token_list *i_tokens;
1091 
1092    if (I915_DBG_ON(DBG_FS)) {
1093       mesa_logi("TGSI fragment shader:");
1094       tgsi_dump(tokens, 0);
1095    }
1096 
1097    p = i915_init_compile(i915, fs);
1098 
1099    i_tokens = i915_optimize(tokens);
1100    i915_translate_instructions(p, i_tokens, fs);
1101    i915_fixup_depth_write(p);
1102 
1103    i915_fini_compile(i915, p);
1104    i915_optimize_free(i_tokens);
1105 
1106    if (I915_DBG_ON(DBG_FS)) {
1107       mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1108                 fs->num_constants ? ":" : "");
1109 
1110       for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1111          if (fs->constant_flags[i] &&
1112              fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1113             mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1114                       fs->constants[i][1], fs->constants[i][2],
1115                       fs->constants[i][3]);
1116          }
1117       }
1118       i915_disassemble_program(fs->program, fs->program_len);
1119    }
1120 }
1121