• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2016 Miklós Máté
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "main/mtypes.h"
24 #include "main/atifragshader.h"
25 #include "main/errors.h"
26 #include "program/prog_parameter.h"
27 
28 #include "tgsi/tgsi_ureg.h"
29 #include "tgsi/tgsi_scan.h"
30 #include "tgsi/tgsi_transform.h"
31 
32 #include "st_program.h"
33 #include "st_atifs_to_tgsi.h"
34 
35 /**
36  * Intermediate state used during shader translation.
37  */
38 struct st_translate {
39    struct ureg_program *ureg;
40    struct ati_fragment_shader *atifs;
41 
42    struct ureg_dst temps[MAX_PROGRAM_TEMPS];
43    struct ureg_src *constants;
44    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
45    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
46    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
47 
48    const GLuint *inputMapping;
49    const GLuint *outputMapping;
50 
51    unsigned current_pass;
52 
53    bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];
54 
55    boolean error;
56 };
57 
/**
 * Describes how one ATI arithmetic opcode is translated.
 */
struct instruction_desc {
   /* TGSI opcode emitted directly; TGSI_OPCODE_NOP routes the instruction
    * to emit_special_inst() instead */
   unsigned TGSI_opcode;
   /* mnemonic, used in warnings and to select the special-case expansion */
   const char *name;
   /* number of source arguments the operation consumes */
   unsigned char arg_count;
};
63 
64 static const struct instruction_desc inst_desc[] = {
65    {TGSI_OPCODE_MOV, "MOV", 1},
66    {TGSI_OPCODE_NOP, "UND", 0}, /* unused */
67    {TGSI_OPCODE_ADD, "ADD", 2},
68    {TGSI_OPCODE_MUL, "MUL", 2},
69    {TGSI_OPCODE_NOP, "SUB", 2},
70    {TGSI_OPCODE_DP3, "DOT3", 2},
71    {TGSI_OPCODE_DP4, "DOT4", 2},
72    {TGSI_OPCODE_MAD, "MAD", 3},
73    {TGSI_OPCODE_LRP, "LERP", 3},
74    {TGSI_OPCODE_NOP, "CND", 3},
75    {TGSI_OPCODE_NOP, "CND0", 3},
76    {TGSI_OPCODE_NOP, "DOT2_ADD", 3}
77 };
78 
79 static struct ureg_dst
get_temp(struct st_translate * t,unsigned index)80 get_temp(struct st_translate *t, unsigned index)
81 {
82    if (ureg_dst_is_undef(t->temps[index]))
83       t->temps[index] = ureg_DECL_temporary(t->ureg);
84    return t->temps[index];
85 }
86 
87 static struct ureg_src
apply_swizzle(struct st_translate * t,struct ureg_src src,GLuint swizzle)88 apply_swizzle(struct st_translate *t,
89               struct ureg_src src, GLuint swizzle)
90 {
91    if (swizzle == GL_SWIZZLE_STR_ATI) {
92       return src;
93    } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
94       return ureg_swizzle(src,
95                           TGSI_SWIZZLE_X,
96                           TGSI_SWIZZLE_Y,
97                           TGSI_SWIZZLE_W,
98                           TGSI_SWIZZLE_Z);
99    } else {
100       struct ureg_dst tmp[2];
101       struct ureg_src imm[3];
102 
103       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
104       tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1);
105       imm[0] = src;
106       imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f);
107       imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f);
108       ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3);
109 
110       if (swizzle == GL_SWIZZLE_STR_DR_ATI) {
111          imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z);
112       } else {
113          imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W);
114       }
115       ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1);
116 
117       imm[0] = ureg_src(tmp[0]);
118       imm[1] = ureg_src(tmp[1]);
119       ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2);
120 
121       return ureg_src(tmp[0]);
122    }
123 }
124 
125 static struct ureg_src
get_source(struct st_translate * t,GLuint src_type)126 get_source(struct st_translate *t, GLuint src_type)
127 {
128    if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
129       if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
130          return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
131       } else {
132          return ureg_imm1f(t->ureg, 0.0f);
133       }
134    } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
135       return t->constants[src_type - GL_CON_0_ATI];
136    } else if (src_type == GL_ZERO) {
137       return ureg_imm1f(t->ureg, 0.0f);
138    } else if (src_type == GL_ONE) {
139       return ureg_imm1f(t->ureg, 1.0f);
140    } else if (src_type == GL_PRIMARY_COLOR_ARB) {
141       return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
142    } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
143       return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
144    } else {
145       /* frontend prevents this */
146       unreachable("unknown source");
147    }
148 }
149 
150 static struct ureg_src
prepare_argument(struct st_translate * t,const unsigned argId,const struct atifragshader_src_register * srcReg)151 prepare_argument(struct st_translate *t, const unsigned argId,
152                  const struct atifragshader_src_register *srcReg)
153 {
154    struct ureg_src src = get_source(t, srcReg->Index);
155    struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);
156 
157    switch (srcReg->argRep) {
158    case GL_NONE:
159       break;
160    case GL_RED:
161       src = ureg_scalar(src, TGSI_SWIZZLE_X);
162       break;
163    case GL_GREEN:
164       src = ureg_scalar(src, TGSI_SWIZZLE_Y);
165       break;
166    case GL_BLUE:
167       src = ureg_scalar(src, TGSI_SWIZZLE_Z);
168       break;
169    case GL_ALPHA:
170       src = ureg_scalar(src, TGSI_SWIZZLE_W);
171       break;
172    }
173    ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1);
174 
175    if (srcReg->argMod & GL_COMP_BIT_ATI) {
176       struct ureg_src modsrc[2];
177       modsrc[0] = ureg_imm1f(t->ureg, 1.0f);
178       modsrc[1] = ureg_negate(ureg_src(arg));
179 
180       ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
181    }
182    if (srcReg->argMod & GL_BIAS_BIT_ATI) {
183       struct ureg_src modsrc[2];
184       modsrc[0] = ureg_src(arg);
185       modsrc[1] = ureg_imm1f(t->ureg, -0.5f);
186 
187       ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
188    }
189    if (srcReg->argMod & GL_2X_BIT_ATI) {
190       struct ureg_src modsrc[2];
191       modsrc[0] = ureg_src(arg);
192       modsrc[1] = ureg_src(arg);
193 
194       ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
195    }
196    if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
197       struct ureg_src modsrc[2];
198       modsrc[0] = ureg_src(arg);
199       modsrc[1] = ureg_imm1f(t->ureg, -1.0f);
200 
201       ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
202    }
203    return  ureg_src(arg);
204 }
205 
206 /* These instructions need special treatment */
207 static void
emit_special_inst(struct st_translate * t,const struct instruction_desc * desc,struct ureg_dst * dst,struct ureg_src * args,unsigned argcount)208 emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
209                   struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
210 {
211    struct ureg_dst tmp[1];
212    struct ureg_src src[3];
213 
214    if (!strcmp(desc->name, "SUB")) {
215       ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1]));
216    } else if (!strcmp(desc->name, "CND")) {
217       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
218       src[0] = ureg_imm1f(t->ureg, 0.5f);
219       src[1] = ureg_negate(args[2]);
220       ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2);
221       src[0] = ureg_src(tmp[0]);
222       src[1] = args[0];
223       src[2] = args[1];
224       ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
225    } else if (!strcmp(desc->name, "CND0")) {
226       src[0] = args[2];
227       src[1] = args[1];
228       src[2] = args[0];
229       ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
230    } else if (!strcmp(desc->name, "DOT2_ADD")) {
231       /* note: DP2A is not implemented in most pipe drivers */
232       tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
233       src[0] = args[0];
234       src[1] = args[1];
235       ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2);
236       src[0] = ureg_src(tmp[0]);
237       src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
238       ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2);
239    }
240 }
241 
242 static void
emit_arith_inst(struct st_translate * t,const struct instruction_desc * desc,struct ureg_dst * dst,struct ureg_src * args,unsigned argcount)243 emit_arith_inst(struct st_translate *t,
244                 const struct instruction_desc *desc,
245                 struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
246 {
247    if (desc->TGSI_opcode == TGSI_OPCODE_NOP) {
248       return emit_special_inst(t, desc, dst, args, argcount);
249    }
250 
251    ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount);
252 }
253 
254 static void
emit_dstmod(struct st_translate * t,struct ureg_dst dst,GLuint dstMod)255 emit_dstmod(struct st_translate *t,
256             struct ureg_dst dst, GLuint dstMod)
257 {
258    float imm;
259    struct ureg_src src[3];
260    GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI;
261 
262    if (dstMod == GL_NONE) {
263       return;
264    }
265 
266    switch (scale) {
267    case GL_2X_BIT_ATI:
268       imm = 2.0f;
269       break;
270    case GL_4X_BIT_ATI:
271       imm = 4.0f;
272       break;
273    case GL_8X_BIT_ATI:
274       imm = 8.0f;
275       break;
276    case GL_HALF_BIT_ATI:
277       imm = 0.5f;
278       break;
279    case GL_QUARTER_BIT_ATI:
280       imm = 0.25f;
281       break;
282    case GL_EIGHTH_BIT_ATI:
283       imm = 0.125f;
284       break;
285    default:
286       imm = 1.0f;
287    }
288 
289    src[0] = ureg_src(dst);
290    src[1] = ureg_imm1f(t->ureg, imm);
291    if (dstMod & GL_SATURATE_BIT_ATI) {
292       dst = ureg_saturate(dst);
293    }
294    ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2);
295 }
296 
297 /**
298  * Compile one setup instruction to TGSI instructions.
299  */
300 static void
compile_setupinst(struct st_translate * t,const unsigned r,const struct atifs_setupinst * texinst)301 compile_setupinst(struct st_translate *t,
302                   const unsigned r,
303                   const struct atifs_setupinst *texinst)
304 {
305    struct ureg_dst dst[1];
306    struct ureg_src src[2];
307 
308    if (!texinst->Opcode)
309       return;
310 
311    dst[0] = get_temp(t, r);
312 
313    GLuint pass_tex = texinst->src;
314 
315    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
316       unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
317 
318       src[0] = t->inputs[t->inputMapping[attr]];
319    } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
320       unsigned reg = pass_tex - GL_REG_0_ATI;
321 
322       /* the frontend already validated that REG is only allowed in second pass */
323       if (t->regs_written[0][reg]) {
324          src[0] = ureg_src(t->temps[reg]);
325       } else {
326          src[0] = ureg_imm1f(t->ureg, 0.0f);
327       }
328    }
329    src[0] = apply_swizzle(t, src[0], texinst->swizzle);
330 
331    if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
332       /* by default texture and sampler indexes are the same */
333       src[1] = t->samplers[r];
334       /* the texture target is still unknown, it will be fixed in the draw call */
335       ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D,
336                     NULL, 0, src, 2);
337    } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
338       ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
339    }
340 
341    t->regs_written[t->current_pass][r] = true;
342 }
343 
344 /**
345  * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
346  */
347 static void
compile_instruction(struct st_translate * t,const struct atifs_instruction * inst)348 compile_instruction(struct st_translate *t,
349                     const struct atifs_instruction *inst)
350 {
351    unsigned optype;
352 
353    for (optype = 0; optype < 2; optype++) { /* color, alpha */
354       const struct instruction_desc *desc;
355       struct ureg_dst dst[1];
356       struct ureg_src args[3]; /* arguments for the main operation */
357       unsigned arg;
358       unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
359 
360       if (!inst->Opcode[optype])
361          continue;
362 
363       desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];
364 
365       /* prepare the arguments */
366       for (arg = 0; arg < desc->arg_count; arg++) {
367          if (arg >= inst->ArgCount[optype]) {
368             _mesa_warning(0, "Using 0 for missing argument %d of %s\n",
369                           arg, desc->name);
370             args[arg] = ureg_imm1f(t->ureg, 0.0f);
371          } else {
372             args[arg] = prepare_argument(t, arg,
373                                          &inst->SrcReg[optype][arg]);
374          }
375       }
376 
377       /* prepare dst */
378       dst[0] = get_temp(t, dstreg);
379 
380       if (optype) {
381          dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
382       } else {
383          GLuint dstMask = inst->DstReg[optype].dstMask;
384          if (dstMask == GL_NONE) {
385             dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
386          } else {
387             dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
388          }
389       }
390 
391       /* emit the main instruction */
392       emit_arith_inst(t, desc, dst, args, arg);
393 
394       emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
395 
396       t->regs_written[t->current_pass][dstreg] = true;
397    }
398 }
399 
400 static void
finalize_shader(struct st_translate * t,unsigned numPasses)401 finalize_shader(struct st_translate *t, unsigned numPasses)
402 {
403    struct ureg_dst dst[1] = { { 0 } };
404    struct ureg_src src[1] = { { 0 } };
405 
406    if (t->regs_written[numPasses-1][0]) {
407       /* copy the result into the OUT slot */
408       dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
409       src[0] = ureg_src(t->temps[0]);
410       ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
411    }
412 
413    /* signal the end of the program */
414    ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0);
415 }
416 
417 /**
418  * Called when a new variant is needed, we need to translate
419  * the ATI fragment shader to TGSI
420  */
421 enum pipe_error
st_translate_atifs_program(struct ureg_program * ureg,struct ati_fragment_shader * atifs,struct gl_program * program,GLuint numInputs,const GLuint inputMapping[],const ubyte inputSemanticName[],const ubyte inputSemanticIndex[],const GLuint interpMode[],GLuint numOutputs,const GLuint outputMapping[],const ubyte outputSemanticName[],const ubyte outputSemanticIndex[])422 st_translate_atifs_program(
423    struct ureg_program *ureg,
424    struct ati_fragment_shader *atifs,
425    struct gl_program *program,
426    GLuint numInputs,
427    const GLuint inputMapping[],
428    const ubyte inputSemanticName[],
429    const ubyte inputSemanticIndex[],
430    const GLuint interpMode[],
431    GLuint numOutputs,
432    const GLuint outputMapping[],
433    const ubyte outputSemanticName[],
434    const ubyte outputSemanticIndex[])
435 {
436    enum pipe_error ret = PIPE_OK;
437 
438    unsigned pass, i, r;
439 
440    struct st_translate translate, *t;
441    t = &translate;
442    memset(t, 0, sizeof *t);
443 
444    t->inputMapping = inputMapping;
445    t->outputMapping = outputMapping;
446    t->ureg = ureg;
447    t->atifs = atifs;
448 
449    /*
450     * Declare input attributes.
451     */
452    for (i = 0; i < numInputs; i++) {
453       t->inputs[i] = ureg_DECL_fs_input(ureg,
454                                         inputSemanticName[i],
455                                         inputSemanticIndex[i],
456                                         interpMode[i]);
457    }
458 
459    /*
460     * Declare output attributes:
461     *  we always have numOutputs=1 and it's FRAG_RESULT_COLOR
462     */
463    t->outputs[0] = ureg_DECL_output(ureg,
464                                     TGSI_SEMANTIC_COLOR,
465                                     outputSemanticIndex[0]);
466 
467    /* Emit constants and immediates.  Mesa uses a single index space
468     * for these, so we put all the translated regs in t->constants.
469     */
470    if (program->Parameters) {
471       t->constants = calloc(program->Parameters->NumParameters,
472                             sizeof t->constants[0]);
473       if (t->constants == NULL) {
474          ret = PIPE_ERROR_OUT_OF_MEMORY;
475          goto out;
476       }
477 
478       for (i = 0; i < program->Parameters->NumParameters; i++) {
479          switch (program->Parameters->Parameters[i].Type) {
480          case PROGRAM_STATE_VAR:
481          case PROGRAM_UNIFORM:
482             t->constants[i] = ureg_DECL_constant(ureg, i);
483             break;
484          case PROGRAM_CONSTANT:
485             t->constants[i] =
486                ureg_DECL_immediate(ureg,
487                                    (const float*)program->Parameters->ParameterValues[i],
488                                    4);
489             break;
490          default:
491             break;
492          }
493       }
494    }
495 
496    /* texture samplers */
497    for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
498       if (program->SamplersUsed & (1 << i)) {
499          t->samplers[i] = ureg_DECL_sampler(ureg, i);
500          /* the texture target is still unknown, it will be fixed in the draw call */
501          ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D,
502                                 TGSI_RETURN_TYPE_FLOAT,
503                                 TGSI_RETURN_TYPE_FLOAT,
504                                 TGSI_RETURN_TYPE_FLOAT,
505                                 TGSI_RETURN_TYPE_FLOAT);
506       }
507    }
508 
509    /* emit instructions */
510    for (pass = 0; pass < atifs->NumPasses; pass++) {
511       t->current_pass = pass;
512       for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
513          struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
514          compile_setupinst(t, r, texinst);
515       }
516       for (i = 0; i < atifs->numArithInstr[pass]; i++) {
517          struct atifs_instruction *inst = &atifs->Instructions[pass][i];
518          compile_instruction(t, inst);
519       }
520    }
521 
522    finalize_shader(t, atifs->NumPasses);
523 
524 out:
525    free(t->constants);
526 
527    if (t->error) {
528       debug_printf("%s: translate error flag set\n", __func__);
529    }
530 
531    return ret;
532 }
533 
534 /**
535  * Called in ProgramStringNotify, we need to fill the metadata of the
536  * gl_program attached to the ati_fragment_shader
537  */
538 void
st_init_atifs_prog(struct gl_context * ctx,struct gl_program * prog)539 st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
540 {
541    /* we know this is st_fragment_program, because of st_new_ati_fs() */
542    struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
543    struct ati_fragment_shader *atifs = stfp->ati_fs;
544 
545    unsigned pass, i, r, optype, arg;
546 
547    static const gl_state_index fog_params_state[STATE_LENGTH] =
548       {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
549    static const gl_state_index fog_color[STATE_LENGTH] =
550       {STATE_FOG_COLOR, 0, 0, 0, 0};
551 
552    prog->info.inputs_read = 0;
553    prog->info.outputs_written = BITFIELD64_BIT(FRAG_RESULT_COLOR);
554    prog->SamplersUsed = 0;
555    prog->Parameters = _mesa_new_parameter_list();
556 
557    /* fill in inputs_read, SamplersUsed, TexturesUsed */
558    for (pass = 0; pass < atifs->NumPasses; pass++) {
559       for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
560          struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
561          GLuint pass_tex = texinst->src;
562 
563          if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
564             /* mark which texcoords are used */
565             prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
566             /* by default there is 1:1 mapping between samplers and textures */
567             prog->SamplersUsed |= (1 << r);
568             /* the target is unknown here, it will be fixed in the draw call */
569             prog->TexturesUsed[r] = TEXTURE_2D_BIT;
570          } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
571             if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
572                prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
573             }
574          }
575       }
576    }
577    for (pass = 0; pass < atifs->NumPasses; pass++) {
578       for (i = 0; i < atifs->numArithInstr[pass]; i++) {
579          struct atifs_instruction *inst = &atifs->Instructions[pass][i];
580 
581          for (optype = 0; optype < 2; optype++) { /* color, alpha */
582             if (inst->Opcode[optype]) {
583                for (arg = 0; arg < inst->ArgCount[optype]; arg++) {
584                   GLint index = inst->SrcReg[optype][arg].Index;
585                   if (index == GL_PRIMARY_COLOR_EXT) {
586                      prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL0);
587                   } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
588                      /* note: ATI_fragment_shader.txt never specifies what
589                       * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
590                       * VARYING_SLOT_COL1 for this input */
591                      prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL1);
592                   }
593                }
594             }
595          }
596       }
597    }
598    /* we may need fog */
599    prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
600 
601    /* we always have the ATI_fs constants, and the fog params */
602    for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) {
603       _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
604                           NULL, 4, GL_FLOAT, NULL, NULL);
605    }
606    _mesa_add_state_reference(prog->Parameters, fog_params_state);
607    _mesa_add_state_reference(prog->Parameters, fog_color);
608 
609    prog->arb.NumInstructions = 0;
610    prog->arb.NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */
611    prog->arb.NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */
612 }
613 
614 
615 struct tgsi_atifs_transform {
616    struct tgsi_transform_context base;
617    struct tgsi_shader_info info;
618    const struct st_fp_variant_key *key;
619    bool first_instruction_emitted;
620    unsigned fog_factor_temp;
621    unsigned fog_clamp_imm;
622 };
623 
/**
 * Downcast a generic transform context to the ATI_fs transform context
 * that embeds it as its first member.
 */
static inline struct tgsi_atifs_transform *
tgsi_atifs_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_atifs_transform *ctx = (struct tgsi_atifs_transform *)tctx;

   return ctx;
}
629 
630 /* copied from st_cb_drawpixels_shader.c */
631 static void
set_src(struct tgsi_full_instruction * inst,unsigned i,unsigned file,unsigned index,unsigned x,unsigned y,unsigned z,unsigned w)632 set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
633         unsigned x, unsigned y, unsigned z, unsigned w)
634 {
635    inst->Src[i].Register.File  = file;
636    inst->Src[i].Register.Index = index;
637    inst->Src[i].Register.SwizzleX = x;
638    inst->Src[i].Register.SwizzleY = y;
639    inst->Src[i].Register.SwizzleZ = z;
640    inst->Src[i].Register.SwizzleW = w;
641 }
642 
/* Shorthand for set_src(): the swizzle is given as bare channel letters,
 * e.g. SET_SRC(&inst, 0, file, index, X, Y, Z, W). */
#define SET_SRC(inst, i, file, index, x, y, z, w) \
   set_src(inst, i, file, index,                  \
           TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y,    \
           TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
646 
647 static void
transform_decl(struct tgsi_transform_context * tctx,struct tgsi_full_declaration * decl)648 transform_decl(struct tgsi_transform_context *tctx,
649                struct tgsi_full_declaration *decl)
650 {
651    struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
652 
653    if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
654       /* fix texture target */
655       unsigned newtarget = ctx->key->texture_targets[decl->Range.First];
656       if (newtarget)
657          decl->SamplerView.Resource = newtarget;
658    }
659 
660    tctx->emit_declaration(tctx, decl);
661 }
662 
663 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * current_inst)664 transform_instr(struct tgsi_transform_context *tctx,
665                 struct tgsi_full_instruction *current_inst)
666 {
667    struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
668 
669    if (ctx->first_instruction_emitted)
670       goto transform_inst;
671 
672    ctx->first_instruction_emitted = true;
673 
674    if (ctx->key->fog) {
675       /* add a new temp for the fog factor */
676       ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
677       tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp);
678 
679       /* add immediates for clamp */
680       ctx->fog_clamp_imm = ctx->info.immediate_count;
681       tgsi_transform_immediate_decl(tctx, 1.0f, 0.0f, 0.0f, 0.0f);
682    }
683 
684 transform_inst:
685    if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
686       /* fix texture target */
687       unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index];
688       if (newtarget)
689          current_inst->Texture.Texture = newtarget;
690 
691    } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
692               current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
693       struct tgsi_full_instruction inst;
694       unsigned i;
695       int fogc_index = -1;
696       int reg0_index = current_inst->Src[0].Register.Index;
697 
698       /* find FOGC input */
699       for (i = 0; i < ctx->info.num_inputs; i++) {
700          if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) {
701             fogc_index = i;
702             break;
703          }
704       }
705       if (fogc_index < 0) {
706          /* should never be reached, because fog coord input is always declared */
707          tctx->emit_instruction(tctx, current_inst);
708          return;
709       }
710 
711       /* compute the 1 component fog factor f */
712       if (ctx->key->fog == 1) {
713          /* LINEAR formula: f = (end - z) / (end - start)
714           * with optimized parameters:
715           *    f = MAD(fogcoord, oparams.x, oparams.y)
716           */
717          inst = tgsi_default_full_instruction();
718          inst.Instruction.Opcode = TGSI_OPCODE_MAD;
719          inst.Instruction.NumDstRegs = 1;
720          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
721          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
722          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
723          inst.Instruction.NumSrcRegs = 3;
724          SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
725          SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X);
726          SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y);
727          tctx->emit_instruction(tctx, &inst);
728       } else if (ctx->key->fog == 2) {
729          /* EXP formula: f = exp(-dens * z)
730           * with optimized parameters:
731           *    f = MUL(fogcoord, oparams.z); f= EX2(-f)
732           */
733          inst = tgsi_default_full_instruction();
734          inst.Instruction.Opcode = TGSI_OPCODE_MUL;
735          inst.Instruction.NumDstRegs = 1;
736          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
737          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
738          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
739          inst.Instruction.NumSrcRegs = 2;
740          SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
741          SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z);
742          tctx->emit_instruction(tctx, &inst);
743 
744          inst = tgsi_default_full_instruction();
745          inst.Instruction.Opcode = TGSI_OPCODE_EX2;
746          inst.Instruction.NumDstRegs = 1;
747          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
748          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
749          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
750          inst.Instruction.NumSrcRegs = 1;
751          SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
752          inst.Src[0].Register.Negate = 1;
753          tctx->emit_instruction(tctx, &inst);
754       } else if (ctx->key->fog == 3) {
755          /* EXP2 formula: f = exp(-(dens * z)^2)
756           * with optimized parameters:
757           *    f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
758           */
759          inst = tgsi_default_full_instruction();
760          inst.Instruction.Opcode = TGSI_OPCODE_MUL;
761          inst.Instruction.NumDstRegs = 1;
762          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
763          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
764          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
765          inst.Instruction.NumSrcRegs = 2;
766          SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
767          SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W);
768          tctx->emit_instruction(tctx, &inst);
769 
770          inst = tgsi_default_full_instruction();
771          inst.Instruction.Opcode = TGSI_OPCODE_MUL;
772          inst.Instruction.NumDstRegs = 1;
773          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
774          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
775          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
776          inst.Instruction.NumSrcRegs = 2;
777          SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
778          SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
779          tctx->emit_instruction(tctx, &inst);
780 
781          inst = tgsi_default_full_instruction();
782          inst.Instruction.Opcode = TGSI_OPCODE_EX2;
783          inst.Instruction.NumDstRegs = 1;
784          inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
785          inst.Dst[0].Register.Index = ctx->fog_factor_temp;
786          inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
787          inst.Instruction.NumSrcRegs = 1;
788          SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
789          inst.Src[0].Register.Negate ^= 1;
790          tctx->emit_instruction(tctx, &inst);
791       }
792       /* f = CLAMP(f, 0.0, 1.0) */
793       inst = tgsi_default_full_instruction();
794       inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
795       inst.Instruction.NumDstRegs = 1;
796       inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
797       inst.Dst[0].Register.Index = ctx->fog_factor_temp;
798       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
799       inst.Instruction.NumSrcRegs = 3;
800       SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
801       SET_SRC(&inst, 1, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, Y, Y, Y, Y); // 0.0
802       SET_SRC(&inst, 2, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, X, X, X, X); // 1.0
803       tctx->emit_instruction(tctx, &inst);
804 
805       /* REG0 = LRP(f, REG0, fogcolor) */
806       inst = tgsi_default_full_instruction();
807       inst.Instruction.Opcode = TGSI_OPCODE_LRP;
808       inst.Instruction.NumDstRegs = 1;
809       inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
810       inst.Dst[0].Register.Index = reg0_index;
811       inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
812       inst.Instruction.NumSrcRegs = 3;
813       SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y);
814       SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, reg0_index, X, Y, Z, W);
815       SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W);
816       tctx->emit_instruction(tctx, &inst);
817    }
818 
819    tctx->emit_instruction(tctx, current_inst);
820 }
821 
822 /*
823  * A post-process step in the draw call to fix texture targets and
824  * insert code for fog.
825  */
826 const struct tgsi_token *
st_fixup_atifs(const struct tgsi_token * tokens,const struct st_fp_variant_key * key)827 st_fixup_atifs(const struct tgsi_token *tokens,
828                const struct st_fp_variant_key *key)
829 {
830    struct tgsi_atifs_transform ctx;
831    struct tgsi_token *newtoks;
832    int newlen;
833 
834    memset(&ctx, 0, sizeof(ctx));
835    ctx.base.transform_declaration = transform_decl;
836    ctx.base.transform_instruction = transform_instr;
837    ctx.key = key;
838    tgsi_scan_shader(tokens, &ctx.info);
839 
840    newlen = tgsi_num_tokens(tokens) + 30;
841    newtoks = tgsi_alloc_tokens(newlen);
842    if (!newtoks)
843       return NULL;
844 
845    tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
846    return newtoks;
847 }
848 
849