• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3  * Copyright 2013 Christoph Bumiller
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "nine_shader.h"
25 
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30 
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37 
/* Debug channel passed to _nine_debug_printf by the DUMP macro below. */
38 #define DBG_CHANNEL DBG_SHADER
39 
/* printf-style output on DBG_CHANNEL, forwarded to _nine_debug_printf with a
 * NULL second argument. Uses the GNU named-variadic form (args...), so at
 * least a format string must always be supplied. */
40 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
41 
42 
43 struct shader_translator;
44 
/* Function pointer type stored in sm1_op_info.handler: takes the translator
 * state and returns an HRESULT. */
45 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
46 
/* Opcode-to-name lookup used by sm1_dump_instruction; only declared here. */
47 static inline const char *d3dsio_to_string(unsigned opcode);
48 
49 
/* Shader-type tags. NOTE(review): presumably the upper 16 bits of the
 * version token -- confirm against the parser. */
50 #define NINED3D_SM1_VS 0xfffe
51 #define NINED3D_SM1_PS 0xffff
52 
/* Sizes of the cond_labels[] and loop_labels[]/loop_or_rep[]/regs.rL[]
 * arrays in struct shader_translator. */
53 #define NINE_MAX_COND_DEPTH 64
54 #define NINE_MAX_LOOP_DEPTH 64
55 
/* NOTE(review): presumably the end-of-shader token value -- confirm. */
56 #define NINED3DSP_END 0x0000ffff
57 
/* Values of the `type` field of a parameter; sm1_dump_immediate uses them to
 * pick which member of sm1_src_param.imm to print. */
58 #define NINED3DSPTYPE_FLOAT4  0
59 #define NINED3DSPTYPE_INT4    1
60 #define NINED3DSPTYPE_BOOL    2
61 
/* Extra register-file id, one past D3DSPR_PREDICATE, that marks a source
 * parameter as an immediate (see sm1_dump_src_param). */
62 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
63 
64 #define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
65 #define NINED3DSP_WRITEMASK_SHIFT 16
66 
/* Bit 28. NOTE(review): presumably tested on the instruction token to detect
 * predication -- the user is outside this part of the file. */
67 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
68 
/* Comparison selectors; their consumers are outside this part of the file. */
69 #define NINED3DSHADER_REL_OP_GT 1
70 #define NINED3DSHADER_REL_OP_EQ 2
71 #define NINED3DSHADER_REL_OP_GE 3
72 #define NINED3DSHADER_REL_OP_LT 4
73 #define NINED3DSHADER_REL_OP_NE 5
74 #define NINED3DSHADER_REL_OP_LE 6
75 
/* Eight flag bits starting at bit 16. */
76 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
77 #define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
78 
/* Values of sm1_instruction.flags for D3DSIO_TEX; sm1_dump_instruction prints
 * "p" for PROJECT and "b" for any other non-zero value. */
79 #define NINED3DSI_TEXLD_PROJECT 0x1
80 #define NINED3DSI_TEXLD_BIAS    0x2
81 
/* Write-mask bits as stored in sm1_dst_param.mask: bit 0 = x ... bit 3 = w
 * (matches sm1_dump_writemask). */
82 #define NINED3DSP_WRITEMASK_0   0x1
83 #define NINED3DSP_WRITEMASK_1   0x2
84 #define NINED3DSP_WRITEMASK_2   0x4
85 #define NINED3DSP_WRITEMASK_3   0x8
86 #define NINED3DSP_WRITEMASK_ALL 0xf
87 
/* Identity swizzle: two bits per component, x=0 y=1 z=2 w=3 (the encoding
 * decoded by sm1_dump_swizzle). */
88 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
89 
/* Expands to four comma-separated TGSI_SWIZZLE_<c> constants; the arguments
 * are pasted as tokens, so they must be the literal letters X/Y/Z/W. */
90 #define NINE_SWIZZLE4(x,y,z,w) \
91    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
92 
/* ureg source for TGSI_FILE_CONSTANT[index] with dimension index 0. */
93 #define NINE_CONSTANT_SRC(index) \
94    ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, index), 0)
95 
/* Replicate component `s` (a literal X/Y/Z/W) of `src` to all four lanes. */
96 #define NINE_APPLY_SWIZZLE(src, s) \
97    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
98 
/* NINE_CONSTANT_SRC(index) with component `s` replicated to all lanes. */
99 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
100    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)
101 
/* Destination modifiers shifted down by D3DSP_DSTMOD_SHIFT; tested as bit
 * flags against sm1_dst_param.mod in sm1_dump_dst_param. */
102 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
103 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
104 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
/* Source modifiers, shifted down by D3DSP_SRCMOD_SHIFT so they can be stored
 * in sm1_src_param.mod and used as indices into sm1_mod_str[]. The table
 * below lists, per modifier, the shader versions it applies to and its
 * effect. */
106 /*
107  * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
108  * BIAS    <= PS 1.4 (x-0.5)
109  * BIASNEG <= PS 1.4 (-(x-0.5))
110  * SIGN    <= PS 1.4 (2(x-0.5))
111  * SIGNNEG <= PS 1.4 (-2(x-0.5))
112  * COMP    <= PS 1.4 (1-x)
113  * X2       = PS 1.4 (2x)
114  * X2NEG    = PS 1.4 (-2x)
115  * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
116  * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
117  * ABS     >= SM 3.0 (abs(x))
118  * ABSNEG  >= SM 3.0 (-abs(x))
119  * NOT     >= SM 2.0 predication only
120  */
121 #define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
131 #define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
132 #define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
133 #define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
134 #define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
135 
/* Printable name of each source modifier, indexed by the NINED3DSPSM_x value.
 * sm1_dump_src_param prints the entry followed by "(" when the modifier is
 * non-zero. The array only has as many entries as the largest designated
 * index plus one, so callers must not index it with an arbitrary byte. */
136 static const char *sm1_mod_str[] =
137 {
138     [NINED3DSPSM_NONE] = "",
139     [NINED3DSPSM_NEG] = "-",
140     [NINED3DSPSM_BIAS] = "bias",
141     [NINED3DSPSM_BIASNEG] = "biasneg",
142     [NINED3DSPSM_SIGN] = "sign",
143     [NINED3DSPSM_SIGNNEG] = "signneg",
144     [NINED3DSPSM_COMP] = "comp",
145     [NINED3DSPSM_X2] = "x2",
146     [NINED3DSPSM_X2NEG] = "x2neg",
147     [NINED3DSPSM_DZ] = "dz",
148     [NINED3DSPSM_DW] = "dw",
149     [NINED3DSPSM_ABS] = "abs",
150     [NINED3DSPSM_ABSNEG] = "-abs",
151     [NINED3DSPSM_NOT] = "not"
152 };
153 
154 static void
sm1_dump_writemask(BYTE mask)155 sm1_dump_writemask(BYTE mask)
156 {
157     if (mask & 1) DUMP("x"); else DUMP("_");
158     if (mask & 2) DUMP("y"); else DUMP("_");
159     if (mask & 4) DUMP("z"); else DUMP("_");
160     if (mask & 8) DUMP("w"); else DUMP("_");
161 }
162 
163 static void
sm1_dump_swizzle(BYTE s)164 sm1_dump_swizzle(BYTE s)
165 {
166     char c[4] = { 'x', 'y', 'z', 'w' };
167     DUMP("%c%c%c%c",
168          c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
169 }
170 
/* One-letter prefix for each D3DSPR_x register file, used by the dump helpers
 * when printing a register name. The array length is the largest designated
 * index plus one; NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) therefore lies
 * outside it. Note that ATTROUT and DEPTHOUT share the letter 'D' and the
 * four CONST files share 'c'. */
171 static const char sm1_file_char[] =
172 {
173     [D3DSPR_TEMP] = 'r',
174     [D3DSPR_INPUT] = 'v',
175     [D3DSPR_CONST] = 'c',
176     [D3DSPR_ADDR] = 'A',
177     [D3DSPR_RASTOUT] = 'R',
178     [D3DSPR_ATTROUT] = 'D',
179     [D3DSPR_OUTPUT] = 'o',
180     [D3DSPR_CONSTINT] = 'I',
181     [D3DSPR_COLOROUT] = 'C',
182     [D3DSPR_DEPTHOUT] = 'D',
183     [D3DSPR_SAMPLER] = 's',
184     [D3DSPR_CONST2] = 'c',
185     [D3DSPR_CONST3] = 'c',
186     [D3DSPR_CONST4] = 'c',
187     [D3DSPR_CONSTBOOL] = 'B',
188     [D3DSPR_LOOP] = 'L',
189     [D3DSPR_TEMPFLOAT16] = 'h',
190     [D3DSPR_MISCTYPE] = 'M',
191     [D3DSPR_LABEL] = 'X',
192     [D3DSPR_PREDICATE] = 'p'
193 };
194 
195 static void
sm1_dump_reg(BYTE file,INT index)196 sm1_dump_reg(BYTE file, INT index)
197 {
198     switch (file) {
199     case D3DSPR_LOOP:
200         DUMP("aL");
201         break;
202     case D3DSPR_COLOROUT:
203         DUMP("oC%i", index);
204         break;
205     case D3DSPR_DEPTHOUT:
206         DUMP("oDepth");
207         break;
208     case D3DSPR_RASTOUT:
209         DUMP("oRast%i", index);
210         break;
211     case D3DSPR_CONSTINT:
212         DUMP("iconst[%i]", index);
213         break;
214     case D3DSPR_CONSTBOOL:
215         DUMP("bconst[%i]", index);
216         break;
217     default:
218         DUMP("%c%i", sm1_file_char[file], index);
219         break;
220     }
221 }
222 
/* One source operand of an instruction, in the form consumed by the dump
 * helpers and by tx_src_param. */
223 struct sm1_src_param
224 {
225     INT idx; /* register index within `file` */
226     struct sm1_src_param *rel; /* non-NULL: relative addressing, printed as file[rel+idx] */
227     BYTE file; /* D3DSPR_x, or NINED3DSPR_IMMEDIATE for an immediate */
228     BYTE swizzle; /* 2 bits per component, see sm1_dump_swizzle */
229     BYTE mod; /* NINED3DSPSM_x source modifier */
230     BYTE type; /* NINED3DSPTYPE_x; selects the active member of imm */
231     union {
232         DWORD d[4];
233         float f[4];
234         int i[4];
235         BOOL b;
236     } imm; /* immediate value, printed by sm1_dump_immediate */
237 };
/* Defined later in the file. */
238 static void
239 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
240 
/* One destination operand of an instruction. */
241 struct sm1_dst_param
242 {
243     INT idx; /* register index within `file` */
244     struct sm1_src_param *rel; /* non-NULL: relative addressing, printed as file[rel+idx] */
245     BYTE file; /* D3DSPR_x register file */
246     BYTE mask; /* write mask, bit 0 = x ... bit 3 = w */
247     BYTE mod; /* NINED3DSPDM_x flags (saturate, partial precision, centroid) */
248     int8_t shift; /* sint4; dumped as "*2^shift" when > 0 and "/2^-shift" when < 0 */
249     BYTE type;
250 };
251 
252 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)253 assert_replicate_swizzle(const struct ureg_src *reg)
254 {
255     assert(reg->SwizzleY == reg->SwizzleX &&
256            reg->SwizzleZ == reg->SwizzleX &&
257            reg->SwizzleW == reg->SwizzleX);
258 }
259 
260 static void
sm1_dump_immediate(const struct sm1_src_param * param)261 sm1_dump_immediate(const struct sm1_src_param *param)
262 {
263     switch (param->type) {
264     case NINED3DSPTYPE_FLOAT4:
265         DUMP("{ %f %f %f %f }",
266              param->imm.f[0], param->imm.f[1],
267              param->imm.f[2], param->imm.f[3]);
268         break;
269     case NINED3DSPTYPE_INT4:
270         DUMP("{ %i %i %i %i }",
271              param->imm.i[0], param->imm.i[1],
272              param->imm.i[2], param->imm.i[3]);
273         break;
274     case NINED3DSPTYPE_BOOL:
275         DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
276         break;
277     default:
278         assert(0);
279         break;
280     }
281 }
282 
283 static void
sm1_dump_src_param(const struct sm1_src_param * param)284 sm1_dump_src_param(const struct sm1_src_param *param)
285 {
286     if (param->file == NINED3DSPR_IMMEDIATE) {
287         assert(!param->mod &&
288                !param->rel &&
289                param->swizzle == NINED3DSP_NOSWIZZLE);
290         sm1_dump_immediate(param);
291         return;
292     }
293 
294     if (param->mod)
295         DUMP("%s(", sm1_mod_str[param->mod]);
296     if (param->rel) {
297         DUMP("%c[", sm1_file_char[param->file]);
298         sm1_dump_src_param(param->rel);
299         DUMP("+%i]", param->idx);
300     } else {
301         sm1_dump_reg(param->file, param->idx);
302     }
303     if (param->mod)
304        DUMP(")");
305     if (param->swizzle != NINED3DSP_NOSWIZZLE) {
306        DUMP(".");
307        sm1_dump_swizzle(param->swizzle);
308     }
309 }
310 
311 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)312 sm1_dump_dst_param(const struct sm1_dst_param *param)
313 {
314    if (param->mod & NINED3DSPDM_SATURATE)
315       DUMP("sat ");
316    if (param->mod & NINED3DSPDM_PARTIALP)
317       DUMP("pp ");
318    if (param->mod & NINED3DSPDM_CENTROID)
319       DUMP("centroid ");
320    if (param->shift < 0)
321       DUMP("/%u ", 1 << -param->shift);
322    if (param->shift > 0)
323       DUMP("*%u ", 1 << param->shift);
324 
325    if (param->rel) {
326       DUMP("%c[", sm1_file_char[param->file]);
327       sm1_dump_src_param(param->rel);
328       DUMP("+%i]", param->idx);
329    } else {
330       sm1_dump_reg(param->file, param->idx);
331    }
332    if (param->mask != NINED3DSP_WRITEMASK_ALL) {
333       DUMP(".");
334       sm1_dump_writemask(param->mask);
335    }
336 }
337 
/* A declared register together with its usage information.
 * NOTE(review): presumably filled in by sm1_read_semantic when parsing a DCL
 * instruction -- confirm against that function. */
338 struct sm1_semantic
339 {
340    struct sm1_dst_param reg; /* the register being declared */
341    BYTE sampler_type;
342    D3DDECLUSAGE usage;
343    BYTE usage_idx;
344 };
345 
/* Static description of one shader opcode: its TGSI counterpart, the shader
 * versions it is valid in, its operand counts and an optional custom
 * translation handler. */
346 struct sm1_op_info
347 {
348     /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
349      * should be ignored completely */
350     unsigned sio;
351     unsigned opcode; /* TGSI_OPCODE_x */
352 
353     /* versions are still set even if handler is set */
354     struct {
355         unsigned min;
356         unsigned max;
357     } vert_version, frag_version;
358 
359     /* number of regs parsed outside of special handler */
360     unsigned ndst;
361     unsigned nsrc;
362 
363     /* some instructions don't map perfectly, so use a special handler */
364     translate_instruction_func handler;
365 };
366 
/* One decoded instruction: opcode, flags and operands. This is the structure
 * printed by sm1_dump_instruction. */
367 struct sm1_instruction
368 {
369     D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
370     BYTE flags; /* opcode-specific; for D3DSIO_TEX see NINED3DSI_TEXLD_x */
371     BOOL coissue; /* dumped as the "_co" suffix */
372     BOOL predicated; /* when set, `pred` is valid and dumped as "@pred" */
373     BYTE ndst; /* number of valid entries in dst[] */
374     BYTE nsrc; /* number of valid entries in src[] */
375     struct sm1_src_param src[4];
376     struct sm1_src_param src_rel[4]; /* presumably backing storage for src[i].rel -- confirm */
377     struct sm1_src_param pred;
378     struct sm1_src_param dst_rel[1]; /* presumably backing storage for dst[0].rel -- confirm */
379     struct sm1_dst_param dst[1];
380 
381     struct sm1_op_info *info;
382 };
383 
384 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
386 {
387     unsigned i;
388 
389     /* no info stored for these: */
390     if (insn->opcode == D3DSIO_DCL)
391         return;
392     for (i = 0; i < indent; ++i)
393         DUMP("  ");
394 
395     if (insn->predicated) {
396         DUMP("@");
397         sm1_dump_src_param(&insn->pred);
398         DUMP(" ");
399     }
400     DUMP("%s", d3dsio_to_string(insn->opcode));
401     if (insn->flags) {
402         switch (insn->opcode) {
403         case D3DSIO_TEX:
404             DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
405             break;
406         default:
407             DUMP("_%x", insn->flags);
408             break;
409         }
410     }
411     if (insn->coissue)
412         DUMP("_co");
413     DUMP(" ");
414 
415     for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
416         sm1_dump_dst_param(&insn->dst[i]);
417         DUMP(" ");
418     }
419 
420     for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
421         sm1_dump_src_param(&insn->src[i]);
422         DUMP(" ");
423     }
424     if (insn->opcode == D3DSIO_DEF ||
425         insn->opcode == D3DSIO_DEFI ||
426         insn->opcode == D3DSIO_DEFB)
427         sm1_dump_immediate(&insn->src[0]);
428 
429     DUMP("\n");
430 }
431 
/* A constant defined inside the shader itself. Entries are stored in the
 * lconstf/lconsti/lconstb arrays of struct shader_translator and looked up
 * by `idx`. */
432 struct sm1_local_const
433 {
434     INT idx; /* constant register index this entry overrides */
435     struct ureg_src reg; /* immediate created with ureg_imm* holding the value */
436     float f[4]; /* for indirect addressing of float constants */
437 };
438 
/* Complete state of one shader translation: the input token stream, the ureg
 * program being built, the register mappings created so far, control-flow
 * bookkeeping and the locally defined constants. Most helpers in this file
 * take a pointer to it named `tx`. */
439 struct shader_translator
440 {
441     const DWORD *byte_code;
442     const DWORD *parse;
443     const DWORD *parse_next;
444 
445     struct ureg_program *ureg;
446 
447     /* shader version */
448     struct {
449         BYTE major;
450         BYTE minor;
451     } version;
452     unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* upper bounds (exclusive) on the constant indices accepted by
     * tx_lconst{f,i,b} and tx_set_lconst{f,i,b} */
453     unsigned num_constf_allowed;
454     unsigned num_consti_allowed;
455     unsigned num_constb_allowed;
456 
457     boolean native_integers;
458     boolean inline_subroutines;
459     boolean want_texcoord;
460     boolean shift_wpos;
461     boolean wpos_is_sysval;
462     boolean face_is_sysval_integer;
463     unsigned texcoord_sn;
464 
465     struct sm1_instruction insn; /* current instruction */
466 
467     struct {
468         struct ureg_dst *r;
469         struct ureg_dst oPos;
470         struct ureg_dst oPos_out; /* the real output when doing streamout */
471         struct ureg_dst oFog;
472         struct ureg_dst oPts;
473         struct ureg_dst oCol[4];
474         struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
475         struct ureg_dst oDepth;
476         struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
477         struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
478         struct ureg_src vPos;
479         struct ureg_src vFace;
480         struct ureg_src s;
481         struct ureg_dst p;
482         struct ureg_dst address;
483         struct ureg_dst a0;
484         struct ureg_dst tS[8]; /* texture stage registers */
485         struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
486         struct ureg_dst t[5]; /* scratch TEMPs */
487         struct ureg_src vC[2]; /* PS color in */
488         struct ureg_src vT[8]; /* PS texcoord in */
489         struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
490     } regs;
491     unsigned num_temp; /* number of entries allocated in regs.r */
492     unsigned num_scratch; /* number of regs.t entries handed out by tx_scratch */
493     unsigned loop_depth;
494     unsigned loop_depth_max;
495     unsigned cond_depth;
496     unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
497     unsigned cond_labels[NINE_MAX_COND_DEPTH];
498     boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
499 
500     unsigned *inst_labels; /* LABEL op */
501     unsigned num_inst_labels;
502 
503     unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
504 
    /* locally defined constants; each array grows in steps of 8 entries */
505     struct sm1_local_const *lconstf;
506     unsigned num_lconstf;
507     struct sm1_local_const *lconsti;
508     unsigned num_lconsti;
509     struct sm1_local_const *lconstb;
510     unsigned num_lconstb;
511 
512     boolean indirect_const_access;
513     boolean failure; /* set by helpers when translation cannot continue */
514 
515     struct nine_vs_output_info output_info[16];
516     int num_outputs; /* number of valid entries in output_info */
517 
518     struct nine_shader_info *info;
519 
520     int16_t op_info_map[D3DSIO_BREAKP + 1];
521 };
522 
/* Shader-stage tests. Both read a variable named `tx` from the enclosing
 * scope. */
523 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
524 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
525 
/* If `cond` holds, flag the translation as failed and return from the
 * enclosing void function. Requires `tx` in scope. The expansion is a
 * complete if statement and call sites in this file omit the trailing
 * semicolon, so do not use it as the body of an unbraced if/else. */
526 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
527 
/* Defined later in the file. */
528 static void
529 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
530 
531 static void
sm1_instruction_check(const struct sm1_instruction * insn)532 sm1_instruction_check(const struct sm1_instruction *insn)
533 {
534     if (insn->opcode == D3DSIO_CRS)
535     {
536         if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
537         {
538             DBG("CRS.mask.w\n");
539         }
540     }
541 }
542 
543 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)544 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
545                     int mask, int output_index)
546 {
547     tx->output_info[tx->num_outputs].output_semantic = Usage;
548     tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
549     tx->output_info[tx->num_outputs].mask = mask;
550     tx->output_info[tx->num_outputs].output_index = output_index;
551     tx->num_outputs++;
552 }
553 
554 static boolean
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)555 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
556 {
557    INT i;
558 
559    if (index < 0 || index >= tx->num_constf_allowed) {
560        tx->failure = TRUE;
561        return FALSE;
562    }
563    for (i = 0; i < tx->num_lconstf; ++i) {
564       if (tx->lconstf[i].idx == index) {
565          *src = tx->lconstf[i].reg;
566          return TRUE;
567       }
568    }
569    return FALSE;
570 }
571 static boolean
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)572 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
573 {
574    int i;
575 
576    if (index < 0 || index >= tx->num_consti_allowed) {
577        tx->failure = TRUE;
578        return FALSE;
579    }
580    for (i = 0; i < tx->num_lconsti; ++i) {
581       if (tx->lconsti[i].idx == index) {
582          *src = tx->lconsti[i].reg;
583          return TRUE;
584       }
585    }
586    return FALSE;
587 }
588 static boolean
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)589 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
590 {
591    int i;
592 
593    if (index < 0 || index >= tx->num_constb_allowed) {
594        tx->failure = TRUE;
595        return FALSE;
596    }
597    for (i = 0; i < tx->num_lconstb; ++i) {
598       if (tx->lconstb[i].idx == index) {
599          *src = tx->lconstb[i].reg;
600          return TRUE;
601       }
602    }
603    return FALSE;
604 }
605 
606 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])607 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
608 {
609     unsigned n;
610 
611     FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
612 
613     for (n = 0; n < tx->num_lconstf; ++n)
614         if (tx->lconstf[n].idx == index)
615             break;
616     if (n == tx->num_lconstf) {
617        if ((n % 8) == 0) {
618           tx->lconstf = REALLOC(tx->lconstf,
619                                 (n + 0) * sizeof(tx->lconstf[0]),
620                                 (n + 8) * sizeof(tx->lconstf[0]));
621           assert(tx->lconstf);
622        }
623        tx->num_lconstf++;
624     }
625     tx->lconstf[n].idx = index;
626     tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
627 
628     memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
629 }
630 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])631 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
632 {
633     unsigned n;
634 
635     FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
636 
637     for (n = 0; n < tx->num_lconsti; ++n)
638         if (tx->lconsti[n].idx == index)
639             break;
640     if (n == tx->num_lconsti) {
641        if ((n % 8) == 0) {
642           tx->lconsti = REALLOC(tx->lconsti,
643                                 (n + 0) * sizeof(tx->lconsti[0]),
644                                 (n + 8) * sizeof(tx->lconsti[0]));
645           assert(tx->lconsti);
646        }
647        tx->num_lconsti++;
648     }
649 
650     tx->lconsti[n].idx = index;
651     tx->lconsti[n].reg = tx->native_integers ?
652        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
653        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
654 }
655 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)656 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
657 {
658     unsigned n;
659 
660     FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
661 
662     for (n = 0; n < tx->num_lconstb; ++n)
663         if (tx->lconstb[n].idx == index)
664             break;
665     if (n == tx->num_lconstb) {
666        if ((n % 8) == 0) {
667           tx->lconstb = REALLOC(tx->lconstb,
668                                 (n + 0) * sizeof(tx->lconstb[0]),
669                                 (n + 8) * sizeof(tx->lconstb[0]));
670           assert(tx->lconstb);
671        }
672        tx->num_lconstb++;
673     }
674 
675     tx->lconstb[n].idx = index;
676     tx->lconstb[n].reg = tx->native_integers ?
677        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
678        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
679 }
680 
681 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)682 tx_scratch(struct shader_translator *tx)
683 {
684     if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
685         tx->failure = TRUE;
686         return tx->regs.t[0];
687     }
688     if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
689         tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
690     return tx->regs.t[tx->num_scratch++];
691 }
692 
693 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)694 tx_scratch_scalar(struct shader_translator *tx)
695 {
696     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
697 }
698 
699 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)700 tx_src_scalar(struct ureg_dst dst)
701 {
702     struct ureg_src src = ureg_src(dst);
703     int c = ffs(dst.WriteMask) - 1;
704     if (dst.WriteMask == (1 << c))
705         src = ureg_scalar(src, c);
706     return src;
707 }
708 
709 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)710 tx_temp_alloc(struct shader_translator *tx, INT idx)
711 {
712     assert(idx >= 0);
713     if (idx >= tx->num_temp) {
714        unsigned k = tx->num_temp;
715        unsigned n = idx + 1;
716        tx->regs.r = REALLOC(tx->regs.r,
717                             k * sizeof(tx->regs.r[0]),
718                             n * sizeof(tx->regs.r[0]));
719        for (; k < n; ++k)
720           tx->regs.r[k] = ureg_dst_undef();
721        tx->num_temp = n;
722     }
723     if (ureg_dst_is_undef(tx->regs.r[idx]))
724         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
725 }
726 
727 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)728 tx_addr_alloc(struct shader_translator *tx, INT idx)
729 {
730     assert(idx == 0);
731     if (ureg_dst_is_undef(tx->regs.address))
732         tx->regs.address = ureg_DECL_address(tx->ureg);
733     if (ureg_dst_is_undef(tx->regs.a0))
734         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
735 }
736 
737 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
738  * the projection should be applied on the texture. It doesn't
739  * apply on texkill.
740  * The doc is very imprecise here (it says the projection is done
741  * before rasterization, thus in vs, which seems wrong since ps instructions
742  * are affected differently)
743  * For now we only apply to the ps TEX instruction and TEXBEM.
744  * Perhaps some other instructions would need it */
745 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)746 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
747                       struct ureg_src src, INT idx)
748 {
749     struct ureg_dst tmp;
750     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
751 
752     /* no projection */
753     if (dim == 1) {
754         ureg_MOV(tx->ureg, dst, src);
755     } else {
756         tmp = tx_scratch_scalar(tx);
757         ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
758         ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
759     }
760 }
761 
762 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)763 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
764                          unsigned target, struct ureg_src src0,
765                          struct ureg_src src1, INT idx)
766 {
767     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
768     struct ureg_dst tmp;
769 
770     /* dim == 1: no projection
771      * Looks like must be disabled when it makes no
772      * sense according the texture dimensions
773      */
774     if (dim == 1 || dim <= target) {
775         ureg_TEX(tx->ureg, dst, target, src0, src1);
776     } else if (dim == 4) {
777         ureg_TXP(tx->ureg, dst, target, src0, src1);
778     } else {
779         tmp = tx_scratch(tx);
780         apply_ps1x_projection(tx, tmp, src0, idx);
781         ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
782     }
783 }
784 
785 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)786 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
787 {
788     assert(IS_PS);
789     assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
790     if (ureg_src_is_undef(tx->regs.vT[idx]))
791        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
792                                              TGSI_INTERPOLATE_PERSPECTIVE);
793 }
794 
795 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)796 tx_bgnloop(struct shader_translator *tx)
797 {
798     tx->loop_depth++;
799     if (tx->loop_depth_max < tx->loop_depth)
800         tx->loop_depth_max = tx->loop_depth;
801     assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
802     return &tx->loop_labels[tx->loop_depth - 1];
803 }
804 
805 static inline unsigned *
tx_endloop(struct shader_translator * tx)806 tx_endloop(struct shader_translator *tx)
807 {
808     assert(tx->loop_depth);
809     tx->loop_depth--;
810     ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
811                      ureg_get_instruction_number(tx->ureg));
812     return &tx->loop_labels[tx->loop_depth];
813 }
814 
815 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,boolean loop_or_rep)816 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
817 {
818     const unsigned l = tx->loop_depth - 1;
819 
820     if (!tx->loop_depth)
821     {
822         DBG("loop counter requested outside of loop\n");
823         return ureg_dst_undef();
824     }
825 
826     if (ureg_dst_is_undef(tx->regs.rL[l])) {
827         /* loop or rep ctr creation */
828         tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
829         tx->loop_or_rep[l] = loop_or_rep;
830     }
831     /* loop - rep - endloop - endrep not allowed */
832     assert(tx->loop_or_rep[l] == loop_or_rep);
833 
834     return tx->regs.rL[l];
835 }
836 
837 static struct ureg_src
tx_get_loopal(struct shader_translator * tx)838 tx_get_loopal(struct shader_translator *tx)
839 {
840     int loop_level = tx->loop_depth - 1;
841 
842     while (loop_level >= 0) {
843         /* handle loop - rep - endrep - endloop case */
844         if (tx->loop_or_rep[loop_level])
845             /* the value is in the loop counter y component (nine implementation) */
846             return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
847         loop_level--;
848     }
849 
850     DBG("aL counter requested outside of loop\n");
851     return ureg_src_undef();
852 }
853 
854 static inline unsigned *
tx_cond(struct shader_translator * tx)855 tx_cond(struct shader_translator *tx)
856 {
857    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
858    tx->cond_depth++;
859    return &tx->cond_labels[tx->cond_depth - 1];
860 }
861 
862 static inline unsigned *
tx_elsecond(struct shader_translator * tx)863 tx_elsecond(struct shader_translator *tx)
864 {
865    assert(tx->cond_depth);
866    return &tx->cond_labels[tx->cond_depth - 1];
867 }
868 
869 static inline void
tx_endcond(struct shader_translator * tx)870 tx_endcond(struct shader_translator *tx)
871 {
872    assert(tx->cond_depth);
873    tx->cond_depth--;
874    ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
875                     ureg_get_instruction_number(tx->ureg));
876 }
877 
878 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)879 nine_ureg_dst_register(unsigned file, int index)
880 {
881     return ureg_dst(ureg_src_register(file, index));
882 }
883 
884 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)885 nine_get_position_input(struct shader_translator *tx)
886 {
887     struct ureg_program *ureg = tx->ureg;
888 
889     if (tx->wpos_is_sysval)
890         return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
891     else
892         return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
893                                   0, TGSI_INTERPOLATE_LINEAR);
894 }
895 
896 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)897 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
898 {
899     struct ureg_program *ureg = tx->ureg;
900     struct ureg_src src;
901     struct ureg_dst tmp;
902 
903     switch (param->file)
904     {
905     case D3DSPR_TEMP:
906         assert(!param->rel);
907         tx_temp_alloc(tx, param->idx);
908         src = ureg_src(tx->regs.r[param->idx]);
909         break;
910  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
911     case D3DSPR_ADDR:
912         assert(!param->rel);
913         if (IS_VS) {
914             assert(param->idx == 0);
915             /* the address register (vs only) must be
916              * assigned before use */
917             assert(!ureg_dst_is_undef(tx->regs.a0));
918             /* Round to lowest for vs1.1 (contrary to the doc), else
919              * round to nearest */
920             if (tx->version.major < 2 && tx->version.minor < 2)
921                 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
922             else
923                 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
924             src = ureg_src(tx->regs.address);
925         } else {
926             if (tx->version.major < 2 && tx->version.minor < 4) {
927                 /* no subroutines, so should be defined */
928                 src = ureg_src(tx->regs.tS[param->idx]);
929             } else {
930                 tx_texcoord_alloc(tx, param->idx);
931                 src = tx->regs.vT[param->idx];
932             }
933         }
934         break;
935     case D3DSPR_INPUT:
936         if (IS_VS) {
937             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
938         } else {
939             if (tx->version.major < 3) {
940                 assert(!param->rel);
941                 src = ureg_DECL_fs_input_cyl_centroid(
942                     ureg, TGSI_SEMANTIC_COLOR, param->idx,
943                     TGSI_INTERPOLATE_COLOR, 0,
944                     tx->info->force_color_in_centroid ?
945                       TGSI_INTERPOLATE_LOC_CENTROID : 0,
946                     0, 1);
947             } else {
948                 if(param->rel) {
949                     /* Copy all inputs (non consecutive)
950                      * to temp array (consecutive).
951                      * This is not good for performance.
952                      * A better way would be to have inputs
953                      * consecutive (would need implement alternative
954                      * way to match vs outputs and ps inputs).
955                      * However even with the better way, the temp array
956                      * copy would need to be used if some inputs
957                      * are not GENERIC or if they have different
958                      * interpolation flag. */
959                     if (ureg_src_is_undef(tx->regs.v_consecutive)) {
960                         int i;
961                         tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
962                         for (i = 0; i < 10; i++) {
963                             if (!ureg_src_is_undef(tx->regs.v[i]))
964                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
965                             else
966                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
967                         }
968                     }
969                     src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
970                 } else {
971                     assert(param->idx < ARRAY_SIZE(tx->regs.v));
972                     src = tx->regs.v[param->idx];
973                 }
974             }
975         }
976         break;
977     case D3DSPR_PREDICATE:
978         assert(!"D3DSPR_PREDICATE");
979         break;
980     case D3DSPR_SAMPLER:
981         assert(param->mod == NINED3DSPSM_NONE);
982         assert(param->swizzle == NINED3DSP_NOSWIZZLE);
983         assert(!param->rel);
984         src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
985         break;
986     case D3DSPR_CONST:
987         assert(!param->rel || IS_VS);
988         if (param->rel)
989             tx->indirect_const_access = TRUE;
990         if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
991             if (!param->rel)
992                 nine_info_mark_const_f_used(tx->info, param->idx);
993             /* vswp constant handling: we use two buffers
994              * to fit all the float constants. The special handling
995              * doesn't need to be elsewhere, because all the instructions
996              * accessing the constants directly are VS1, and swvp
997              * is VS >= 2 */
998             if (IS_VS && tx->info->swvp_on) {
999                 if (!param->rel) {
1000                     if (param->idx < 4096) {
1001                         src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1002                         src = ureg_src_dimension(src, 0);
1003                     } else {
1004                         src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
1005                         src = ureg_src_dimension(src, 1);
1006                     }
1007                 } else {
1008                     src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
1009                     src = ureg_src_dimension(src, 0);
1010                 }
1011             } else
1012                 src = NINE_CONSTANT_SRC(param->idx);
1013         }
1014         if (!IS_VS && tx->version.major < 2) {
1015             /* ps 1.X clamps constants */
1016             tmp = tx_scratch(tx);
1017             ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1018             ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1019             src = ureg_src(tmp);
1020         }
1021         break;
1022     case D3DSPR_CONST2:
1023     case D3DSPR_CONST3:
1024     case D3DSPR_CONST4:
1025         DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1026         assert(!"CONST2/3/4");
1027         src = ureg_imm1f(ureg, 0.0f);
1028         break;
1029     case D3DSPR_CONSTINT:
1030         /* relative adressing only possible for float constants in vs */
1031         assert(!param->rel);
1032         if (!tx_lconsti(tx, &src, param->idx)) {
1033             nine_info_mark_const_i_used(tx->info, param->idx);
1034             if (IS_VS && tx->info->swvp_on) {
1035                 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1036                 src = ureg_src_dimension(src, 2);
1037             } else
1038                 src = NINE_CONSTANT_SRC(tx->info->const_i_base + param->idx);
1039         }
1040         break;
1041     case D3DSPR_CONSTBOOL:
1042         assert(!param->rel);
1043         if (!tx_lconstb(tx, &src, param->idx)) {
1044            char r = param->idx / 4;
1045            char s = param->idx & 3;
1046            nine_info_mark_const_b_used(tx->info, param->idx);
1047            if (IS_VS && tx->info->swvp_on) {
1048                src = ureg_src_register(TGSI_FILE_CONSTANT, r);
1049                src = ureg_src_dimension(src, 3);
1050            } else
1051                src = NINE_CONSTANT_SRC(tx->info->const_b_base + r);
1052            src = ureg_swizzle(src, s, s, s, s);
1053         }
1054         break;
1055     case D3DSPR_LOOP:
1056         if (ureg_dst_is_undef(tx->regs.address))
1057             tx->regs.address = ureg_DECL_address(ureg);
1058         if (!tx->native_integers)
1059             ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1060         else
1061             ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1062         src = ureg_src(tx->regs.address);
1063         break;
1064     case D3DSPR_MISCTYPE:
1065         switch (param->idx) {
1066         case D3DSMO_POSITION:
1067            if (ureg_src_is_undef(tx->regs.vPos))
1068               tx->regs.vPos = nine_get_position_input(tx);
1069            if (tx->shift_wpos) {
1070                /* TODO: do this only once */
1071                struct ureg_dst wpos = tx_scratch(tx);
1072                ureg_ADD(ureg, wpos, tx->regs.vPos,
1073                         ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1074                src = ureg_src(wpos);
1075            } else {
1076                src = tx->regs.vPos;
1077            }
1078            break;
1079         case D3DSMO_FACE:
1080            if (ureg_src_is_undef(tx->regs.vFace)) {
1081                if (tx->face_is_sysval_integer) {
1082                    tmp = ureg_DECL_temporary(ureg);
1083                    tx->regs.vFace =
1084                        ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1085 
1086                    /* convert bool to float */
1087                    ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1088                              ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1089                    tx->regs.vFace = ureg_src(tmp);
1090                } else {
1091                    tx->regs.vFace = ureg_DECL_fs_input(ureg,
1092                                                        TGSI_SEMANTIC_FACE, 0,
1093                                                        TGSI_INTERPOLATE_CONSTANT);
1094                }
1095                tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1096            }
1097            src = tx->regs.vFace;
1098            break;
1099         default:
1100             assert(!"invalid src D3DSMO");
1101             break;
1102         }
1103         assert(!param->rel);
1104         break;
1105     case D3DSPR_TEMPFLOAT16:
1106         break;
1107     default:
1108         assert(!"invalid src D3DSPR");
1109     }
1110     if (param->rel)
1111         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1112 
1113     switch (param->mod) {
1114     case NINED3DSPSM_DW:
1115         tmp = tx_scratch(tx);
1116         /* NOTE: app is not allowed to read w with this modifier */
1117         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1118         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1119         src = ureg_src(tmp);
1120         break;
1121     case NINED3DSPSM_DZ:
1122         tmp = tx_scratch(tx);
1123         /* NOTE: app is not allowed to read z with this modifier */
1124         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1125         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1126         src = ureg_src(tmp);
1127         break;
1128     default:
1129         break;
1130     }
1131 
1132     if (param->swizzle != NINED3DSP_NOSWIZZLE)
1133         src = ureg_swizzle(src,
1134                            (param->swizzle >> 0) & 0x3,
1135                            (param->swizzle >> 2) & 0x3,
1136                            (param->swizzle >> 4) & 0x3,
1137                            (param->swizzle >> 6) & 0x3);
1138 
1139     switch (param->mod) {
1140     case NINED3DSPSM_ABS:
1141         src = ureg_abs(src);
1142         break;
1143     case NINED3DSPSM_ABSNEG:
1144         src = ureg_negate(ureg_abs(src));
1145         break;
1146     case NINED3DSPSM_NEG:
1147         src = ureg_negate(src);
1148         break;
1149     case NINED3DSPSM_BIAS:
1150         tmp = tx_scratch(tx);
1151         ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1152         src = ureg_src(tmp);
1153         break;
1154     case NINED3DSPSM_BIASNEG:
1155         tmp = tx_scratch(tx);
1156         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1157         src = ureg_src(tmp);
1158         break;
1159     case NINED3DSPSM_NOT:
1160         if (tx->native_integers) {
1161             tmp = tx_scratch(tx);
1162             ureg_NOT(ureg, tmp, src);
1163             src = ureg_src(tmp);
1164             break;
1165         }
1166         /* fall through */
1167     case NINED3DSPSM_COMP:
1168         tmp = tx_scratch(tx);
1169         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1170         src = ureg_src(tmp);
1171         break;
1172     case NINED3DSPSM_DZ:
1173     case NINED3DSPSM_DW:
1174         /* Already handled*/
1175         break;
1176     case NINED3DSPSM_SIGN:
1177         tmp = tx_scratch(tx);
1178         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1179         src = ureg_src(tmp);
1180         break;
1181     case NINED3DSPSM_SIGNNEG:
1182         tmp = tx_scratch(tx);
1183         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1184         src = ureg_src(tmp);
1185         break;
1186     case NINED3DSPSM_X2:
1187         tmp = tx_scratch(tx);
1188         ureg_ADD(ureg, tmp, src, src);
1189         src = ureg_src(tmp);
1190         break;
1191     case NINED3DSPSM_X2NEG:
1192         tmp = tx_scratch(tx);
1193         ureg_ADD(ureg, tmp, src, src);
1194         src = ureg_negate(ureg_src(tmp));
1195         break;
1196     default:
1197         assert(param->mod == NINED3DSPSM_NONE);
1198         break;
1199     }
1200 
1201     return src;
1202 }
1203 
1204 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1205 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1206 {
1207     struct ureg_dst dst;
1208 
1209     switch (param->file)
1210     {
1211     case D3DSPR_TEMP:
1212         assert(!param->rel);
1213         tx_temp_alloc(tx, param->idx);
1214         dst = tx->regs.r[param->idx];
1215         break;
1216  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1217     case D3DSPR_ADDR:
1218         assert(!param->rel);
1219         if (tx->version.major < 2 && !IS_VS) {
1220             if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1221                 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1222             dst = tx->regs.tS[param->idx];
1223         } else
1224         if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1225             tx_texcoord_alloc(tx, param->idx);
1226             dst = ureg_dst(tx->regs.vT[param->idx]);
1227         } else {
1228             tx_addr_alloc(tx, param->idx);
1229             dst = tx->regs.a0;
1230         }
1231         break;
1232     case D3DSPR_RASTOUT:
1233         assert(!param->rel);
1234         switch (param->idx) {
1235         case 0:
1236             if (ureg_dst_is_undef(tx->regs.oPos))
1237                 tx->regs.oPos =
1238                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1239             dst = tx->regs.oPos;
1240             break;
1241         case 1:
1242             if (ureg_dst_is_undef(tx->regs.oFog))
1243                 tx->regs.oFog =
1244                     ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1245             dst = tx->regs.oFog;
1246             break;
1247         case 2:
1248             if (ureg_dst_is_undef(tx->regs.oPts))
1249                 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1250             dst = tx->regs.oPts;
1251             break;
1252         default:
1253             assert(0);
1254             break;
1255         }
1256         break;
1257  /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1258     case D3DSPR_OUTPUT:
1259         if (tx->version.major < 3) {
1260             assert(!param->rel);
1261             dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1262         } else {
1263             assert(!param->rel); /* TODO */
1264             assert(param->idx < ARRAY_SIZE(tx->regs.o));
1265             dst = tx->regs.o[param->idx];
1266         }
1267         break;
1268     case D3DSPR_ATTROUT: /* VS */
1269     case D3DSPR_COLOROUT: /* PS */
1270         assert(param->idx >= 0 && param->idx < 4);
1271         assert(!param->rel);
1272         tx->info->rt_mask |= 1 << param->idx;
1273         if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1274             /* ps < 3: oCol[0] will have fog blending afterward */
1275             if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1276                 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1277             } else {
1278                 tx->regs.oCol[param->idx] =
1279                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1280             }
1281         }
1282         dst = tx->regs.oCol[param->idx];
1283         if (IS_VS && tx->version.major < 3)
1284             dst = ureg_saturate(dst);
1285         break;
1286     case D3DSPR_DEPTHOUT:
1287         assert(!param->rel);
1288         if (ureg_dst_is_undef(tx->regs.oDepth))
1289            tx->regs.oDepth =
1290               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1291                                       TGSI_WRITEMASK_Z, 0, 1);
1292         dst = tx->regs.oDepth; /* XXX: must write .z component */
1293         break;
1294     case D3DSPR_PREDICATE:
1295         assert(!"D3DSPR_PREDICATE");
1296         break;
1297     case D3DSPR_TEMPFLOAT16:
1298         DBG("unhandled D3DSPR: %u\n", param->file);
1299         break;
1300     default:
1301         assert(!"invalid dst D3DSPR");
1302         break;
1303     }
1304     if (param->rel)
1305         dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1306 
1307     if (param->mask != NINED3DSP_WRITEMASK_ALL)
1308         dst = ureg_writemask(dst, param->mask);
1309     if (param->mod & NINED3DSPDM_SATURATE)
1310         dst = ureg_saturate(dst);
1311 
1312     return dst;
1313 }
1314 
1315 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1316 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1317 {
1318     if (param->shift) {
1319         tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1320         return tx->regs.tdst;
1321     }
1322     return _tx_dst_param(tx, param);
1323 }
1324 
1325 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1326 tx_apply_dst0_modifiers(struct shader_translator *tx)
1327 {
1328     struct ureg_dst rdst;
1329     float f;
1330 
1331     if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1332         return;
1333     rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1334 
1335     assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1336 
1337     if (tx->insn.dst[0].shift < 0)
1338         f = 1.0f / (1 << -tx->insn.dst[0].shift);
1339     else
1340         f = 1 << tx->insn.dst[0].shift;
1341 
1342     ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1343 }
1344 
1345 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1346 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1347 {
1348     struct ureg_src src;
1349 
1350     assert(!param->shift);
1351     assert(!(param->mod & NINED3DSPDM_SATURATE));
1352 
1353     switch (param->file) {
1354     case D3DSPR_INPUT:
1355         if (IS_VS) {
1356             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1357         } else {
1358             assert(!param->rel);
1359             assert(param->idx < ARRAY_SIZE(tx->regs.v));
1360             src = tx->regs.v[param->idx];
1361         }
1362         break;
1363     default:
1364         src = ureg_src(tx_dst_param(tx, param));
1365         break;
1366     }
1367     if (param->rel)
1368         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1369 
1370     if (!param->mask)
1371         WARN("mask is 0, using identity swizzle\n");
1372 
1373     if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1374         char s[4];
1375         int n;
1376         int c;
1377         for (n = 0, c = 0; c < 4; ++c)
1378             if (param->mask & (1 << c))
1379                 s[n++] = c;
1380         assert(n);
1381         for (c = n; c < 4; ++c)
1382             s[c] = s[n - 1];
1383         src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1384     }
1385     return src;
1386 }
1387 
1388 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1389 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1390 {
1391     struct ureg_program *ureg = tx->ureg;
1392     struct ureg_dst dst;
1393     struct ureg_src src[2];
1394     struct sm1_src_param *src_mat = &tx->insn.src[1];
1395     unsigned i;
1396 
1397     dst = tx_dst_param(tx, &tx->insn.dst[0]);
1398     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1399 
1400     for (i = 0; i < n; i++)
1401     {
1402         const unsigned m = (1 << i);
1403 
1404         src[1] = tx_src_param(tx, src_mat);
1405         src_mat->idx++;
1406 
1407         if (!(dst.WriteMask & m))
1408             continue;
1409 
1410         /* XXX: src == dst case ? */
1411 
1412         switch (k) {
1413         case 3:
1414             ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1415             break;
1416         case 4:
1417             ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1418             break;
1419         default:
1420             DBG("invalid operation: M%ux%u\n", m, n);
1421             break;
1422         }
1423     }
1424 
1425     return D3D_OK;
1426 }
1427 
/* Expands to two zero fields ("0, 0"); presumably the min/max version
 * pair of an instruction that is not supported for a shader type. */
#define VNOTSUPPORTED   0, 0
/* Pack a shader version as (major << 8) | minor. */
#define V(maj, min)     (((maj) << 8) | (min))
1430 
1431 static inline const char *
d3dsio_to_string(unsigned opcode)1432 d3dsio_to_string( unsigned opcode )
1433 {
1434     static const char *names[] = {
1435         "NOP",
1436         "MOV",
1437         "ADD",
1438         "SUB",
1439         "MAD",
1440         "MUL",
1441         "RCP",
1442         "RSQ",
1443         "DP3",
1444         "DP4",
1445         "MIN",
1446         "MAX",
1447         "SLT",
1448         "SGE",
1449         "EXP",
1450         "LOG",
1451         "LIT",
1452         "DST",
1453         "LRP",
1454         "FRC",
1455         "M4x4",
1456         "M4x3",
1457         "M3x4",
1458         "M3x3",
1459         "M3x2",
1460         "CALL",
1461         "CALLNZ",
1462         "LOOP",
1463         "RET",
1464         "ENDLOOP",
1465         "LABEL",
1466         "DCL",
1467         "POW",
1468         "CRS",
1469         "SGN",
1470         "ABS",
1471         "NRM",
1472         "SINCOS",
1473         "REP",
1474         "ENDREP",
1475         "IF",
1476         "IFC",
1477         "ELSE",
1478         "ENDIF",
1479         "BREAK",
1480         "BREAKC",
1481         "MOVA",
1482         "DEFB",
1483         "DEFI",
1484         NULL,
1485         NULL,
1486         NULL,
1487         NULL,
1488         NULL,
1489         NULL,
1490         NULL,
1491         NULL,
1492         NULL,
1493         NULL,
1494         NULL,
1495         NULL,
1496         NULL,
1497         NULL,
1498         NULL,
1499         "TEXCOORD",
1500         "TEXKILL",
1501         "TEX",
1502         "TEXBEM",
1503         "TEXBEML",
1504         "TEXREG2AR",
1505         "TEXREG2GB",
1506         "TEXM3x2PAD",
1507         "TEXM3x2TEX",
1508         "TEXM3x3PAD",
1509         "TEXM3x3TEX",
1510         NULL,
1511         "TEXM3x3SPEC",
1512         "TEXM3x3VSPEC",
1513         "EXPP",
1514         "LOGP",
1515         "CND",
1516         "DEF",
1517         "TEXREG2RGB",
1518         "TEXDP3TEX",
1519         "TEXM3x2DEPTH",
1520         "TEXDP3",
1521         "TEXM3x3",
1522         "TEXDEPTH",
1523         "CMP",
1524         "BEM",
1525         "DP2ADD",
1526         "DSX",
1527         "DSY",
1528         "TEXLDD",
1529         "SETP",
1530         "TEXLDL",
1531         "BREAKP"
1532     };
1533 
1534     if (opcode < ARRAY_SIZE(names)) return names[opcode];
1535 
1536     switch (opcode) {
1537     case D3DSIO_PHASE: return "PHASE";
1538     case D3DSIO_COMMENT: return "COMMENT";
1539     case D3DSIO_END: return "END";
1540     default:
1541         return NULL;
1542     }
1543 }
1544 
1545 #define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1546 #define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
1547                                      (inst).vert_version.max | \
1548                                      (inst).frag_version.min | \
1549                                      (inst).frag_version.max)
1550 
1551 #define SPECIAL(name) \
1552     NineTranslateInstruction_##name
1553 
1554 #define DECL_SPECIAL(name) \
1555     static HRESULT \
1556     NineTranslateInstruction_##name( struct shader_translator *tx )
1557 
1558 static HRESULT
1559 NineTranslateInstruction_Generic(struct shader_translator *);
1560 
DECL_SPECIAL(NOP)1561 DECL_SPECIAL(NOP)
1562 {
1563     /* Nothing to do. NOP was used to avoid hangs
1564      * with very old d3d drivers. */
1565     return D3D_OK;
1566 }
1567 
DECL_SPECIAL(SUB)1568 DECL_SPECIAL(SUB)
1569 {
1570     struct ureg_program *ureg = tx->ureg;
1571     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1572     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1573     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1574 
1575     ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1576     return D3D_OK;
1577 }
1578 
DECL_SPECIAL(ABS)1579 DECL_SPECIAL(ABS)
1580 {
1581     struct ureg_program *ureg = tx->ureg;
1582     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1583     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1584 
1585     ureg_MOV(ureg, dst, ureg_abs(src));
1586     return D3D_OK;
1587 }
1588 
DECL_SPECIAL(XPD)1589 DECL_SPECIAL(XPD)
1590 {
1591     struct ureg_program *ureg = tx->ureg;
1592     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1593     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1594     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1595 
1596     ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1597              ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1598                           TGSI_SWIZZLE_X, 0),
1599              ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1600                           TGSI_SWIZZLE_Y, 0));
1601     ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1602              ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1603                           TGSI_SWIZZLE_Y, 0),
1604              ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1605                                       TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1606              ureg_src(dst));
1607     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1608              ureg_imm1f(ureg, 1));
1609     return D3D_OK;
1610 }
1611 
DECL_SPECIAL(M4x4)1612 DECL_SPECIAL(M4x4)
1613 {
1614     return NineTranslateInstruction_Mkxn(tx, 4, 4);
1615 }
1616 
DECL_SPECIAL(M4x3)1617 DECL_SPECIAL(M4x3)
1618 {
1619     return NineTranslateInstruction_Mkxn(tx, 4, 3);
1620 }
1621 
DECL_SPECIAL(M3x4)1622 DECL_SPECIAL(M3x4)
1623 {
1624     return NineTranslateInstruction_Mkxn(tx, 3, 4);
1625 }
1626 
DECL_SPECIAL(M3x3)1627 DECL_SPECIAL(M3x3)
1628 {
1629     return NineTranslateInstruction_Mkxn(tx, 3, 3);
1630 }
1631 
DECL_SPECIAL(M3x2)1632 DECL_SPECIAL(M3x2)
1633 {
1634     return NineTranslateInstruction_Mkxn(tx, 3, 2);
1635 }
1636 
DECL_SPECIAL(CMP)1637 DECL_SPECIAL(CMP)
1638 {
1639     ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1640              tx_src_param(tx, &tx->insn.src[0]),
1641              tx_src_param(tx, &tx->insn.src[2]),
1642              tx_src_param(tx, &tx->insn.src[1]));
1643     return D3D_OK;
1644 }
1645 
DECL_SPECIAL(CND)1646 DECL_SPECIAL(CND)
1647 {
1648     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1649     struct ureg_dst cgt;
1650     struct ureg_src cnd;
1651 
1652     /* the coissue flag was a tip for compilers to advise to
1653      * execute two operations at the same time, in cases
1654      * the two executions had same dst with different channels.
1655      * It has no effect on current hw. However it seems CND
1656      * is affected. The handling of this very specific case
1657      * handled below mimick wine behaviour */
1658     if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1659         ureg_MOV(tx->ureg,
1660                  dst, tx_src_param(tx, &tx->insn.src[1]));
1661         return D3D_OK;
1662     }
1663 
1664     cnd = tx_src_param(tx, &tx->insn.src[0]);
1665     cgt = tx_scratch(tx);
1666 
1667     if (tx->version.major == 1 && tx->version.minor < 4)
1668         cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1669 
1670     ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1671 
1672     ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1673              tx_src_param(tx, &tx->insn.src[1]),
1674              tx_src_param(tx, &tx->insn.src[2]));
1675     return D3D_OK;
1676 }
1677 
DECL_SPECIAL(CALL)1678 DECL_SPECIAL(CALL)
1679 {
1680     assert(tx->insn.src[0].idx < tx->num_inst_labels);
1681     ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1682     return D3D_OK;
1683 }
1684 
DECL_SPECIAL(CALLNZ)1685 DECL_SPECIAL(CALLNZ)
1686 {
1687     struct ureg_program *ureg = tx->ureg;
1688     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1689 
1690     if (!tx->native_integers)
1691         ureg_IF(ureg, src, tx_cond(tx));
1692     else
1693         ureg_UIF(ureg, src, tx_cond(tx));
1694     ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1695     tx_endcond(tx);
1696     ureg_ENDIF(ureg);
1697     return D3D_OK;
1698 }
1699 
DECL_SPECIAL(LOOP)1700 DECL_SPECIAL(LOOP)
1701 {
1702     struct ureg_program *ureg = tx->ureg;
1703     unsigned *label;
1704     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1705     struct ureg_dst ctr;
1706     struct ureg_dst tmp;
1707     struct ureg_src ctrx;
1708 
1709     label = tx_bgnloop(tx);
1710     ctr = tx_get_loopctr(tx, TRUE);
1711     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1712 
1713     /* src: num_iterations - start_value of al - step for al - 0 */
1714     ureg_MOV(ureg, ctr, src);
1715     ureg_BGNLOOP(tx->ureg, label);
1716     tmp = tx_scratch_scalar(tx);
1717     /* Initially ctr.x contains the number of iterations.
1718      * ctr.y will contain the updated value of al.
1719      * We decrease ctr.x at the end of every iteration,
1720      * and stop when it reaches 0. */
1721 
1722     if (!tx->native_integers) {
1723         /* case src and ctr contain floats */
1724         /* to avoid precision issue, we stop when ctr <= 0.5 */
1725         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1726         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1727     } else {
1728         /* case src and ctr contain integers */
1729         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1730         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1731     }
1732     ureg_BRK(ureg);
1733     tx_endcond(tx);
1734     ureg_ENDIF(ureg);
1735     return D3D_OK;
1736 }
1737 
DECL_SPECIAL(RET)1738 DECL_SPECIAL(RET)
1739 {
1740     ureg_RET(tx->ureg);
1741     return D3D_OK;
1742 }
1743 
DECL_SPECIAL(ENDLOOP)1744 DECL_SPECIAL(ENDLOOP)
1745 {
1746     struct ureg_program *ureg = tx->ureg;
1747     struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1748     struct ureg_dst dst_ctrx, dst_al;
1749     struct ureg_src src_ctr, al_counter;
1750 
1751     dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1752     dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1753     src_ctr = ureg_src(ctr);
1754     al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1755 
1756     /* ctr.x -= 1
1757      * ctr.y (aL) += step */
1758     if (!tx->native_integers) {
1759         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1760         ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1761     } else {
1762         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1763         ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1764     }
1765     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1766     return D3D_OK;
1767 }
1768 
DECL_SPECIAL(LABEL)1769 DECL_SPECIAL(LABEL)
1770 {
1771     unsigned k = tx->num_inst_labels;
1772     unsigned n = tx->insn.src[0].idx;
1773     assert(n < 2048);
1774     if (n >= k)
1775        tx->inst_labels = REALLOC(tx->inst_labels,
1776                                  k * sizeof(tx->inst_labels[0]),
1777                                  n * sizeof(tx->inst_labels[0]));
1778 
1779     tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1780     return D3D_OK;
1781 }
1782 
DECL_SPECIAL(SINCOS)1783 DECL_SPECIAL(SINCOS)
1784 {
1785     struct ureg_program *ureg = tx->ureg;
1786     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1787     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1788 
1789     assert(!(dst.WriteMask & 0xc));
1790 
1791     /* z undefined, w untouched */
1792     ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1793              ureg_scalar(src, TGSI_SWIZZLE_X));
1794     ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1795              ureg_scalar(src, TGSI_SWIZZLE_X));
1796     return D3D_OK;
1797 }
1798 
DECL_SPECIAL(SGN)1799 DECL_SPECIAL(SGN)
1800 {
1801     ureg_SSG(tx->ureg,
1802              tx_dst_param(tx, &tx->insn.dst[0]),
1803              tx_src_param(tx, &tx->insn.src[0]));
1804     return D3D_OK;
1805 }
1806 
DECL_SPECIAL(REP)1807 DECL_SPECIAL(REP)
1808 {
1809     struct ureg_program *ureg = tx->ureg;
1810     unsigned *label;
1811     struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1812     struct ureg_dst ctr;
1813     struct ureg_dst tmp;
1814     struct ureg_src ctrx;
1815 
1816     label = tx_bgnloop(tx);
1817     ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1818     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1819 
1820     /* NOTE: rep must be constant, so we don't have to save the count */
1821     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1822 
1823     /* rep: num_iterations - 0 - 0 - 0 */
1824     ureg_MOV(ureg, ctr, rep);
1825     ureg_BGNLOOP(ureg, label);
1826     tmp = tx_scratch_scalar(tx);
1827     /* Initially ctr.x contains the number of iterations.
1828      * We decrease ctr.x at the end of every iteration,
1829      * and stop when it reaches 0. */
1830 
1831     if (!tx->native_integers) {
1832         /* case src and ctr contain floats */
1833         /* to avoid precision issue, we stop when ctr <= 0.5 */
1834         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1835         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1836     } else {
1837         /* case src and ctr contain integers */
1838         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1839         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1840     }
1841     ureg_BRK(ureg);
1842     tx_endcond(tx);
1843     ureg_ENDIF(ureg);
1844 
1845     return D3D_OK;
1846 }
1847 
DECL_SPECIAL(ENDREP)1848 DECL_SPECIAL(ENDREP)
1849 {
1850     struct ureg_program *ureg = tx->ureg;
1851     struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1852     struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1853     struct ureg_src src_ctr = ureg_src(ctr);
1854 
1855     /* ctr.x -= 1 */
1856     if (!tx->native_integers)
1857         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1858     else
1859         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1860 
1861     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1862     return D3D_OK;
1863 }
1864 
DECL_SPECIAL(ENDIF)1865 DECL_SPECIAL(ENDIF)
1866 {
1867     tx_endcond(tx);
1868     ureg_ENDIF(tx->ureg);
1869     return D3D_OK;
1870 }
1871 
DECL_SPECIAL(IF)1872 DECL_SPECIAL(IF)
1873 {
1874     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1875 
1876     if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1877         ureg_UIF(tx->ureg, src, tx_cond(tx));
1878     else
1879         ureg_IF(tx->ureg, src, tx_cond(tx));
1880 
1881     return D3D_OK;
1882 }
1883 
1884 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)1885 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1886 {
1887     switch (flags) {
1888     case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1889     case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1890     case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1891     case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1892     case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1893     case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1894     default:
1895         assert(!"invalid comparison flags");
1896         return TGSI_OPCODE_SGT;
1897     }
1898 }
1899 
DECL_SPECIAL(IFC)1900 DECL_SPECIAL(IFC)
1901 {
1902     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1903     struct ureg_src src[2];
1904     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1905     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1906     src[1] = tx_src_param(tx, &tx->insn.src[1]);
1907     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1908     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1909     return D3D_OK;
1910 }
1911 
DECL_SPECIAL(ELSE)1912 DECL_SPECIAL(ELSE)
1913 {
1914     ureg_ELSE(tx->ureg, tx_elsecond(tx));
1915     return D3D_OK;
1916 }
1917 
DECL_SPECIAL(BREAKC)1918 DECL_SPECIAL(BREAKC)
1919 {
1920     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1921     struct ureg_src src[2];
1922     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1923     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1924     src[1] = tx_src_param(tx, &tx->insn.src[1]);
1925     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1926     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1927     ureg_BRK(tx->ureg);
1928     tx_endcond(tx);
1929     ureg_ENDIF(tx->ureg);
1930     return D3D_OK;
1931 }
1932 
1933 static const char *sm1_declusage_names[] =
1934 {
1935     [D3DDECLUSAGE_POSITION] = "POSITION",
1936     [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1937     [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1938     [D3DDECLUSAGE_NORMAL] = "NORMAL",
1939     [D3DDECLUSAGE_PSIZE] = "PSIZE",
1940     [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1941     [D3DDECLUSAGE_TANGENT] = "TANGENT",
1942     [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1943     [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1944     [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1945     [D3DDECLUSAGE_COLOR] = "COLOR",
1946     [D3DDECLUSAGE_FOG] = "FOG",
1947     [D3DDECLUSAGE_DEPTH] = "DEPTH",
1948     [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1949 };
1950 
1951 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)1952 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1953 {
1954     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1955 }
1956 
1957 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,boolean tc,struct sm1_semantic * dcl)1958 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1959                       boolean tc,
1960                       struct sm1_semantic *dcl)
1961 {
1962     BYTE index = dcl->usage_idx;
1963 
1964     /* For everything that is not matching to a TGSI_SEMANTIC_****,
1965      * we match to a TGSI_SEMANTIC_GENERIC with index.
1966      *
1967      * The index can be anything UINT16 and usage_idx is BYTE,
1968      * so we can fit everything. It doesn't matter if indices
1969      * are close together or low.
1970      *
1971      *
1972      * POSITION >= 1: 10 * index + 6
1973      * COLOR >= 2: 10 * (index-1) + 7
1974      * TEXCOORD[0..15]: index
1975      * BLENDWEIGHT: 10 * index + 18
1976      * BLENDINDICES: 10 * index + 19
1977      * NORMAL: 10 * index + 20
1978      * TANGENT: 10 * index + 21
1979      * BINORMAL: 10 * index + 22
1980      * TESSFACTOR: 10 * index + 23
1981      */
1982 
1983     switch (dcl->usage) {
1984     case D3DDECLUSAGE_POSITION:
1985     case D3DDECLUSAGE_POSITIONT:
1986     case D3DDECLUSAGE_DEPTH:
1987         if (index == 0) {
1988             sem->Name = TGSI_SEMANTIC_POSITION;
1989             sem->Index = 0;
1990         } else {
1991             sem->Name = TGSI_SEMANTIC_GENERIC;
1992             sem->Index = 10 * index + 6;
1993         }
1994         break;
1995     case D3DDECLUSAGE_COLOR:
1996         if (index < 2) {
1997             sem->Name = TGSI_SEMANTIC_COLOR;
1998             sem->Index = index;
1999         } else {
2000             sem->Name = TGSI_SEMANTIC_GENERIC;
2001             sem->Index = 10 * (index-1) + 7;
2002         }
2003         break;
2004     case D3DDECLUSAGE_FOG:
2005         assert(index == 0);
2006         sem->Name = TGSI_SEMANTIC_FOG;
2007         sem->Index = 0;
2008         break;
2009     case D3DDECLUSAGE_PSIZE:
2010         assert(index == 0);
2011         sem->Name = TGSI_SEMANTIC_PSIZE;
2012         sem->Index = 0;
2013         break;
2014     case D3DDECLUSAGE_TEXCOORD:
2015         assert(index < 16);
2016         if (index < 8 && tc)
2017             sem->Name = TGSI_SEMANTIC_TEXCOORD;
2018         else
2019             sem->Name = TGSI_SEMANTIC_GENERIC;
2020         sem->Index = index;
2021         break;
2022     case D3DDECLUSAGE_BLENDWEIGHT:
2023         sem->Name = TGSI_SEMANTIC_GENERIC;
2024         sem->Index = 10 * index + 18;
2025         break;
2026     case D3DDECLUSAGE_BLENDINDICES:
2027         sem->Name = TGSI_SEMANTIC_GENERIC;
2028         sem->Index = 10 * index + 19;
2029         break;
2030     case D3DDECLUSAGE_NORMAL:
2031         sem->Name = TGSI_SEMANTIC_GENERIC;
2032         sem->Index = 10 * index + 20;
2033         break;
2034     case D3DDECLUSAGE_TANGENT:
2035         sem->Name = TGSI_SEMANTIC_GENERIC;
2036         sem->Index = 10 * index + 21;
2037         break;
2038     case D3DDECLUSAGE_BINORMAL:
2039         sem->Name = TGSI_SEMANTIC_GENERIC;
2040         sem->Index = 10 * index + 22;
2041         break;
2042     case D3DDECLUSAGE_TESSFACTOR:
2043         sem->Name = TGSI_SEMANTIC_GENERIC;
2044         sem->Index = 10 * index + 23;
2045         break;
2046     case D3DDECLUSAGE_SAMPLE:
2047         sem->Name = TGSI_SEMANTIC_COUNT;
2048         sem->Index = 0;
2049         break;
2050     default:
2051         unreachable("Invalid DECLUSAGE.");
2052         break;
2053     }
2054 }
2055 
2056 #define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2057 #define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2058 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2059 #define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2060 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2061 d3dstt_to_tgsi_tex(BYTE sampler_type)
2062 {
2063     switch (sampler_type) {
2064     case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
2065     case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
2066     case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2067     case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
2068     default:
2069         assert(0);
2070         return TGSI_TEXTURE_UNKNOWN;
2071     }
2072 }
2073 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2074 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2075 {
2076     switch (sampler_type) {
2077     case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2078     case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2079     case NINED3DSTT_VOLUME:
2080     case NINED3DSTT_CUBE:
2081     default:
2082         assert(0);
2083         return TGSI_TEXTURE_UNKNOWN;
2084     }
2085 }
2086 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2087 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2088 {
2089     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2090     case 1: return TGSI_TEXTURE_1D;
2091     case 0: return TGSI_TEXTURE_2D;
2092     case 3: return TGSI_TEXTURE_3D;
2093     default:
2094         return TGSI_TEXTURE_CUBE;
2095     }
2096 }
2097 
2098 static const char *
sm1_sampler_type_name(BYTE sampler_type)2099 sm1_sampler_type_name(BYTE sampler_type)
2100 {
2101     switch (sampler_type) {
2102     case NINED3DSTT_1D:     return "1D";
2103     case NINED3DSTT_2D:     return "2D";
2104     case NINED3DSTT_VOLUME: return "VOLUME";
2105     case NINED3DSTT_CUBE:   return "CUBE";
2106     default:
2107         return "(D3DSTT_?)";
2108     }
2109 }
2110 
2111 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2112 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2113 {
2114     switch (sem->Name) {
2115     case TGSI_SEMANTIC_POSITION:
2116     case TGSI_SEMANTIC_NORMAL:
2117         return TGSI_INTERPOLATE_LINEAR;
2118     case TGSI_SEMANTIC_BCOLOR:
2119     case TGSI_SEMANTIC_COLOR:
2120         return TGSI_INTERPOLATE_COLOR;
2121     case TGSI_SEMANTIC_FOG:
2122     case TGSI_SEMANTIC_GENERIC:
2123     case TGSI_SEMANTIC_TEXCOORD:
2124     case TGSI_SEMANTIC_CLIPDIST:
2125     case TGSI_SEMANTIC_CLIPVERTEX:
2126         return TGSI_INTERPOLATE_PERSPECTIVE;
2127     case TGSI_SEMANTIC_EDGEFLAG:
2128     case TGSI_SEMANTIC_FACE:
2129     case TGSI_SEMANTIC_INSTANCEID:
2130     case TGSI_SEMANTIC_PCOORD:
2131     case TGSI_SEMANTIC_PRIMID:
2132     case TGSI_SEMANTIC_PSIZE:
2133     case TGSI_SEMANTIC_VERTEXID:
2134         return TGSI_INTERPOLATE_CONSTANT;
2135     default:
2136         assert(0);
2137         return TGSI_INTERPOLATE_CONSTANT;
2138     }
2139 }
2140 
DECL_SPECIAL(DCL)2141 DECL_SPECIAL(DCL)
2142 {
2143     struct ureg_program *ureg = tx->ureg;
2144     boolean is_input;
2145     boolean is_sampler;
2146     struct tgsi_declaration_semantic tgsi;
2147     struct sm1_semantic sem;
2148     sm1_read_semantic(tx, &sem);
2149 
2150     is_input = sem.reg.file == D3DSPR_INPUT;
2151     is_sampler =
2152         sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2153 
2154     DUMP("DCL ");
2155     sm1_dump_dst_param(&sem.reg);
2156     if (is_sampler)
2157         DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2158     else
2159     if (tx->version.major >= 3)
2160         DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2161     else
2162     if (sem.usage | sem.usage_idx)
2163         DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2164     else
2165         DUMP("\n");
2166 
2167     if (is_sampler) {
2168         const unsigned m = 1 << sem.reg.idx;
2169         ureg_DECL_sampler(ureg, sem.reg.idx);
2170         tx->info->sampler_mask |= m;
2171         tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2172             d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2173             d3dstt_to_tgsi_tex(sem.sampler_type);
2174         return D3D_OK;
2175     }
2176 
2177     sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2178     if (IS_VS) {
2179         if (is_input) {
2180             /* linkage outside of shader with vertex declaration */
2181             ureg_DECL_vs_input(ureg, sem.reg.idx);
2182             assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2183             tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2184             tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2185             /* NOTE: preserving order in case of indirect access */
2186         } else
2187         if (tx->version.major >= 3) {
2188             /* SM2 output semantic determined by file */
2189             assert(sem.reg.mask != 0);
2190             if (sem.usage == D3DDECLUSAGE_POSITIONT)
2191                 tx->info->position_t = TRUE;
2192             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2193             assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2194             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2195                 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2196             nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2197             if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2198                 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2199                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2200                 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2201             }
2202 
2203             if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2204                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2205                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2206             }
2207         }
2208     } else {
2209         if (is_input && tx->version.major >= 3) {
2210             unsigned interp_location = 0;
2211             /* SM3 only, SM2 input semantic determined by file */
2212             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2213             assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2214             /* PositionT and tessfactor forbidden */
2215             if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2216                 return D3DERR_INVALIDCALL;
2217 
2218             if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2219                 /* Position0 is forbidden (likely because vPos already does that) */
2220                 if (sem.usage == D3DDECLUSAGE_POSITION)
2221                     return D3DERR_INVALIDCALL;
2222                 /* Following code is for depth */
2223                 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2224                 return D3D_OK;
2225             }
2226 
2227             if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2228                 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2229                 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2230 
2231             tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2232                 ureg, tgsi.Name, tgsi.Index,
2233                 nine_tgsi_to_interp_mode(&tgsi),
2234                 0, /* cylwrap */
2235                 interp_location, 0, 1);
2236         } else
2237         if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2238             /* FragColor or FragDepth */
2239             assert(sem.reg.mask != 0);
2240             ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2241                                     0, 1);
2242         }
2243     }
2244     return D3D_OK;
2245 }
2246 
DECL_SPECIAL(DEF)2247 DECL_SPECIAL(DEF)
2248 {
2249     tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2250     return D3D_OK;
2251 }
2252 
DECL_SPECIAL(DEFB)2253 DECL_SPECIAL(DEFB)
2254 {
2255     tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2256     return D3D_OK;
2257 }
2258 
DECL_SPECIAL(DEFI)2259 DECL_SPECIAL(DEFI)
2260 {
2261     tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2262     return D3D_OK;
2263 }
2264 
DECL_SPECIAL(POW)2265 DECL_SPECIAL(POW)
2266 {
2267     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2268     struct ureg_src src[2] = {
2269         tx_src_param(tx, &tx->insn.src[0]),
2270         tx_src_param(tx, &tx->insn.src[1])
2271     };
2272     ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2273     return D3D_OK;
2274 }
2275 
DECL_SPECIAL(RSQ)2276 DECL_SPECIAL(RSQ)
2277 {
2278     struct ureg_program *ureg = tx->ureg;
2279     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2280     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2281     struct ureg_dst tmp = tx_scratch(tx);
2282     ureg_RSQ(ureg, tmp, ureg_abs(src));
2283     ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2284     return D3D_OK;
2285 }
2286 
DECL_SPECIAL(LOG)2287 DECL_SPECIAL(LOG)
2288 {
2289     struct ureg_program *ureg = tx->ureg;
2290     struct ureg_dst tmp = tx_scratch_scalar(tx);
2291     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2292     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2293     ureg_LG2(ureg, tmp, ureg_abs(src));
2294     ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2295     return D3D_OK;
2296 }
2297 
DECL_SPECIAL(LIT)2298 DECL_SPECIAL(LIT)
2299 {
2300     struct ureg_program *ureg = tx->ureg;
2301     struct ureg_dst tmp = tx_scratch(tx);
2302     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2303     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2304     ureg_LIT(ureg, tmp, src);
2305     /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2306      * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2307      * it 0^0 if src.w=0, which value is driver dependent. */
2308     ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2309              ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2310              ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2311     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2312     return D3D_OK;
2313 }
2314 
DECL_SPECIAL(NRM)2315 DECL_SPECIAL(NRM)
2316 {
2317     struct ureg_program *ureg = tx->ureg;
2318     struct ureg_dst tmp = tx_scratch_scalar(tx);
2319     struct ureg_src nrm = tx_src_scalar(tmp);
2320     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2321     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2322     ureg_DP3(ureg, tmp, src, src);
2323     ureg_RSQ(ureg, tmp, nrm);
2324     ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2325     ureg_MUL(ureg, dst, src, nrm);
2326     return D3D_OK;
2327 }
2328 
DECL_SPECIAL(DP2ADD)2329 DECL_SPECIAL(DP2ADD)
2330 {
2331     struct ureg_dst tmp = tx_scratch_scalar(tx);
2332     struct ureg_src dp2 = tx_src_scalar(tmp);
2333     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2334     struct ureg_src src[3];
2335     int i;
2336     for (i = 0; i < 3; ++i)
2337         src[i] = tx_src_param(tx, &tx->insn.src[i]);
2338     assert_replicate_swizzle(&src[2]);
2339 
2340     ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2341     ureg_ADD(tx->ureg, dst, src[2], dp2);
2342 
2343     return D3D_OK;
2344 }
2345 
DECL_SPECIAL(TEXCOORD)2346 DECL_SPECIAL(TEXCOORD)
2347 {
2348     struct ureg_program *ureg = tx->ureg;
2349     const unsigned s = tx->insn.dst[0].idx;
2350     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2351 
2352     tx_texcoord_alloc(tx, s);
2353     ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2354     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2355 
2356     return D3D_OK;
2357 }
2358 
DECL_SPECIAL(TEXCOORD_ps14)2359 DECL_SPECIAL(TEXCOORD_ps14)
2360 {
2361     struct ureg_program *ureg = tx->ureg;
2362     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2363     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2364 
2365     assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2366 
2367     ureg_MOV(ureg, dst, src);
2368 
2369     return D3D_OK;
2370 }
2371 
DECL_SPECIAL(TEXKILL)2372 DECL_SPECIAL(TEXKILL)
2373 {
2374     struct ureg_src reg;
2375 
2376     if (tx->version.major > 1 || tx->version.minor > 3) {
2377         reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2378     } else {
2379         tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2380         reg = tx->regs.vT[tx->insn.dst[0].idx];
2381     }
2382     if (tx->version.major < 2)
2383         reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2384     ureg_KILL_IF(tx->ureg, reg);
2385 
2386     return D3D_OK;
2387 }
2388 
DECL_SPECIAL(TEXBEM)2389 DECL_SPECIAL(TEXBEM)
2390 {
2391     struct ureg_program *ureg = tx->ureg;
2392     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2393     struct ureg_dst tmp, tmp2, texcoord;
2394     struct ureg_src sample, m00, m01, m10, m11;
2395     struct ureg_src bumpenvlscale, bumpenvloffset;
2396     const int m = tx->insn.dst[0].idx;
2397     const int n = tx->insn.src[0].idx;
2398 
2399     assert(tx->version.major == 1);
2400 
2401     sample = ureg_DECL_sampler(ureg, m);
2402     tx->info->sampler_mask |= 1 << m;
2403 
2404     tx_texcoord_alloc(tx, m);
2405 
2406     tmp = tx_scratch(tx);
2407     tmp2 = tx_scratch(tx);
2408     texcoord = tx_scratch(tx);
2409     /*
2410      * Bump-env-matrix:
2411      * 00 is X
2412      * 01 is Y
2413      * 10 is Z
2414      * 11 is W
2415      */
2416     nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2417     m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2418     m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2419     m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2420     m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2421 
2422     /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2423     if (m % 2 == 0) {
2424         bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2425         bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2426     } else {
2427         bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2428         bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2429     }
2430 
2431     apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2432 
2433     /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
2434     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2435              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2436     /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2437     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2438              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2439              NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2440 
2441     /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2442     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2443              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2444     /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2445     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2446              NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2447              NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2448 
2449     /* Now the texture coordinates are in tmp.xy */
2450 
2451     if (tx->insn.opcode == D3DSIO_TEXBEM) {
2452         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2453     } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2454         /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2455         ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2456         ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2457                  bumpenvlscale, bumpenvloffset);
2458         ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2459     }
2460 
2461     tx->info->bumpenvmat_needed = 1;
2462 
2463     return D3D_OK;
2464 }
2465 
DECL_SPECIAL(TEXREG2AR)2466 DECL_SPECIAL(TEXREG2AR)
2467 {
2468     struct ureg_program *ureg = tx->ureg;
2469     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2470     struct ureg_src sample;
2471     const int m = tx->insn.dst[0].idx;
2472     const int n = tx->insn.src[0].idx;
2473     assert(m >= 0 && m > n);
2474 
2475     sample = ureg_DECL_sampler(ureg, m);
2476     tx->info->sampler_mask |= 1 << m;
2477     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2478 
2479     return D3D_OK;
2480 }
2481 
DECL_SPECIAL(TEXREG2GB)2482 DECL_SPECIAL(TEXREG2GB)
2483 {
2484     struct ureg_program *ureg = tx->ureg;
2485     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2486     struct ureg_src sample;
2487     const int m = tx->insn.dst[0].idx;
2488     const int n = tx->insn.src[0].idx;
2489     assert(m >= 0 && m > n);
2490 
2491     sample = ureg_DECL_sampler(ureg, m);
2492     tx->info->sampler_mask |= 1 << m;
2493     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2494 
2495     return D3D_OK;
2496 }
2497 
DECL_SPECIAL(TEXM3x2PAD)2498 DECL_SPECIAL(TEXM3x2PAD)
2499 {
2500     return D3D_OK; /* this is just padding */
2501 }
2502 
DECL_SPECIAL(TEXM3x2TEX)2503 DECL_SPECIAL(TEXM3x2TEX)
2504 {
2505     struct ureg_program *ureg = tx->ureg;
2506     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2507     struct ureg_src sample;
2508     const int m = tx->insn.dst[0].idx - 1;
2509     const int n = tx->insn.src[0].idx;
2510     assert(m >= 0 && m > n);
2511 
2512     tx_texcoord_alloc(tx, m);
2513     tx_texcoord_alloc(tx, m+1);
2514 
2515     /* performs the matrix multiplication */
2516     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2517     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2518 
2519     sample = ureg_DECL_sampler(ureg, m + 1);
2520     tx->info->sampler_mask |= 1 << (m + 1);
2521     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2522 
2523     return D3D_OK;
2524 }
2525 
DECL_SPECIAL(TEXM3x3PAD)2526 DECL_SPECIAL(TEXM3x3PAD)
2527 {
2528     return D3D_OK; /* this is just padding */
2529 }
2530 
DECL_SPECIAL(TEXM3x3SPEC)2531 DECL_SPECIAL(TEXM3x3SPEC)
2532 {
2533     struct ureg_program *ureg = tx->ureg;
2534     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2535     struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2536     struct ureg_src sample;
2537     struct ureg_dst tmp;
2538     const int m = tx->insn.dst[0].idx - 2;
2539     const int n = tx->insn.src[0].idx;
2540     assert(m >= 0 && m > n);
2541 
2542     tx_texcoord_alloc(tx, m);
2543     tx_texcoord_alloc(tx, m+1);
2544     tx_texcoord_alloc(tx, m+2);
2545 
2546     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2547     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2548     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2549 
2550     sample = ureg_DECL_sampler(ureg, m + 2);
2551     tx->info->sampler_mask |= 1 << (m + 2);
2552     tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2553 
2554     /* At this step, dst = N = (u', w', z').
2555      * We want dst to be the texture sampled at (u'', w'', z''), with
2556      * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2557     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2558     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2559     /* at this step tmp.x = 1/N.N */
2560     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2561     /* at this step tmp.y = N.E */
2562     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2563     /* at this step tmp.x = N.E/N.N */
2564     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2565     ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2566     /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2567     ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2568     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2569 
2570     return D3D_OK;
2571 }
2572 
DECL_SPECIAL(TEXREG2RGB)2573 DECL_SPECIAL(TEXREG2RGB)
2574 {
2575     struct ureg_program *ureg = tx->ureg;
2576     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2577     struct ureg_src sample;
2578     const int m = tx->insn.dst[0].idx;
2579     const int n = tx->insn.src[0].idx;
2580     assert(m >= 0 && m > n);
2581 
2582     sample = ureg_DECL_sampler(ureg, m);
2583     tx->info->sampler_mask |= 1 << m;
2584     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2585 
2586     return D3D_OK;
2587 }
2588 
DECL_SPECIAL(TEXDP3TEX)2589 DECL_SPECIAL(TEXDP3TEX)
2590 {
2591     struct ureg_program *ureg = tx->ureg;
2592     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2593     struct ureg_dst tmp;
2594     struct ureg_src sample;
2595     const int m = tx->insn.dst[0].idx;
2596     const int n = tx->insn.src[0].idx;
2597     assert(m >= 0 && m > n);
2598 
2599     tx_texcoord_alloc(tx, m);
2600 
2601     tmp = tx_scratch(tx);
2602     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2603     ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2604 
2605     sample = ureg_DECL_sampler(ureg, m);
2606     tx->info->sampler_mask |= 1 << m;
2607     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2608 
2609     return D3D_OK;
2610 }
2611 
DECL_SPECIAL(TEXM3x2DEPTH)2612 DECL_SPECIAL(TEXM3x2DEPTH)
2613 {
2614     struct ureg_program *ureg = tx->ureg;
2615     struct ureg_dst tmp;
2616     const int m = tx->insn.dst[0].idx - 1;
2617     const int n = tx->insn.src[0].idx;
2618     assert(m >= 0 && m > n);
2619 
2620     tx_texcoord_alloc(tx, m);
2621     tx_texcoord_alloc(tx, m+1);
2622 
2623     tmp = tx_scratch(tx);
2624 
2625     /* performs the matrix multiplication */
2626     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2627     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2628 
2629     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2630     /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2631     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2632     /* res = 'w' == 0 ? 1.0 : z/w */
2633     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2634              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2635     /* replace the depth for depth testing with the result */
2636     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2637                                               TGSI_WRITEMASK_Z, 0, 1);
2638     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2639     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2640     return D3D_OK;
2641 }
2642 
DECL_SPECIAL(TEXDP3)2643 DECL_SPECIAL(TEXDP3)
2644 {
2645     struct ureg_program *ureg = tx->ureg;
2646     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2647     const int m = tx->insn.dst[0].idx;
2648     const int n = tx->insn.src[0].idx;
2649     assert(m >= 0 && m > n);
2650 
2651     tx_texcoord_alloc(tx, m);
2652 
2653     ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2654 
2655     return D3D_OK;
2656 }
2657 
DECL_SPECIAL(TEXM3x3)2658 DECL_SPECIAL(TEXM3x3)
2659 {
2660     struct ureg_program *ureg = tx->ureg;
2661     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2662     struct ureg_src sample;
2663     struct ureg_dst E, tmp;
2664     const int m = tx->insn.dst[0].idx - 2;
2665     const int n = tx->insn.src[0].idx;
2666     assert(m >= 0 && m > n);
2667 
2668     tx_texcoord_alloc(tx, m);
2669     tx_texcoord_alloc(tx, m+1);
2670     tx_texcoord_alloc(tx, m+2);
2671 
2672     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2673     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2674     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2675 
2676     switch (tx->insn.opcode) {
2677     case D3DSIO_TEXM3x3:
2678         ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2679         break;
2680     case D3DSIO_TEXM3x3TEX:
2681         sample = ureg_DECL_sampler(ureg, m + 2);
2682         tx->info->sampler_mask |= 1 << (m + 2);
2683         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2684         break;
2685     case D3DSIO_TEXM3x3VSPEC:
2686         sample = ureg_DECL_sampler(ureg, m + 2);
2687         tx->info->sampler_mask |= 1 << (m + 2);
2688         E = tx_scratch(tx);
2689         tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2690         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2691         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2692         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2693         /* At this step, dst = N = (u', w', z').
2694          * We want dst to be the texture sampled at (u'', w'', z''), with
2695          * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2696         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2697         ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2698         /* at this step tmp.x = 1/N.N */
2699         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2700         /* at this step tmp.y = N.E */
2701         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2702         /* at this step tmp.x = N.E/N.N */
2703         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2704         ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2705         /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2706         ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2707         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2708         break;
2709     default:
2710         return D3DERR_INVALIDCALL;
2711     }
2712     return D3D_OK;
2713 }
2714 
DECL_SPECIAL(TEXDEPTH)2715 DECL_SPECIAL(TEXDEPTH)
2716 {
2717     struct ureg_program *ureg = tx->ureg;
2718     struct ureg_dst r5;
2719     struct ureg_src r5r, r5g;
2720 
2721     assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2722 
2723     /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2724      * r5 won't be used afterward, thus we can use r5.ba */
2725     r5 = tx->regs.r[5];
2726     r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2727     r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2728 
2729     ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2730     ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2731     /* r5.r = r/g */
2732     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2733              r5r, ureg_imm1f(ureg, 1.0f));
2734     /* replace the depth for depth testing with the result */
2735     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2736                                               TGSI_WRITEMASK_Z, 0, 1);
2737     ureg_MOV(ureg, tx->regs.oDepth, r5r);
2738 
2739     return D3D_OK;
2740 }
2741 
DECL_SPECIAL(BEM)2742 DECL_SPECIAL(BEM)
2743 {
2744     struct ureg_program *ureg = tx->ureg;
2745     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2746     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2747     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2748     struct ureg_src m00, m01, m10, m11;
2749     const int m = tx->insn.dst[0].idx;
2750     struct ureg_dst tmp;
2751     /*
2752      * Bump-env-matrix:
2753      * 00 is X
2754      * 01 is Y
2755      * 10 is Z
2756      * 11 is W
2757      */
2758     nine_info_mark_const_f_used(tx->info, 8 + m);
2759     m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2760     m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2761     m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2762     m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2763     /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
2764     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2765              NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2766     /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2767     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2768              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2769 
2770     /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2771     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2772              NINE_APPLY_SWIZZLE(src1, X), src0);
2773     /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2774     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2775              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2776     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2777 
2778     tx->info->bumpenvmat_needed = 1;
2779 
2780     return D3D_OK;
2781 }
2782 
DECL_SPECIAL(TEXLD)2783 DECL_SPECIAL(TEXLD)
2784 {
2785     struct ureg_program *ureg = tx->ureg;
2786     unsigned target;
2787     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2788     struct ureg_src src[2] = {
2789         tx_src_param(tx, &tx->insn.src[0]),
2790         tx_src_param(tx, &tx->insn.src[1])
2791     };
2792     assert(tx->insn.src[1].idx >= 0 &&
2793            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2794     target = tx->sampler_targets[tx->insn.src[1].idx];
2795 
2796     switch (tx->insn.flags) {
2797     case 0:
2798         ureg_TEX(ureg, dst, target, src[0], src[1]);
2799         break;
2800     case NINED3DSI_TEXLD_PROJECT:
2801         ureg_TXP(ureg, dst, target, src[0], src[1]);
2802         break;
2803     case NINED3DSI_TEXLD_BIAS:
2804         ureg_TXB(ureg, dst, target, src[0], src[1]);
2805         break;
2806     default:
2807         assert(0);
2808         return D3DERR_INVALIDCALL;
2809     }
2810     return D3D_OK;
2811 }
2812 
DECL_SPECIAL(TEXLD_14)2813 DECL_SPECIAL(TEXLD_14)
2814 {
2815     struct ureg_program *ureg = tx->ureg;
2816     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2817     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2818     const unsigned s = tx->insn.dst[0].idx;
2819     const unsigned t = ps1x_sampler_type(tx->info, s);
2820 
2821     tx->info->sampler_mask |= 1 << s;
2822     ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2823 
2824     return D3D_OK;
2825 }
2826 
DECL_SPECIAL(TEX)2827 DECL_SPECIAL(TEX)
2828 {
2829     struct ureg_program *ureg = tx->ureg;
2830     const unsigned s = tx->insn.dst[0].idx;
2831     const unsigned t = ps1x_sampler_type(tx->info, s);
2832     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2833     struct ureg_src src[2];
2834 
2835     tx_texcoord_alloc(tx, s);
2836 
2837     src[0] = tx->regs.vT[s];
2838     src[1] = ureg_DECL_sampler(ureg, s);
2839     tx->info->sampler_mask |= 1 << s;
2840 
2841     TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2842 
2843     return D3D_OK;
2844 }
2845 
DECL_SPECIAL(TEXLDD)2846 DECL_SPECIAL(TEXLDD)
2847 {
2848     unsigned target;
2849     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2850     struct ureg_src src[4] = {
2851         tx_src_param(tx, &tx->insn.src[0]),
2852         tx_src_param(tx, &tx->insn.src[1]),
2853         tx_src_param(tx, &tx->insn.src[2]),
2854         tx_src_param(tx, &tx->insn.src[3])
2855     };
2856     assert(tx->insn.src[1].idx >= 0 &&
2857            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2858     target = tx->sampler_targets[tx->insn.src[1].idx];
2859 
2860     ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2861     return D3D_OK;
2862 }
2863 
DECL_SPECIAL(TEXLDL)2864 DECL_SPECIAL(TEXLDL)
2865 {
2866     unsigned target;
2867     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2868     struct ureg_src src[2] = {
2869        tx_src_param(tx, &tx->insn.src[0]),
2870        tx_src_param(tx, &tx->insn.src[1])
2871     };
2872     assert(tx->insn.src[1].idx >= 0 &&
2873            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2874     target = tx->sampler_targets[tx->insn.src[1].idx];
2875 
2876     ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2877     return D3D_OK;
2878 }
2879 
DECL_SPECIAL(SETP)2880 DECL_SPECIAL(SETP)
2881 {
2882     STUB(D3DERR_INVALIDCALL);
2883 }
2884 
DECL_SPECIAL(BREAKP)2885 DECL_SPECIAL(BREAKP)
2886 {
2887     STUB(D3DERR_INVALIDCALL);
2888 }
2889 
DECL_SPECIAL(PHASE)2890 DECL_SPECIAL(PHASE)
2891 {
2892     return D3D_OK; /* we don't care about phase */
2893 }
2894 
DECL_SPECIAL(COMMENT)2895 DECL_SPECIAL(COMMENT)
2896 {
2897     return D3D_OK; /* nothing to do */
2898 }
2899 
2900 
/* Builds one struct sm1_op_info initializer (positional):
 *   sio_name  - D3DSIO_ opcode suffix
 *   tgsi_name - TGSI_OPCODE_ suffix
 *   vs_min/vs_max - first version pair (vertex shader range in the table rows)
 *   ps_min/ps_max - second version pair (pixel shader range in the table rows)
 *   ndst/nsrc - number of destination / source parameters
 *   handler   - translation callback, or NULL
 */
#define _OPI(sio_name, tgsi_name, vs_min, vs_max, ps_min, ps_max, ndst, nsrc, handler) \
    { D3DSIO_##sio_name, TGSI_OPCODE_##tgsi_name, \
      { vs_min, vs_max }, { ps_min, ps_max, }, ndst, nsrc, handler }
2903 
2904 struct sm1_op_info inst_table[] =
2905 {
2906     _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
2907     _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2908     _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2909     _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
2910     _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2911     _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2912     _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2913     _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2914     _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2915     _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2916     _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2917     _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2918     _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2919     _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2920     _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2921     _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2922     _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2923     _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2924     _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2925     _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2926 
2927     _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2928     _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2929     _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2930     _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2931     _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2932 
2933     _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2934     _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2935     _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2936     _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2937     _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2938     _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2939 
2940     _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2941 
2942     _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2943     _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
2944     _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2945     _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
2946     _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2947 
2948     _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2949     _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2950 
2951     /* More flow control */
2952     _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2953     _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2954     _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2955     _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2956     _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2957     _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2958     _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2959     _OPI(BREAKC, NOP,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2960     /* we don't write to the address register, but a normal register (copied
2961      * when needed to the address register), thus we don't use ARR */
2962     _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2963 
2964     _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2965     _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2966 
2967     _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2968     _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2969     _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2970     _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2971     _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2972     _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2973     _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2974     _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2975     _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2976     _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2977     _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2978     _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2979     _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2980     _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2981     _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2982     _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2983 
2984     _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2985     _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2986     _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2987     _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2988 
2989     _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2990 
2991     /* More tex stuff */
2992     _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2993     _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2994     _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2995     _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2996     _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2997     _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2998 
2999     /* Misc */
3000     _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3001     _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3002     _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3003     _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3004     _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3005     _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3006     _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3007     _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3008     _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3009 };
3010 
3011 struct sm1_op_info inst_phase =
3012     _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3013 
3014 struct sm1_op_info inst_comment =
3015     _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3016 
3017 static void
create_op_info_map(struct shader_translator * tx)3018 create_op_info_map(struct shader_translator *tx)
3019 {
3020     const unsigned version = (tx->version.major << 8) | tx->version.minor;
3021     unsigned i;
3022 
3023     for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3024         tx->op_info_map[i] = -1;
3025 
3026     if (tx->processor == PIPE_SHADER_VERTEX) {
3027         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3028             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3029             if (inst_table[i].vert_version.min <= version &&
3030                 inst_table[i].vert_version.max >= version)
3031                 tx->op_info_map[inst_table[i].sio] = i;
3032         }
3033     } else {
3034         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3035             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3036             if (inst_table[i].frag_version.min <= version &&
3037                 inst_table[i].frag_version.max >= version)
3038                 tx->op_info_map[inst_table[i].sio] = i;
3039         }
3040     }
3041 }
3042 
3043 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3044 NineTranslateInstruction_Generic(struct shader_translator *tx)
3045 {
3046     struct ureg_dst dst[1];
3047     struct ureg_src src[4];
3048     unsigned i;
3049 
3050     for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3051         dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3052     for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3053         src[i] = tx_src_param(tx, &tx->insn.src[i]);
3054 
3055     ureg_insn(tx->ureg, tx->insn.info->opcode,
3056               dst, tx->insn.ndst,
3057               src, tx->insn.nsrc, 0);
3058     return D3D_OK;
3059 }
3060 
3061 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3062 TOKEN_PEEK(struct shader_translator *tx)
3063 {
3064     return *(tx->parse);
3065 }
3066 
3067 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3068 TOKEN_NEXT(struct shader_translator *tx)
3069 {
3070     return *(tx->parse)++;
3071 }
3072 
3073 static inline void
TOKEN_JUMP(struct shader_translator * tx)3074 TOKEN_JUMP(struct shader_translator *tx)
3075 {
3076     if (tx->parse_next && tx->parse != tx->parse_next) {
3077         WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3078         tx->parse = tx->parse_next;
3079     }
3080 }
3081 
3082 static inline boolean
sm1_parse_eof(struct shader_translator * tx)3083 sm1_parse_eof(struct shader_translator *tx)
3084 {
3085     return TOKEN_PEEK(tx) == NINED3DSP_END;
3086 }
3087 
3088 static void
sm1_read_version(struct shader_translator * tx)3089 sm1_read_version(struct shader_translator *tx)
3090 {
3091     const DWORD tok = TOKEN_NEXT(tx);
3092 
3093     tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3094     tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3095 
3096     switch (tok >> 16) {
3097     case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3098     case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3099     default:
3100        DBG("Invalid shader type: %x\n", tok);
3101        tx->processor = ~0;
3102        break;
3103     }
3104 }
3105 
3106 /* This is just to check if we parsed the instruction properly. */
3107 static void
sm1_parse_get_skip(struct shader_translator * tx)3108 sm1_parse_get_skip(struct shader_translator *tx)
3109 {
3110     const DWORD tok = TOKEN_PEEK(tx);
3111 
3112     if (tx->version.major >= 2) {
3113         tx->parse_next = tx->parse + 1 /* this */ +
3114             ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3115     } else {
3116         tx->parse_next = NULL; /* TODO: determine from param count */
3117     }
3118 }
3119 
3120 static void
sm1_print_comment(const char * comment,UINT size)3121 sm1_print_comment(const char *comment, UINT size)
3122 {
3123     if (!size)
3124         return;
3125     /* TODO */
3126 }
3127 
3128 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3129 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3130 {
3131     DWORD tok = TOKEN_PEEK(tx);
3132 
3133     while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3134     {
3135         const char *comment = "";
3136         UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3137         tx->parse += size + 1;
3138 
3139         if (print)
3140             sm1_print_comment(comment, size);
3141 
3142         tok = TOKEN_PEEK(tx);
3143     }
3144 }
3145 
3146 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3147 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3148 {
3149     *reg = TOKEN_NEXT(tx);
3150 
3151     if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3152     {
3153         if (tx->version.major < 2)
3154             *rel = (1 << 31) |
3155                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3156                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
3157                 D3DSP_NOSWIZZLE;
3158         else
3159             *rel = TOKEN_NEXT(tx);
3160     }
3161 }
3162 
3163 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3164 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3165 {
3166     int8_t shift;
3167     dst->file =
3168         (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
3169         (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3170     dst->type = TGSI_RETURN_TYPE_FLOAT;
3171     dst->idx = tok & D3DSP_REGNUM_MASK;
3172     dst->rel = NULL;
3173     dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3174     dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3175     shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3176     dst->shift = (shift & 0x7) - (shift & 0x8);
3177 }
3178 
3179 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3180 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3181 {
3182     src->file =
3183         ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
3184         ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3185     src->type = TGSI_RETURN_TYPE_FLOAT;
3186     src->idx = tok & D3DSP_REGNUM_MASK;
3187     src->rel = NULL;
3188     src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3189     src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3190 
3191     switch (src->file) {
3192     case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3193     case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3194     case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3195     default:
3196         break;
3197     }
3198 }
3199 
3200 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3201 sm1_parse_immediate(struct shader_translator *tx,
3202                     struct sm1_src_param *imm)
3203 {
3204     imm->file = NINED3DSPR_IMMEDIATE;
3205     imm->idx = INT_MIN;
3206     imm->rel = NULL;
3207     imm->swizzle = NINED3DSP_NOSWIZZLE;
3208     imm->mod = 0;
3209     switch (tx->insn.opcode) {
3210     case D3DSIO_DEF:
3211         imm->type = NINED3DSPTYPE_FLOAT4;
3212         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3213         tx->parse += 4;
3214         break;
3215     case D3DSIO_DEFI:
3216         imm->type = NINED3DSPTYPE_INT4;
3217         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3218         tx->parse += 4;
3219         break;
3220     case D3DSIO_DEFB:
3221         imm->type = NINED3DSPTYPE_BOOL;
3222         memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3223         tx->parse += 1;
3224         break;
3225     default:
3226        assert(0);
3227        break;
3228     }
3229 }
3230 
3231 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3232 sm1_read_dst_param(struct shader_translator *tx,
3233                    struct sm1_dst_param *dst,
3234                    struct sm1_src_param *rel)
3235 {
3236     DWORD tok_dst, tok_rel = 0;
3237 
3238     sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3239     sm1_parse_dst_param(dst, tok_dst);
3240     if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3241         sm1_parse_src_param(rel, tok_rel);
3242         dst->rel = rel;
3243     }
3244 }
3245 
3246 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3247 sm1_read_src_param(struct shader_translator *tx,
3248                    struct sm1_src_param *src,
3249                    struct sm1_src_param *rel)
3250 {
3251     DWORD tok_src, tok_rel = 0;
3252 
3253     sm1_parse_get_param(tx, &tok_src, &tok_rel);
3254     sm1_parse_src_param(src, tok_src);
3255     if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3256         assert(rel);
3257         sm1_parse_src_param(rel, tok_rel);
3258         src->rel = rel;
3259     }
3260 }
3261 
3262 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3263 sm1_read_semantic(struct shader_translator *tx,
3264                   struct sm1_semantic *sem)
3265 {
3266     const DWORD tok_usg = TOKEN_NEXT(tx);
3267     const DWORD tok_dst = TOKEN_NEXT(tx);
3268 
3269     sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3270     sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3271     sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3272 
3273     sm1_parse_dst_param(&sem->reg, tok_dst);
3274 }
3275 
3276 static void
sm1_parse_instruction(struct shader_translator * tx)3277 sm1_parse_instruction(struct shader_translator *tx)
3278 {
3279     struct sm1_instruction *insn = &tx->insn;
3280     HRESULT hr;
3281     DWORD tok;
3282     struct sm1_op_info *info = NULL;
3283     unsigned i;
3284 
3285     sm1_parse_comments(tx, TRUE);
3286     sm1_parse_get_skip(tx);
3287 
3288     tok = TOKEN_NEXT(tx);
3289 
3290     insn->opcode = tok & D3DSI_OPCODE_MASK;
3291     insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3292     insn->coissue = !!(tok & D3DSI_COISSUE);
3293     insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3294 
3295     if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3296         int k = tx->op_info_map[insn->opcode];
3297         if (k >= 0) {
3298             assert(k < ARRAY_SIZE(inst_table));
3299             info = &inst_table[k];
3300         }
3301     } else {
3302        if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
3303        if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3304     }
3305     if (!info) {
3306        DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3307        TOKEN_JUMP(tx);
3308        return;
3309     }
3310     insn->info = info;
3311     insn->ndst = info->ndst;
3312     insn->nsrc = info->nsrc;
3313 
3314     assert(!insn->predicated && "TODO: predicated instructions");
3315 
3316     /* check version */
3317     {
3318         unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3319         unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3320         unsigned ver = (tx->version.major << 8) | tx->version.minor;
3321         if (ver < min || ver > max) {
3322             DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3323                 min, ver, max);
3324             return;
3325         }
3326     }
3327 
3328     for (i = 0; i < insn->ndst; ++i)
3329         sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3330     if (insn->predicated)
3331         sm1_read_src_param(tx, &insn->pred, NULL);
3332     for (i = 0; i < insn->nsrc; ++i)
3333         sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3334 
3335     /* parse here so we can dump them before processing */
3336     if (insn->opcode == D3DSIO_DEF ||
3337         insn->opcode == D3DSIO_DEFI ||
3338         insn->opcode == D3DSIO_DEFB)
3339         sm1_parse_immediate(tx, &tx->insn.src[0]);
3340 
3341     sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3342     sm1_instruction_check(insn);
3343 
3344     if (info->handler)
3345         hr = info->handler(tx);
3346     else
3347         hr = NineTranslateInstruction_Generic(tx);
3348     tx_apply_dst0_modifiers(tx);
3349 
3350     if (hr != D3D_OK)
3351         tx->failure = TRUE;
3352     tx->num_scratch = 0; /* reset */
3353 
3354     TOKEN_JUMP(tx);
3355 }
3356 
3357 static void
tx_ctor(struct shader_translator * tx,struct nine_shader_info * info)3358 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3359 {
3360     unsigned i;
3361 
3362     tx->info = info;
3363 
3364     tx->byte_code = info->byte_code;
3365     tx->parse = info->byte_code;
3366 
3367     for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3368         info->input_map[i] = NINE_DECLUSAGE_NONE;
3369     info->num_inputs = 0;
3370 
3371     info->position_t = FALSE;
3372     info->point_size = FALSE;
3373 
3374     tx->info->const_float_slots = 0;
3375     tx->info->const_int_slots = 0;
3376     tx->info->const_bool_slots = 0;
3377 
3378     info->sampler_mask = 0x0;
3379     info->rt_mask = 0x0;
3380 
3381     info->lconstf.data = NULL;
3382     info->lconstf.ranges = NULL;
3383 
3384     info->bumpenvmat_needed = 0;
3385 
3386     for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3387         tx->regs.rL[i] = ureg_dst_undef();
3388     }
3389     tx->regs.address = ureg_dst_undef();
3390     tx->regs.a0 = ureg_dst_undef();
3391     tx->regs.p = ureg_dst_undef();
3392     tx->regs.oDepth = ureg_dst_undef();
3393     tx->regs.vPos = ureg_src_undef();
3394     tx->regs.vFace = ureg_src_undef();
3395     for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3396         tx->regs.o[i] = ureg_dst_undef();
3397     for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3398         tx->regs.oCol[i] = ureg_dst_undef();
3399     for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3400         tx->regs.vC[i] = ureg_src_undef();
3401     for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3402         tx->regs.vT[i] = ureg_src_undef();
3403 
3404     sm1_read_version(tx);
3405 
3406     info->version = (tx->version.major << 4) | tx->version.minor;
3407 
3408     tx->num_outputs = 0;
3409 
3410     create_op_info_map(tx);
3411 }
3412 
3413 static void
tx_dtor(struct shader_translator * tx)3414 tx_dtor(struct shader_translator *tx)
3415 {
3416     if (tx->num_inst_labels)
3417         FREE(tx->inst_labels);
3418     FREE(tx->lconstf);
3419     FREE(tx->regs.r);
3420     FREE(tx);
3421 }
3422 
3423 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3424  * CONST[1].xyz = x+width/2, y+height/2, zmin */
3425 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3426 shader_add_vs_viewport_transform(struct shader_translator *tx)
3427 {
3428     struct ureg_program *ureg = tx->ureg;
3429     struct ureg_src c0 = NINE_CONSTANT_SRC(0);
3430     struct ureg_src c1 = NINE_CONSTANT_SRC(1);
3431     /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3432 
3433     c0 = ureg_src_dimension(c0, 4);
3434     c1 = ureg_src_dimension(c1, 4);
3435     /* TODO: find out when we need to apply the viewport transformation or not.
3436      * Likely will be XYZ vs XYZRHW in vdecl_out
3437      * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3438      * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3439      */
3440     ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3441 }
3442 
3443 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_src src_col)3444 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3445 {
3446     struct ureg_program *ureg = tx->ureg;
3447     struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3448     struct ureg_src fog_end, fog_coeff, fog_density;
3449     struct ureg_src fog_vs, depth, fog_color;
3450     struct ureg_dst fog_factor;
3451 
3452     if (!tx->info->fog_enable) {
3453         ureg_MOV(ureg, oCol0, src_col);
3454         return;
3455     }
3456 
3457     if (tx->info->fog_mode != D3DFOG_NONE) {
3458         depth = nine_get_position_input(tx);
3459         depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3460     }
3461 
3462     nine_info_mark_const_f_used(tx->info, 33);
3463     fog_color = NINE_CONSTANT_SRC(32);
3464     fog_factor = tx_scratch_scalar(tx);
3465 
3466     if (tx->info->fog_mode == D3DFOG_LINEAR) {
3467         fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3468         fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3469         ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
3470         ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3471     } else if (tx->info->fog_mode == D3DFOG_EXP) {
3472         fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3473         ureg_MUL(ureg, fog_factor, depth, fog_density);
3474         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3475         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3476     } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3477         fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3478         ureg_MUL(ureg, fog_factor, depth, fog_density);
3479         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3480         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3481         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3482     } else {
3483         fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3484                                             TGSI_INTERPOLATE_PERSPECTIVE),
3485                                             TGSI_SWIZZLE_X);
3486         ureg_MOV(ureg, fog_factor, fog_vs);
3487     }
3488 
3489     ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3490              tx_src_scalar(fog_factor), src_col, fog_color);
3491     ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3492 }
3493 
/* Capability queries. Both macros rely on a `screen` variable being in scope
 * at the point of use; GET_SHADER_CAP additionally reads `info->type`. */
#define GET_CAP(n) \
    screen->get_param(screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
    screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
3498 
3499 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)3500 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3501 {
3502     struct shader_translator *tx;
3503     HRESULT hr = D3D_OK;
3504     const unsigned processor = info->type;
3505     struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3506 
3507     user_assert(processor != ~0, D3DERR_INVALIDCALL);
3508 
3509     tx = CALLOC_STRUCT(shader_translator);
3510     if (!tx)
3511         return E_OUTOFMEMORY;
3512     tx_ctor(tx, info);
3513 
3514     if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3515         hr = D3DERR_INVALIDCALL;
3516         DBG("Unsupported shader version: %u.%u !\n",
3517             tx->version.major, tx->version.minor);
3518         goto out;
3519     }
3520     if (tx->processor != processor) {
3521         hr = D3DERR_INVALIDCALL;
3522         DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3523         goto out;
3524     }
3525     DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3526          tx->version.major, tx->version.minor);
3527 
3528     tx->ureg = ureg_create(processor);
3529     if (!tx->ureg) {
3530         hr = E_OUTOFMEMORY;
3531         goto out;
3532     }
3533 
3534     tx->native_integers = GET_SHADER_CAP(INTEGERS);
3535     tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3536     tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3537     tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3538     tx->texcoord_sn = tx->want_texcoord ?
3539         TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3540     tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3541     tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3542 
3543     if (IS_VS) {
3544         tx->num_constf_allowed = NINE_MAX_CONST_F;
3545     } else if (tx->version.major < 2) {/* IS_PS v1 */
3546         tx->num_constf_allowed = 8;
3547     } else if (tx->version.major == 2) {/* IS_PS v2 */
3548         tx->num_constf_allowed = 32;
3549     } else {/* IS_PS v3 */
3550         tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3551     }
3552 
3553     if (tx->version.major < 2) {
3554         tx->num_consti_allowed = 0;
3555         tx->num_constb_allowed = 0;
3556     } else {
3557         tx->num_consti_allowed = NINE_MAX_CONST_I;
3558         tx->num_constb_allowed = NINE_MAX_CONST_B;
3559     }
3560 
3561     if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
3562         tx->num_constf_allowed = 8192;
3563         tx->num_consti_allowed = 2048;
3564         tx->num_constb_allowed = 2048;
3565     }
3566 
3567     /* VS must always write position. Declare it here to make it the 1st output.
3568      * (Some drivers like nv50 are buggy and rely on that.)
3569      */
3570     if (IS_VS) {
3571         tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3572     } else {
3573         ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3574         if (!tx->shift_wpos)
3575             ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3576     }
3577 
3578     if (GET_CAP(TGSI_MUL_ZERO_WINS))
3579        ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3580 
3581     while (!sm1_parse_eof(tx) && !tx->failure)
3582         sm1_parse_instruction(tx);
3583     tx->parse++; /* for byte_size */
3584 
3585     if (tx->failure) {
3586         /* For VS shaders, we print the warning later,
3587          * we first try with swvp. */
3588         if (IS_PS)
3589             ERR("Encountered buggy shader\n");
3590         ureg_destroy(tx->ureg);
3591         hr = D3DERR_INVALIDCALL;
3592         goto out;
3593     }
3594 
3595     if (IS_PS && tx->version.major < 3) {
3596         if (tx->version.major < 2) {
3597             assert(tx->num_temp); /* there must be color output */
3598             info->rt_mask |= 0x1;
3599             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3600         } else {
3601             shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3602         }
3603     }
3604 
3605     if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3606         tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3607         ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3608     }
3609 
3610     if (info->position_t)
3611         ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3612 
3613     if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3614         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3615         ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3616         ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3617         info->point_size = TRUE;
3618     }
3619 
3620     if (info->process_vertices)
3621         shader_add_vs_viewport_transform(tx);
3622 
3623     ureg_END(tx->ureg);
3624 
3625     /* record local constants */
3626     if (tx->num_lconstf && tx->indirect_const_access) {
3627         struct nine_range *ranges;
3628         float *data;
3629         int *indices;
3630         unsigned i, k, n;
3631 
3632         hr = E_OUTOFMEMORY;
3633 
3634         data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3635         if (!data)
3636             goto out;
3637         info->lconstf.data = data;
3638 
3639         indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3640         if (!indices)
3641             goto out;
3642 
3643         /* lazy sort, num_lconstf should be small */
3644         for (n = 0; n < tx->num_lconstf; ++n) {
3645             for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3646                 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3647                     k = i;
3648             }
3649             indices[n] = tx->lconstf[k].idx;
3650             memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3651             tx->lconstf[k].idx = INT_MAX;
3652         }
3653 
3654         /* count ranges */
3655         for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3656             if (indices[i] != indices[i - 1] + 1)
3657                 ++n;
3658         ranges = MALLOC(n * sizeof(ranges[0]));
3659         if (!ranges) {
3660             FREE(indices);
3661             goto out;
3662         }
3663         info->lconstf.ranges = ranges;
3664 
3665         k = 0;
3666         ranges[k].bgn = indices[0];
3667         for (i = 1; i < tx->num_lconstf; ++i) {
3668             if (indices[i] != indices[i - 1] + 1) {
3669                 ranges[k].next = &ranges[k + 1];
3670                 ranges[k].end = indices[i - 1] + 1;
3671                 ++k;
3672                 ranges[k].bgn = indices[i];
3673             }
3674         }
3675         ranges[k].end = indices[i - 1] + 1;
3676         ranges[k].next = NULL;
3677         assert(n == (k + 1));
3678 
3679         FREE(indices);
3680         hr = D3D_OK;
3681     }
3682 
3683     /* r500 */
3684     if (info->const_float_slots > device->max_vs_const_f &&
3685         (info->const_int_slots || info->const_bool_slots) &&
3686         (!IS_VS || !info->swvp_on))
3687         ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3688 
3689 
3690     if (tx->indirect_const_access) /* vs only */
3691         info->const_float_slots = device->max_vs_const_f;
3692 
3693     if (!IS_VS || !info->swvp_on) {
3694         unsigned s, slot_max;
3695         unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3696 
3697         slot_max = info->const_bool_slots > 0 ?
3698                        max_const_f + NINE_MAX_CONST_I
3699                        + DIV_ROUND_UP(info->const_bool_slots, 4) :
3700                            info->const_int_slots > 0 ?
3701                                max_const_f + info->const_int_slots :
3702                                    info->const_float_slots;
3703 
3704         info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3705 
3706         for (s = 0; s < slot_max; s++)
3707             ureg_DECL_constant(tx->ureg, s);
3708     } else {
3709          ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3710          ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3711          ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3712          ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3713     }
3714 
3715     if (info->process_vertices)
3716         ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3717 
3718     if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3719         const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
3720         tgsi_dump(toks, 0);
3721         ureg_free_tokens(toks);
3722     }
3723 
3724     if (info->process_vertices) {
3725         NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3726                                                     tx->output_info,
3727                                                     tx->num_outputs,
3728                                                     &(info->so));
3729         info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3730     } else
3731         info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3732     if (!info->cso) {
3733         hr = D3DERR_DRIVERINTERNALERROR;
3734         FREE(info->lconstf.data);
3735         FREE(info->lconstf.ranges);
3736         goto out;
3737     }
3738 
3739     info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3740 out:
3741     tx_dtor(tx);
3742     return hr;
3743 }
3744