• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3  * Copyright 2013 Christoph Bumiller
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "nine_shader.h"
25 
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30 
31 #include "util/bitscan.h"
32 #include "util/macros.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "pipe/p_shader_tokens.h"
36 #include "tgsi/tgsi_ureg.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "nir/tgsi_to_nir.h"
39 
40 #define DBG_CHANNEL DBG_SHADER
41 
42 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
43 
44 
45 struct shader_translator;
46 
47 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
48 
49 static inline const char *d3dsio_to_string(unsigned opcode);
50 
51 
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the cond_labels / loop_labels (and related) arrays in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values of the 'type' field of sm1_src_param / sm1_dst_param. */
#define NINED3DSPTYPE_FLOAT4  0
#define NINED3DSPTYPE_INT4    1
#define NINED3DSPTYPE_BOOL    2

/* Extra register file value, one past D3DSPR_PREDICATE, marking a source
 * parameter that carries an immediate (see sm1_dump_src_param). */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK  (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Instruction flag values; sm1_dump_instruction prints them as the "p" / "b"
 * suffix of D3DSIO_TEX. */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* One bit per destination component. */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component, selecting 0, 1, 2, 3 in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* tokens. */
#define NINE_SWIZZLE4(x,y,z,w) \
   TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Replicates component 's' of 'src' into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
   ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Destination modifier flags shifted down so they can be tested directly
 * against sm1_dst_param::mod (see sm1_dump_dst_param). */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
101 
/*
 * Source modifiers and the shader versions they apply to:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 *
 * The values below are the D3DSPSM_* constants shifted down by
 * D3DSP_SRCMOD_SHIFT; they are used as indices into sm1_mod_str.
 */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
131 
132 static const char *sm1_mod_str[] =
133 {
134     [NINED3DSPSM_NONE] = "",
135     [NINED3DSPSM_NEG] = "-",
136     [NINED3DSPSM_BIAS] = "bias",
137     [NINED3DSPSM_BIASNEG] = "biasneg",
138     [NINED3DSPSM_SIGN] = "sign",
139     [NINED3DSPSM_SIGNNEG] = "signneg",
140     [NINED3DSPSM_COMP] = "comp",
141     [NINED3DSPSM_X2] = "x2",
142     [NINED3DSPSM_X2NEG] = "x2neg",
143     [NINED3DSPSM_DZ] = "dz",
144     [NINED3DSPSM_DW] = "dw",
145     [NINED3DSPSM_ABS] = "abs",
146     [NINED3DSPSM_ABSNEG] = "-abs",
147     [NINED3DSPSM_NOT] = "not"
148 };
149 
150 static void
sm1_dump_writemask(BYTE mask)151 sm1_dump_writemask(BYTE mask)
152 {
153     if (mask & 1) DUMP("x"); else DUMP("_");
154     if (mask & 2) DUMP("y"); else DUMP("_");
155     if (mask & 4) DUMP("z"); else DUMP("_");
156     if (mask & 8) DUMP("w"); else DUMP("_");
157 }
158 
159 static void
sm1_dump_swizzle(BYTE s)160 sm1_dump_swizzle(BYTE s)
161 {
162     char c[4] = { 'x', 'y', 'z', 'w' };
163     DUMP("%c%c%c%c",
164          c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
165 }
166 
167 static const char sm1_file_char[] =
168 {
169     [D3DSPR_TEMP] = 'r',
170     [D3DSPR_INPUT] = 'v',
171     [D3DSPR_CONST] = 'c',
172     [D3DSPR_ADDR] = 'A',
173     [D3DSPR_RASTOUT] = 'R',
174     [D3DSPR_ATTROUT] = 'D',
175     [D3DSPR_OUTPUT] = 'o',
176     [D3DSPR_CONSTINT] = 'I',
177     [D3DSPR_COLOROUT] = 'C',
178     [D3DSPR_DEPTHOUT] = 'D',
179     [D3DSPR_SAMPLER] = 's',
180     [D3DSPR_CONST2] = 'c',
181     [D3DSPR_CONST3] = 'c',
182     [D3DSPR_CONST4] = 'c',
183     [D3DSPR_CONSTBOOL] = 'B',
184     [D3DSPR_LOOP] = 'L',
185     [D3DSPR_TEMPFLOAT16] = 'h',
186     [D3DSPR_MISCTYPE] = 'M',
187     [D3DSPR_LABEL] = 'X',
188     [D3DSPR_PREDICATE] = 'p'
189 };
190 
191 static void
sm1_dump_reg(BYTE file,INT index)192 sm1_dump_reg(BYTE file, INT index)
193 {
194     switch (file) {
195     case D3DSPR_LOOP:
196         DUMP("aL");
197         break;
198     case D3DSPR_COLOROUT:
199         DUMP("oC%i", index);
200         break;
201     case D3DSPR_DEPTHOUT:
202         DUMP("oDepth");
203         break;
204     case D3DSPR_RASTOUT:
205         DUMP("oRast%i", index);
206         break;
207     case D3DSPR_CONSTINT:
208         DUMP("iconst[%i]", index);
209         break;
210     case D3DSPR_CONSTBOOL:
211         DUMP("bconst[%i]", index);
212         break;
213     default:
214         DUMP("%c%i", sm1_file_char[file], index);
215         break;
216     }
217 }
218 
219 struct sm1_src_param
220 {
221     INT idx;
222     struct sm1_src_param *rel;
223     BYTE file;
224     BYTE swizzle;
225     BYTE mod;
226     BYTE type;
227     union {
228         DWORD d[4];
229         float f[4];
230         int i[4];
231         BOOL b;
232     } imm;
233 };
234 static void
235 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
236 
237 struct sm1_dst_param
238 {
239     INT idx;
240     struct sm1_src_param *rel;
241     BYTE file;
242     BYTE mask;
243     BYTE mod;
244     int8_t shift; /* sint4 */
245     BYTE type;
246 };
247 
248 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)249 assert_replicate_swizzle(const struct ureg_src *reg)
250 {
251     assert(reg->SwizzleY == reg->SwizzleX &&
252            reg->SwizzleZ == reg->SwizzleX &&
253            reg->SwizzleW == reg->SwizzleX);
254 }
255 
256 static void
sm1_dump_immediate(const struct sm1_src_param * param)257 sm1_dump_immediate(const struct sm1_src_param *param)
258 {
259     switch (param->type) {
260     case NINED3DSPTYPE_FLOAT4:
261         DUMP("{ %f %f %f %f }",
262              param->imm.f[0], param->imm.f[1],
263              param->imm.f[2], param->imm.f[3]);
264         break;
265     case NINED3DSPTYPE_INT4:
266         DUMP("{ %i %i %i %i }",
267              param->imm.i[0], param->imm.i[1],
268              param->imm.i[2], param->imm.i[3]);
269         break;
270     case NINED3DSPTYPE_BOOL:
271         DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
272         break;
273     default:
274         assert(0);
275         break;
276     }
277 }
278 
279 static void
sm1_dump_src_param(const struct sm1_src_param * param)280 sm1_dump_src_param(const struct sm1_src_param *param)
281 {
282     if (param->file == NINED3DSPR_IMMEDIATE) {
283         assert(!param->mod &&
284                !param->rel &&
285                param->swizzle == NINED3DSP_NOSWIZZLE);
286         sm1_dump_immediate(param);
287         return;
288     }
289 
290     if (param->mod)
291         DUMP("%s(", sm1_mod_str[param->mod]);
292     if (param->rel) {
293         DUMP("%c[", sm1_file_char[param->file]);
294         sm1_dump_src_param(param->rel);
295         DUMP("+%i]", param->idx);
296     } else {
297         sm1_dump_reg(param->file, param->idx);
298     }
299     if (param->mod)
300        DUMP(")");
301     if (param->swizzle != NINED3DSP_NOSWIZZLE) {
302        DUMP(".");
303        sm1_dump_swizzle(param->swizzle);
304     }
305 }
306 
307 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)308 sm1_dump_dst_param(const struct sm1_dst_param *param)
309 {
310    if (param->mod & NINED3DSPDM_SATURATE)
311       DUMP("sat ");
312    if (param->mod & NINED3DSPDM_PARTIALP)
313       DUMP("pp ");
314    if (param->mod & NINED3DSPDM_CENTROID)
315       DUMP("centroid ");
316    if (param->shift < 0)
317       DUMP("/%u ", 1 << -param->shift);
318    if (param->shift > 0)
319       DUMP("*%u ", 1 << param->shift);
320 
321    if (param->rel) {
322       DUMP("%c[", sm1_file_char[param->file]);
323       sm1_dump_src_param(param->rel);
324       DUMP("+%i]", param->idx);
325    } else {
326       sm1_dump_reg(param->file, param->idx);
327    }
328    if (param->mask != NINED3DSP_WRITEMASK_ALL) {
329       DUMP(".");
330       sm1_dump_writemask(param->mask);
331    }
332 }
333 
334 struct sm1_semantic
335 {
336    struct sm1_dst_param reg;
337    BYTE sampler_type;
338    D3DDECLUSAGE usage;
339    BYTE usage_idx;
340 };
341 
342 struct sm1_op_info
343 {
344     /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
345      * should be ignored completely */
346     unsigned sio;
347     unsigned opcode; /* TGSI_OPCODE_x */
348 
349     /* versions are still set even handler is set */
350     struct {
351         unsigned min;
352         unsigned max;
353     } vert_version, frag_version;
354 
355     /* number of regs parsed outside of special handler */
356     unsigned ndst;
357     unsigned nsrc;
358 
359     /* some instructions don't map perfectly, so use a special handler */
360     translate_instruction_func handler;
361 };
362 
363 struct sm1_instruction
364 {
365     D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
366     BYTE flags;
367     BOOL coissue;
368     BOOL predicated;
369     BYTE ndst;
370     BYTE nsrc;
371     struct sm1_src_param src[4];
372     struct sm1_src_param src_rel[4];
373     struct sm1_src_param pred;
374     struct sm1_src_param dst_rel[1];
375     struct sm1_dst_param dst[1];
376 
377     const struct sm1_op_info *info;
378 };
379 
380 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)381 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
382 {
383     unsigned i;
384 
385     /* no info stored for these: */
386     if (insn->opcode == D3DSIO_DCL)
387         return;
388     for (i = 0; i < indent; ++i)
389         DUMP("  ");
390 
391     if (insn->predicated) {
392         DUMP("@");
393         sm1_dump_src_param(&insn->pred);
394         DUMP(" ");
395     }
396     DUMP("%s", d3dsio_to_string(insn->opcode));
397     if (insn->flags) {
398         switch (insn->opcode) {
399         case D3DSIO_TEX:
400             DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
401             break;
402         default:
403             DUMP("_%x", insn->flags);
404             break;
405         }
406     }
407     if (insn->coissue)
408         DUMP("_co");
409     DUMP(" ");
410 
411     for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
412         sm1_dump_dst_param(&insn->dst[i]);
413         DUMP(" ");
414     }
415 
416     for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
417         sm1_dump_src_param(&insn->src[i]);
418         DUMP(" ");
419     }
420     if (insn->opcode == D3DSIO_DEF ||
421         insn->opcode == D3DSIO_DEFI ||
422         insn->opcode == D3DSIO_DEFB)
423         sm1_dump_immediate(&insn->src[0]);
424 
425     DUMP("\n");
426 }
427 
428 struct sm1_local_const
429 {
430     INT idx;
431     struct ureg_src reg;
432     float f[4]; /* for indirect addressing of float constants */
433 };
434 
435 struct shader_translator
436 {
437     const DWORD *byte_code;
438     const DWORD *parse;
439     const DWORD *parse_next;
440 
441     struct ureg_program *ureg;
442 
443     /* shader version */
444     struct {
445         BYTE major;
446         BYTE minor;
447     } version;
448     unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
449     unsigned num_constf_allowed;
450     unsigned num_consti_allowed;
451     unsigned num_constb_allowed;
452 
453     bool native_integers;
454     bool inline_subroutines;
455     bool want_texcoord;
456     bool shift_wpos;
457     bool wpos_is_sysval;
458     bool face_is_sysval_integer;
459     bool mul_zero_wins;
460     bool always_output_pointsize;
461     bool no_vs_window_space;
462     unsigned texcoord_sn;
463 
464     struct sm1_instruction insn; /* current instruction */
465 
466     struct {
467         struct ureg_dst *r;
468         struct ureg_dst oPos;
469         struct ureg_dst oPos_out; /* the real output when doing streamout or clipplane emulation */
470         struct ureg_dst oFog;
471         struct ureg_dst oPts;
472         struct ureg_dst oCol[4];
473         struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
474         struct ureg_dst oDepth;
475         struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
476         struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
477         struct ureg_src vPos;
478         struct ureg_src vFace;
479         struct ureg_src s;
480         struct ureg_dst p;
481         struct ureg_dst address;
482         struct ureg_dst a0;
483         struct ureg_dst predicate;
484         struct ureg_dst predicate_tmp;
485         struct ureg_dst predicate_dst;
486         struct ureg_dst tS[8]; /* texture stage registers */
487         struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
488         struct ureg_dst t[8]; /* scratch TEMPs */
489         struct ureg_src vC[2]; /* PS color in */
490         struct ureg_src vT[8]; /* PS texcoord in */
491         struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop/rep ctr */
492         struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* aL emulation */
493     } regs;
494     unsigned num_temp; /* ARRAY_SIZE(regs.r) */
495     unsigned num_scratch;
496     unsigned loop_depth;
497     unsigned loop_depth_max;
498     unsigned cond_depth;
499     unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
500     unsigned cond_labels[NINE_MAX_COND_DEPTH];
501     bool loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
502     bool predicated_activated;
503 
504     unsigned *inst_labels; /* LABEL op */
505     unsigned num_inst_labels;
506 
507     unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
508 
509     struct sm1_local_const *lconstf;
510     unsigned num_lconstf;
511     struct sm1_local_const *lconsti;
512     unsigned num_lconsti;
513     struct sm1_local_const *lconstb;
514     unsigned num_lconstb;
515 
516     bool slots_used[NINE_MAX_CONST_ALL_VS];
517     unsigned *slot_map;
518     unsigned num_slots;
519 
520     bool indirect_const_access;
521     bool failure;
522 
523     struct nine_vs_output_info output_info[16];
524     int num_outputs;
525 
526     struct nine_shader_info *info;
527 
528     int16_t op_info_map[D3DSIO_BREAKP + 1];
529 };
530 
/* Shader stage tests; both expect a 'tx' translator pointer in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Flag the translation as failed and return from a void function when
 * 'cond' holds. Expects 'tx' in scope. Note that the expansion is a bare
 * 'if' statement and that callers in this file invoke it without a trailing
 * semicolon. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
538 
539 static void
sm1_instruction_check(const struct sm1_instruction * insn)540 sm1_instruction_check(const struct sm1_instruction *insn)
541 {
542     if (insn->opcode == D3DSIO_CRS)
543     {
544         if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
545         {
546             DBG("CRS.mask.w\n");
547         }
548     }
549 }
550 
551 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)552 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
553                     int mask, int output_index)
554 {
555     tx->output_info[tx->num_outputs].output_semantic = Usage;
556     tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
557     tx->output_info[tx->num_outputs].mask = mask;
558     tx->output_info[tx->num_outputs].output_index = output_index;
559     tx->num_outputs++;
560 }
561 
nine_float_constant_src(struct shader_translator * tx,int idx)562 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
563 {
564     struct ureg_src src;
565 
566     if (tx->slot_map)
567         idx = tx->slot_map[idx];
568     /* vswp constant handling: we use two buffers
569      * to fit all the float constants. The special handling
570      * doesn't need to be elsewhere, because all the instructions
571      * accessing the constants directly are VS1, and swvp
572      * is VS >= 2 */
573     if (tx->info->swvp_on && idx >= 4096) {
574         /* TODO: swvp rel is broken if many constants are used */
575         src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
576         src = ureg_src_dimension(src, 1);
577     } else {
578         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
579         src = ureg_src_dimension(src, 0);
580     }
581 
582     if (!tx->info->swvp_on)
583         tx->slots_used[idx] = true;
584     if (tx->info->const_float_slots < (idx + 1))
585         tx->info->const_float_slots = idx + 1;
586     if (tx->num_slots < (idx + 1))
587         tx->num_slots = idx + 1;
588 
589     return src;
590 }
591 
nine_integer_constant_src(struct shader_translator * tx,int idx)592 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
593 {
594     struct ureg_src src;
595 
596     if (tx->info->swvp_on) {
597         src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
598         src = ureg_src_dimension(src, 2);
599     } else {
600         unsigned slot_idx = tx->info->const_i_base + idx;
601         if (tx->slot_map)
602             slot_idx = tx->slot_map[slot_idx];
603         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
604         src = ureg_src_dimension(src, 0);
605         tx->slots_used[slot_idx] = true;
606         tx->info->int_slots_used[idx] = true;
607         if (tx->num_slots < (slot_idx + 1))
608             tx->num_slots = slot_idx + 1;
609     }
610 
611     if (tx->info->const_int_slots < (idx + 1))
612         tx->info->const_int_slots = idx + 1;
613 
614     return src;
615 }
616 
nine_boolean_constant_src(struct shader_translator * tx,int idx)617 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
618 {
619     struct ureg_src src;
620 
621     char r = idx / 4;
622     char s = idx & 3;
623 
624     if (tx->info->swvp_on) {
625         src = ureg_src_register(TGSI_FILE_CONSTANT, r);
626         src = ureg_src_dimension(src, 3);
627     } else {
628         unsigned slot_idx = tx->info->const_b_base + r;
629         if (tx->slot_map)
630             slot_idx = tx->slot_map[slot_idx];
631         src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
632         src = ureg_src_dimension(src, 0);
633         tx->slots_used[slot_idx] = true;
634         tx->info->bool_slots_used[idx] = true;
635         if (tx->num_slots < (slot_idx + 1))
636             tx->num_slots = slot_idx + 1;
637     }
638     src = ureg_swizzle(src, s, s, s, s);
639 
640     if (tx->info->const_bool_slots < (idx + 1))
641         tx->info->const_bool_slots = idx + 1;
642 
643     return src;
644 }
645 
nine_special_constant_src(struct shader_translator * tx,int idx)646 static struct ureg_src nine_special_constant_src(struct shader_translator *tx, int idx)
647 {
648     struct ureg_src src;
649 
650     unsigned slot_idx = idx + (IS_PS ? NINE_MAX_CONST_PS_SPE_OFFSET :
651         (tx->info->swvp_on ? NINE_MAX_CONST_SWVP_SPE_OFFSET : NINE_MAX_CONST_VS_SPE_OFFSET));
652 
653     if (!tx->info->swvp_on && tx->slot_map)
654         slot_idx = tx->slot_map[slot_idx];
655     src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
656     src = ureg_src_dimension(src, 0);
657 
658     if (!tx->info->swvp_on)
659         tx->slots_used[slot_idx] = true;
660     if (tx->num_slots < (slot_idx + 1))
661         tx->num_slots = slot_idx + 1;
662 
663     return src;
664 }
665 
666 static bool
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)667 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
668 {
669    INT i;
670 
671    if (index < 0 || index >= tx->num_constf_allowed) {
672        tx->failure = true;
673        return false;
674    }
675    for (i = 0; i < tx->num_lconstf; ++i) {
676       if (tx->lconstf[i].idx == index) {
677          *src = tx->lconstf[i].reg;
678          return true;
679       }
680    }
681    return false;
682 }
683 static bool
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)684 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
685 {
686    int i;
687 
688    if (index < 0 || index >= tx->num_consti_allowed) {
689        tx->failure = true;
690        return false;
691    }
692    for (i = 0; i < tx->num_lconsti; ++i) {
693       if (tx->lconsti[i].idx == index) {
694          *src = tx->lconsti[i].reg;
695          return true;
696       }
697    }
698    return false;
699 }
700 static bool
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)701 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
702 {
703    int i;
704 
705    if (index < 0 || index >= tx->num_constb_allowed) {
706        tx->failure = true;
707        return false;
708    }
709    for (i = 0; i < tx->num_lconstb; ++i) {
710       if (tx->lconstb[i].idx == index) {
711          *src = tx->lconstb[i].reg;
712          return true;
713       }
714    }
715    return false;
716 }
717 
718 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])719 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
720 {
721     unsigned n;
722 
723     FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
724 
725     for (n = 0; n < tx->num_lconstf; ++n)
726         if (tx->lconstf[n].idx == index)
727             break;
728     if (n == tx->num_lconstf) {
729        if ((n % 8) == 0) {
730           tx->lconstf = REALLOC(tx->lconstf,
731                                 (n + 0) * sizeof(tx->lconstf[0]),
732                                 (n + 8) * sizeof(tx->lconstf[0]));
733           assert(tx->lconstf);
734        }
735        tx->num_lconstf++;
736     }
737     tx->lconstf[n].idx = index;
738     tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
739 
740     memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
741 }
742 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])743 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
744 {
745     unsigned n;
746 
747     FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
748 
749     for (n = 0; n < tx->num_lconsti; ++n)
750         if (tx->lconsti[n].idx == index)
751             break;
752     if (n == tx->num_lconsti) {
753        if ((n % 8) == 0) {
754           tx->lconsti = REALLOC(tx->lconsti,
755                                 (n + 0) * sizeof(tx->lconsti[0]),
756                                 (n + 8) * sizeof(tx->lconsti[0]));
757           assert(tx->lconsti);
758        }
759        tx->num_lconsti++;
760     }
761 
762     tx->lconsti[n].idx = index;
763     tx->lconsti[n].reg = tx->native_integers ?
764        ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
765        ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
766 }
767 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)768 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
769 {
770     unsigned n;
771 
772     FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
773 
774     for (n = 0; n < tx->num_lconstb; ++n)
775         if (tx->lconstb[n].idx == index)
776             break;
777     if (n == tx->num_lconstb) {
778        if ((n % 8) == 0) {
779           tx->lconstb = REALLOC(tx->lconstb,
780                                 (n + 0) * sizeof(tx->lconstb[0]),
781                                 (n + 8) * sizeof(tx->lconstb[0]));
782           assert(tx->lconstb);
783        }
784        tx->num_lconstb++;
785     }
786 
787     tx->lconstb[n].idx = index;
788     tx->lconstb[n].reg = tx->native_integers ?
789        ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
790        ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
791 }
792 
793 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)794 tx_scratch(struct shader_translator *tx)
795 {
796     if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
797         tx->failure = true;
798         return tx->regs.t[0];
799     }
800     if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
801         tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
802     return tx->regs.t[tx->num_scratch++];
803 }
804 
805 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)806 tx_scratch_scalar(struct shader_translator *tx)
807 {
808     return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
809 }
810 
811 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)812 tx_src_scalar(struct ureg_dst dst)
813 {
814     struct ureg_src src = ureg_src(dst);
815     int c = ffs(dst.WriteMask) - 1;
816     if (dst.WriteMask == (1 << c))
817         src = ureg_scalar(src, c);
818     return src;
819 }
820 
821 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)822 tx_temp_alloc(struct shader_translator *tx, INT idx)
823 {
824     assert(idx >= 0);
825     if (idx >= tx->num_temp) {
826        unsigned k = tx->num_temp;
827        unsigned n = idx + 1;
828        tx->regs.r = REALLOC(tx->regs.r,
829                             k * sizeof(tx->regs.r[0]),
830                             n * sizeof(tx->regs.r[0]));
831        for (; k < n; ++k)
832           tx->regs.r[k] = ureg_dst_undef();
833        tx->num_temp = n;
834     }
835     if (ureg_dst_is_undef(tx->regs.r[idx]))
836         tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
837 }
838 
839 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)840 tx_addr_alloc(struct shader_translator *tx, INT idx)
841 {
842     assert(idx == 0);
843     if (ureg_dst_is_undef(tx->regs.address))
844         tx->regs.address = ureg_DECL_address(tx->ureg);
845     if (ureg_dst_is_undef(tx->regs.a0))
846         tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
847 }
848 
849 static inline bool
TEX_if_fetch4(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)850 TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
851               unsigned target, struct ureg_src src0,
852               struct ureg_src src1, INT idx)
853 {
854     struct ureg_dst tmp;
855     struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
856 
857     if (!(tx->info->fetch4 & (1 << idx)))
858         return false;
859 
860     /* TODO: needs more tests, but this feature is not much used at all */
861 
862     tmp = tx_scratch(tx);
863     ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
864                   NULL, 0, src_tg4, 3);
865     ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
866     return true;
867 }
868 
869 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
870  * the projection should be applied on the texture. It doesn't
871  * apply on texkill.
872  * The doc is very imprecise here (it says the projection is done
873  * before rasterization, thus in vs, which seems wrong since ps instructions
874  * are affected differently)
875  * For now we only apply to the ps TEX instruction and TEXBEM.
876  * Perhaps some other instructions would need it */
877 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)878 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
879                       struct ureg_src src, INT idx)
880 {
881     struct ureg_dst tmp;
882     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
883 
884     /* no projection */
885     if (dim == 1) {
886         ureg_MOV(tx->ureg, dst, src);
887     } else {
888         tmp = tx_scratch_scalar(tx);
889         ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
890         ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
891     }
892 }
893 
894 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)895 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
896                          unsigned target, struct ureg_src src0,
897                          struct ureg_src src1, INT idx)
898 {
899     unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
900     struct ureg_dst tmp;
901     bool shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
902 
903     /* dim == 1: no projection
904      * Looks like must be disabled when it makes no
905      * sense according the texture dimensions
906      */
907     if (dim == 1 || (dim <= target && !shadow)) {
908         ureg_TEX(tx->ureg, dst, target, src0, src1);
909     } else if (dim == 4) {
910         ureg_TXP(tx->ureg, dst, target, src0, src1);
911     } else {
912         tmp = tx_scratch(tx);
913         apply_ps1x_projection(tx, tmp, src0, idx);
914         ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
915     }
916 }
917 
918 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)919 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
920 {
921     assert(IS_PS);
922     assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
923     if (ureg_src_is_undef(tx->regs.vT[idx]))
924        tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
925                                              TGSI_INTERPOLATE_PERSPECTIVE);
926 }
927 
928 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)929 tx_bgnloop(struct shader_translator *tx)
930 {
931     tx->loop_depth++;
932     if (tx->loop_depth_max < tx->loop_depth)
933         tx->loop_depth_max = tx->loop_depth;
934     assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
935     return &tx->loop_labels[tx->loop_depth - 1];
936 }
937 
938 static inline unsigned *
tx_endloop(struct shader_translator * tx)939 tx_endloop(struct shader_translator *tx)
940 {
941     assert(tx->loop_depth);
942     tx->loop_depth--;
943     ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
944                      ureg_get_instruction_number(tx->ureg));
945     return &tx->loop_labels[tx->loop_depth];
946 }
947 
948 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,bool loop_or_rep)949 tx_get_loopctr(struct shader_translator *tx, bool loop_or_rep)
950 {
951     const unsigned l = tx->loop_depth - 1;
952 
953     if (!tx->loop_depth)
954     {
955         DBG("loop counter requested outside of loop\n");
956         return ureg_dst_undef();
957     }
958 
959     if (ureg_dst_is_undef(tx->regs.rL[l])) {
960         /* loop or rep ctr creation */
961         tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
962         if (loop_or_rep)
963             tx->regs.aL[l] = ureg_DECL_local_temporary(tx->ureg);
964         tx->loop_or_rep[l] = loop_or_rep;
965     }
966     /* loop - rep - endloop - endrep not allowed */
967     assert(tx->loop_or_rep[l] == loop_or_rep);
968 
969     return tx->regs.rL[l];
970 }
971 
972 static struct ureg_dst
tx_get_loopal(struct shader_translator * tx)973 tx_get_loopal(struct shader_translator *tx)
974 {
975     int loop_level = tx->loop_depth - 1;
976 
977     while (loop_level >= 0) {
978         /* handle loop - rep - endrep - endloop case */
979         if (tx->loop_or_rep[loop_level])
980             /* the aL value is in the Y component (nine implementation) */
981             return tx->regs.aL[loop_level];
982         loop_level--;
983     }
984 
985     DBG("aL counter requested outside of loop\n");
986     return ureg_dst_undef();
987 }
988 
989 static inline unsigned *
tx_cond(struct shader_translator * tx)990 tx_cond(struct shader_translator *tx)
991 {
992    assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
993    tx->cond_depth++;
994    return &tx->cond_labels[tx->cond_depth - 1];
995 }
996 
997 static inline unsigned *
tx_elsecond(struct shader_translator * tx)998 tx_elsecond(struct shader_translator *tx)
999 {
1000    assert(tx->cond_depth);
1001    return &tx->cond_labels[tx->cond_depth - 1];
1002 }
1003 
1004 static inline void
tx_endcond(struct shader_translator * tx)1005 tx_endcond(struct shader_translator *tx)
1006 {
1007    assert(tx->cond_depth);
1008    tx->cond_depth--;
1009    ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
1010                     ureg_get_instruction_number(tx->ureg));
1011 }
1012 
1013 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)1014 nine_ureg_dst_register(unsigned file, int index)
1015 {
1016     return ureg_dst(ureg_src_register(file, index));
1017 }
1018 
1019 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)1020 nine_get_position_input(struct shader_translator *tx)
1021 {
1022     struct ureg_program *ureg = tx->ureg;
1023 
1024     if (tx->wpos_is_sysval)
1025         return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1026     else
1027         return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1028                                   0, TGSI_INTERPOLATE_LINEAR);
1029 }
1030 
1031 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)1032 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1033 {
1034     struct ureg_program *ureg = tx->ureg;
1035     struct ureg_src src;
1036     struct ureg_dst tmp;
1037 
1038     assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1039         (param->file == D3DSPR_INPUT && tx->version.major == 3));
1040 
1041     switch (param->file)
1042     {
1043     case D3DSPR_TEMP:
1044         tx_temp_alloc(tx, param->idx);
1045         src = ureg_src(tx->regs.r[param->idx]);
1046         break;
1047  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1048     case D3DSPR_ADDR:
1049         if (IS_VS) {
1050             assert(param->idx == 0);
1051             /* the address register (vs only) must be
1052              * assigned before use */
1053             assert(!ureg_dst_is_undef(tx->regs.a0));
1054             /* Round to lowest for vs1.1 (contrary to the doc), else
1055              * round to nearest */
1056             if (tx->version.major < 2 && tx->version.minor < 2)
1057                 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1058             else
1059                 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1060             src = ureg_src(tx->regs.address);
1061         } else {
1062             if (tx->version.major < 2 && tx->version.minor < 4) {
1063                 /* no subroutines, so should be defined */
1064                 src = ureg_src(tx->regs.tS[param->idx]);
1065             } else {
1066                 tx_texcoord_alloc(tx, param->idx);
1067                 src = tx->regs.vT[param->idx];
1068             }
1069         }
1070         break;
1071     case D3DSPR_INPUT:
1072         if (IS_VS) {
1073             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1074         } else {
1075             if (tx->version.major < 3) {
1076                 src = ureg_DECL_fs_input_centroid(
1077                     ureg, TGSI_SEMANTIC_COLOR, param->idx,
1078                     tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE,
1079                     tx->info->force_color_in_centroid ?
1080                       TGSI_INTERPOLATE_LOC_CENTROID : 0,
1081                     0, 1);
1082             } else {
1083                 if(param->rel) {
1084                     /* Copy all inputs (non consecutive)
1085                      * to temp array (consecutive).
1086                      * This is not good for performance.
1087                      * A better way would be to have inputs
1088                      * consecutive (would need implement alternative
1089                      * way to match vs outputs and ps inputs).
1090                      * However even with the better way, the temp array
1091                      * copy would need to be used if some inputs
1092                      * are not GENERIC or if they have different
1093                      * interpolation flag. */
1094                     if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1095                         int i;
1096                         tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1097                         for (i = 0; i < 10; i++) {
1098                             if (!ureg_src_is_undef(tx->regs.v[i]))
1099                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1100                             else
1101                                 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1102                         }
1103                     }
1104                     src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1105                 } else {
1106                     assert(param->idx < ARRAY_SIZE(tx->regs.v));
1107                     src = tx->regs.v[param->idx];
1108                 }
1109             }
1110         }
1111         if (param->rel)
1112             src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1113         break;
1114     case D3DSPR_PREDICATE:
1115         if (ureg_dst_is_undef(tx->regs.predicate)) {
1116             /* Forbidden to use the predicate register before being set */
1117             tx->failure = true;
1118             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1119         }
1120         src = ureg_src(tx->regs.predicate);
1121         break;
1122     case D3DSPR_SAMPLER:
1123         assert(param->mod == NINED3DSPSM_NONE);
1124         /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1125         src = ureg_DECL_sampler(ureg, param->idx);
1126         break;
1127     case D3DSPR_CONST:
1128         if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1129             src = nine_float_constant_src(tx, param->idx);
1130             if (param->rel) {
1131                 tx->indirect_const_access = true;
1132                 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1133             }
1134         }
1135         if (!IS_VS && tx->version.major < 2) {
1136             /* ps 1.X clamps constants */
1137             tmp = tx_scratch(tx);
1138             ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1139             ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1140             src = ureg_src(tmp);
1141         }
1142         break;
1143     case D3DSPR_CONST2:
1144     case D3DSPR_CONST3:
1145     case D3DSPR_CONST4:
1146         DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1147         assert(!"CONST2/3/4");
1148         src = ureg_imm1f(ureg, 0.0f);
1149         break;
1150     case D3DSPR_CONSTINT:
1151         /* relative adressing only possible for float constants in vs */
1152         if (!tx_lconsti(tx, &src, param->idx))
1153             src = nine_integer_constant_src(tx, param->idx);
1154         break;
1155     case D3DSPR_CONSTBOOL:
1156         if (!tx_lconstb(tx, &src, param->idx))
1157             src = nine_boolean_constant_src(tx, param->idx);
1158         break;
1159     case D3DSPR_LOOP:
1160         if (ureg_dst_is_undef(tx->regs.address))
1161             tx->regs.address = ureg_DECL_address(ureg);
1162         if (!tx->native_integers)
1163             ureg_ARR(ureg, tx->regs.address,
1164                      ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1165         else
1166             ureg_UARL(ureg, tx->regs.address,
1167                       ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1168         src = ureg_src(tx->regs.address);
1169         break;
1170     case D3DSPR_MISCTYPE:
1171         switch (param->idx) {
1172         case D3DSMO_POSITION:
1173            if (ureg_src_is_undef(tx->regs.vPos))
1174               tx->regs.vPos = nine_get_position_input(tx);
1175            if (tx->shift_wpos) {
1176                /* TODO: do this only once */
1177                struct ureg_dst wpos = tx_scratch(tx);
1178                ureg_ADD(ureg, wpos, tx->regs.vPos,
1179                         ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1180                src = ureg_src(wpos);
1181            } else {
1182                src = tx->regs.vPos;
1183            }
1184            break;
1185         case D3DSMO_FACE:
1186            if (ureg_src_is_undef(tx->regs.vFace)) {
1187                if (tx->face_is_sysval_integer) {
1188                    tmp = ureg_DECL_temporary(ureg);
1189                    tx->regs.vFace =
1190                        ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1191 
1192                    /* convert bool to float */
1193                    ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1194                              ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1195                    tx->regs.vFace = ureg_src(tmp);
1196                } else {
1197                    tx->regs.vFace = ureg_DECL_fs_input(ureg,
1198                                                        TGSI_SEMANTIC_FACE, 0,
1199                                                        TGSI_INTERPOLATE_CONSTANT);
1200                }
1201                tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1202            }
1203            src = tx->regs.vFace;
1204            break;
1205         default:
1206             assert(!"invalid src D3DSMO");
1207             break;
1208         }
1209         break;
1210     case D3DSPR_TEMPFLOAT16:
1211         break;
1212     default:
1213         assert(!"invalid src D3DSPR");
1214     }
1215 
1216     switch (param->mod) {
1217     case NINED3DSPSM_DW:
1218         tmp = tx_scratch(tx);
1219         /* NOTE: app is not allowed to read w with this modifier */
1220         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1221         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1222         src = ureg_src(tmp);
1223         break;
1224     case NINED3DSPSM_DZ:
1225         tmp = tx_scratch(tx);
1226         /* NOTE: app is not allowed to read z with this modifier */
1227         ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1228         ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1229         src = ureg_src(tmp);
1230         break;
1231     default:
1232         break;
1233     }
1234 
1235     if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1236         src = ureg_swizzle(src,
1237                            (param->swizzle >> 0) & 0x3,
1238                            (param->swizzle >> 2) & 0x3,
1239                            (param->swizzle >> 4) & 0x3,
1240                            (param->swizzle >> 6) & 0x3);
1241 
1242     switch (param->mod) {
1243     case NINED3DSPSM_ABS:
1244         src = ureg_abs(src);
1245         break;
1246     case NINED3DSPSM_ABSNEG:
1247         src = ureg_negate(ureg_abs(src));
1248         break;
1249     case NINED3DSPSM_NEG:
1250         src = ureg_negate(src);
1251         break;
1252     case NINED3DSPSM_BIAS:
1253         tmp = tx_scratch(tx);
1254         ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1255         src = ureg_src(tmp);
1256         break;
1257     case NINED3DSPSM_BIASNEG:
1258         tmp = tx_scratch(tx);
1259         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1260         src = ureg_src(tmp);
1261         break;
1262     case NINED3DSPSM_NOT:
1263         if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1264             tmp = tx_scratch(tx);
1265             ureg_NOT(ureg, tmp, src);
1266             src = ureg_src(tmp);
1267             break;
1268         } else { /* predicate */
1269             tmp = tx_scratch(tx);
1270             ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1271             src = ureg_src(tmp);
1272         }
1273         FALLTHROUGH;
1274     case NINED3DSPSM_COMP:
1275         tmp = tx_scratch(tx);
1276         ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1277         src = ureg_src(tmp);
1278         break;
1279     case NINED3DSPSM_DZ:
1280     case NINED3DSPSM_DW:
1281         /* Already handled*/
1282         break;
1283     case NINED3DSPSM_SIGN:
1284         tmp = tx_scratch(tx);
1285         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1286         src = ureg_src(tmp);
1287         break;
1288     case NINED3DSPSM_SIGNNEG:
1289         tmp = tx_scratch(tx);
1290         ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1291         src = ureg_src(tmp);
1292         break;
1293     case NINED3DSPSM_X2:
1294         tmp = tx_scratch(tx);
1295         ureg_ADD(ureg, tmp, src, src);
1296         src = ureg_src(tmp);
1297         break;
1298     case NINED3DSPSM_X2NEG:
1299         tmp = tx_scratch(tx);
1300         ureg_ADD(ureg, tmp, src, src);
1301         src = ureg_negate(ureg_src(tmp));
1302         break;
1303     default:
1304         assert(param->mod == NINED3DSPSM_NONE);
1305         break;
1306     }
1307 
1308     return src;
1309 }
1310 
1311 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1312 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1313 {
1314     struct ureg_dst dst;
1315 
1316     switch (param->file)
1317     {
1318     case D3DSPR_TEMP:
1319         assert(!param->rel);
1320         tx_temp_alloc(tx, param->idx);
1321         dst = tx->regs.r[param->idx];
1322         break;
1323  /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1324     case D3DSPR_ADDR:
1325         assert(!param->rel);
1326         if (tx->version.major < 2 && !IS_VS) {
1327             if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1328                 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1329             dst = tx->regs.tS[param->idx];
1330         } else
1331         if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1332             tx_texcoord_alloc(tx, param->idx);
1333             dst = ureg_dst(tx->regs.vT[param->idx]);
1334         } else {
1335             tx_addr_alloc(tx, param->idx);
1336             dst = tx->regs.a0;
1337         }
1338         break;
1339     case D3DSPR_RASTOUT:
1340         assert(!param->rel);
1341         switch (param->idx) {
1342         case 0:
1343             if (ureg_dst_is_undef(tx->regs.oPos)) {
1344                 if (tx->info->clip_plane_emulation > 0) {
1345                     tx->regs.oPos = ureg_DECL_temporary(tx->ureg);
1346                 } else {
1347                     tx->regs.oPos = tx->regs.oPos_out;
1348                 }
1349             }
1350             dst = tx->regs.oPos;
1351             break;
1352         case 1:
1353             if (ureg_dst_is_undef(tx->regs.oFog))
1354                 tx->regs.oFog =
1355                     ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1356             dst = tx->regs.oFog;
1357             break;
1358         case 2:
1359             if (ureg_dst_is_undef(tx->regs.oPts))
1360                 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1361             dst = tx->regs.oPts;
1362             break;
1363         default:
1364             assert(0);
1365             break;
1366         }
1367         break;
1368  /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1369     case D3DSPR_OUTPUT:
1370         if (tx->version.major < 3) {
1371             assert(!param->rel);
1372             dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1373         } else {
1374             assert(!param->rel); /* TODO */
1375             assert(param->idx < ARRAY_SIZE(tx->regs.o));
1376             dst = tx->regs.o[param->idx];
1377         }
1378         break;
1379     case D3DSPR_ATTROUT: /* VS */
1380     case D3DSPR_COLOROUT: /* PS */
1381         assert(param->idx >= 0 && param->idx < 4);
1382         assert(!param->rel);
1383         tx->info->rt_mask |= 1 << param->idx;
1384         if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1385             /* ps < 3: oCol[0] will have fog blending afterward
1386              * ps: oCol[0] might have alphatest afterward */
1387             if (!IS_VS && param->idx == 0) {
1388                 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1389             } else {
1390                 tx->regs.oCol[param->idx] =
1391                     ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1392             }
1393         }
1394         dst = tx->regs.oCol[param->idx];
1395         if (IS_VS && tx->version.major < 3)
1396             dst = ureg_saturate(dst);
1397         break;
1398     case D3DSPR_DEPTHOUT:
1399         assert(!param->rel);
1400         if (ureg_dst_is_undef(tx->regs.oDepth))
1401            tx->regs.oDepth =
1402               ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1403                                       TGSI_WRITEMASK_Z, 0, 1);
1404         dst = tx->regs.oDepth; /* XXX: must write .z component */
1405         break;
1406     case D3DSPR_PREDICATE:
1407         if (ureg_dst_is_undef(tx->regs.predicate))
1408             tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1409         dst = tx->regs.predicate;
1410         break;
1411     case D3DSPR_TEMPFLOAT16:
1412         DBG("unhandled D3DSPR: %u\n", param->file);
1413         break;
1414     default:
1415         assert(!"invalid dst D3DSPR");
1416         break;
1417     }
1418     if (param->rel)
1419         dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1420 
1421     if (param->mask != NINED3DSP_WRITEMASK_ALL)
1422         dst = ureg_writemask(dst, param->mask);
1423     if (param->mod & NINED3DSPDM_SATURATE)
1424         dst = ureg_saturate(dst);
1425 
1426     if (tx->predicated_activated) {
1427         tx->regs.predicate_dst = dst;
1428         dst = tx->regs.predicate_tmp;
1429     }
1430 
1431     return dst;
1432 }
1433 
1434 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1435 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1436 {
1437     if (param->shift) {
1438         tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1439         return tx->regs.tdst;
1440     }
1441     return _tx_dst_param(tx, param);
1442 }
1443 
1444 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1445 tx_apply_dst0_modifiers(struct shader_translator *tx)
1446 {
1447     struct ureg_dst rdst;
1448     float f;
1449 
1450     if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1451         return;
1452     rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1453 
1454     assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1455 
1456     if (tx->insn.dst[0].shift < 0)
1457         f = 1.0f / (1 << -tx->insn.dst[0].shift);
1458     else
1459         f = 1 << tx->insn.dst[0].shift;
1460 
1461     ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1462 }
1463 
1464 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1465 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1466 {
1467     struct ureg_src src;
1468 
1469     assert(!param->shift);
1470     assert(!(param->mod & NINED3DSPDM_SATURATE));
1471 
1472     switch (param->file) {
1473     case D3DSPR_INPUT:
1474         if (IS_VS) {
1475             src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1476         } else {
1477             assert(!param->rel);
1478             assert(param->idx < ARRAY_SIZE(tx->regs.v));
1479             src = tx->regs.v[param->idx];
1480         }
1481         break;
1482     default:
1483         src = ureg_src(tx_dst_param(tx, param));
1484         break;
1485     }
1486     if (param->rel)
1487         src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1488 
1489     if (!param->mask)
1490         WARN("mask is 0, using identity swizzle\n");
1491 
1492     if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1493         char s[4];
1494         int n;
1495         int c;
1496         for (n = 0, c = 0; c < 4; ++c)
1497             if (param->mask & (1 << c))
1498                 s[n++] = c;
1499         assert(n);
1500         for (c = n; c < 4; ++c)
1501             s[c] = s[n - 1];
1502         src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1503     }
1504     return src;
1505 }
1506 
1507 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1508 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1509 {
1510     struct ureg_program *ureg = tx->ureg;
1511     struct ureg_dst dst;
1512     struct ureg_src src[2];
1513     struct sm1_src_param *src_mat = &tx->insn.src[1];
1514     unsigned i;
1515 
1516     dst = tx_dst_param(tx, &tx->insn.dst[0]);
1517     src[0] = tx_src_param(tx, &tx->insn.src[0]);
1518 
1519     for (i = 0; i < n; i++)
1520     {
1521         const unsigned m = (1 << i);
1522 
1523         src[1] = tx_src_param(tx, src_mat);
1524         src_mat->idx++;
1525 
1526         if (!(dst.WriteMask & m))
1527             continue;
1528 
1529         /* XXX: src == dst case ? */
1530 
1531         switch (k) {
1532         case 3:
1533             ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1534             break;
1535         case 4:
1536             ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1537             break;
1538         default:
1539             DBG("invalid operation: M%ux%u\n", m, n);
1540             break;
1541         }
1542     }
1543 
1544     return D3D_OK;
1545 }
1546 
/* Expands to a 0, 0 pair. */
#define VNOTSUPPORTED   0, 0
/* Pack a major.minor version into one integer: major in bits 8 and up,
 * minor in the low byte. */
#define V(major_, minor_)   (((major_) << 8) | (minor_))
1549 
1550 static inline const char *
d3dsio_to_string(unsigned opcode)1551 d3dsio_to_string( unsigned opcode )
1552 {
1553     static const char *names[] = {
1554         "NOP",
1555         "MOV",
1556         "ADD",
1557         "SUB",
1558         "MAD",
1559         "MUL",
1560         "RCP",
1561         "RSQ",
1562         "DP3",
1563         "DP4",
1564         "MIN",
1565         "MAX",
1566         "SLT",
1567         "SGE",
1568         "EXP",
1569         "LOG",
1570         "LIT",
1571         "DST",
1572         "LRP",
1573         "FRC",
1574         "M4x4",
1575         "M4x3",
1576         "M3x4",
1577         "M3x3",
1578         "M3x2",
1579         "CALL",
1580         "CALLNZ",
1581         "LOOP",
1582         "RET",
1583         "ENDLOOP",
1584         "LABEL",
1585         "DCL",
1586         "POW",
1587         "CRS",
1588         "SGN",
1589         "ABS",
1590         "NRM",
1591         "SINCOS",
1592         "REP",
1593         "ENDREP",
1594         "IF",
1595         "IFC",
1596         "ELSE",
1597         "ENDIF",
1598         "BREAK",
1599         "BREAKC",
1600         "MOVA",
1601         "DEFB",
1602         "DEFI",
1603         NULL,
1604         NULL,
1605         NULL,
1606         NULL,
1607         NULL,
1608         NULL,
1609         NULL,
1610         NULL,
1611         NULL,
1612         NULL,
1613         NULL,
1614         NULL,
1615         NULL,
1616         NULL,
1617         NULL,
1618         "TEXCOORD",
1619         "TEXKILL",
1620         "TEX",
1621         "TEXBEM",
1622         "TEXBEML",
1623         "TEXREG2AR",
1624         "TEXREG2GB",
1625         "TEXM3x2PAD",
1626         "TEXM3x2TEX",
1627         "TEXM3x3PAD",
1628         "TEXM3x3TEX",
1629         NULL,
1630         "TEXM3x3SPEC",
1631         "TEXM3x3VSPEC",
1632         "EXPP",
1633         "LOGP",
1634         "CND",
1635         "DEF",
1636         "TEXREG2RGB",
1637         "TEXDP3TEX",
1638         "TEXM3x2DEPTH",
1639         "TEXDP3",
1640         "TEXM3x3",
1641         "TEXDEPTH",
1642         "CMP",
1643         "BEM",
1644         "DP2ADD",
1645         "DSX",
1646         "DSY",
1647         "TEXLDD",
1648         "SETP",
1649         "TEXLDL",
1650         "BREAKP"
1651     };
1652 
1653     if (opcode < ARRAY_SIZE(names)) return names[opcode];
1654 
1655     switch (opcode) {
1656     case D3DSIO_PHASE: return "PHASE";
1657     case D3DSIO_COMMENT: return "COMMENT";
1658     case D3DSIO_END: return "END";
1659     default:
1660         return NULL;
1661     }
1662 }
1663 
1664 #define NULL_INSTRUCTION            { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1665 #define IS_VALID_INSTRUCTION(inst)  ((inst).vert_version.min | \
1666                                      (inst).vert_version.max | \
1667                                      (inst).frag_version.min | \
1668                                      (inst).frag_version.max)
1669 
1670 #define SPECIAL(name) \
1671     NineTranslateInstruction_##name
1672 
1673 #define DECL_SPECIAL(name) \
1674     static HRESULT \
1675     NineTranslateInstruction_##name( struct shader_translator *tx )
1676 
1677 static HRESULT
1678 NineTranslateInstruction_Generic(struct shader_translator *);
1679 
DECL_SPECIAL(NOP)1680 DECL_SPECIAL(NOP)
1681 {
1682     /* Nothing to do. NOP was used to avoid hangs
1683      * with very old d3d drivers. */
1684     return D3D_OK;
1685 }
1686 
DECL_SPECIAL(SUB)1687 DECL_SPECIAL(SUB)
1688 {
1689     struct ureg_program *ureg = tx->ureg;
1690     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1691     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1692     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1693 
1694     ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1695     return D3D_OK;
1696 }
1697 
DECL_SPECIAL(ABS)1698 DECL_SPECIAL(ABS)
1699 {
1700     struct ureg_program *ureg = tx->ureg;
1701     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1702     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1703 
1704     ureg_MOV(ureg, dst, ureg_abs(src));
1705     return D3D_OK;
1706 }
1707 
DECL_SPECIAL(XPD)1708 DECL_SPECIAL(XPD)
1709 {
1710     struct ureg_program *ureg = tx->ureg;
1711     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1712     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1713     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1714 
1715     ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1716              ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1717                           TGSI_SWIZZLE_X, 0),
1718              ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1719                           TGSI_SWIZZLE_Y, 0));
1720     ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1721              ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1722                           TGSI_SWIZZLE_Y, 0),
1723              ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1724                                       TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1725              ureg_src(dst));
1726     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1727              ureg_imm1f(ureg, 1));
1728     return D3D_OK;
1729 }
1730 
DECL_SPECIAL(M4x4)1731 DECL_SPECIAL(M4x4)
1732 {
1733     return NineTranslateInstruction_Mkxn(tx, 4, 4);
1734 }
1735 
DECL_SPECIAL(M4x3)1736 DECL_SPECIAL(M4x3)
1737 {
1738     return NineTranslateInstruction_Mkxn(tx, 4, 3);
1739 }
1740 
DECL_SPECIAL(M3x4)1741 DECL_SPECIAL(M3x4)
1742 {
1743     return NineTranslateInstruction_Mkxn(tx, 3, 4);
1744 }
1745 
DECL_SPECIAL(M3x3)1746 DECL_SPECIAL(M3x3)
1747 {
1748     return NineTranslateInstruction_Mkxn(tx, 3, 3);
1749 }
1750 
DECL_SPECIAL(M3x2)1751 DECL_SPECIAL(M3x2)
1752 {
1753     return NineTranslateInstruction_Mkxn(tx, 3, 2);
1754 }
1755 
DECL_SPECIAL(CMP)1756 DECL_SPECIAL(CMP)
1757 {
1758     ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1759              tx_src_param(tx, &tx->insn.src[0]),
1760              tx_src_param(tx, &tx->insn.src[2]),
1761              tx_src_param(tx, &tx->insn.src[1]));
1762     return D3D_OK;
1763 }
1764 
DECL_SPECIAL(CND)1765 DECL_SPECIAL(CND)
1766 {
1767     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1768     struct ureg_dst cgt;
1769     struct ureg_src cnd;
1770 
1771     /* the coissue flag was a tip for compilers to advise to
1772      * execute two operations at the same time, in cases
1773      * the two executions had same dst with different channels.
1774      * It has no effect on current hw. However it seems CND
1775      * is affected. The handling of this very specific case
1776      * handled below mimick wine behaviour */
1777     if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1778         ureg_MOV(tx->ureg,
1779                  dst, tx_src_param(tx, &tx->insn.src[1]));
1780         return D3D_OK;
1781     }
1782 
1783     cnd = tx_src_param(tx, &tx->insn.src[0]);
1784     cgt = tx_scratch(tx);
1785 
1786     if (tx->version.major == 1 && tx->version.minor < 4)
1787         cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1788 
1789     ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1790 
1791     ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1792              tx_src_param(tx, &tx->insn.src[1]),
1793              tx_src_param(tx, &tx->insn.src[2]));
1794     return D3D_OK;
1795 }
1796 
DECL_SPECIAL(CALL)1797 DECL_SPECIAL(CALL)
1798 {
1799     assert(tx->insn.src[0].idx < tx->num_inst_labels);
1800     ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1801     return D3D_OK;
1802 }
1803 
DECL_SPECIAL(CALLNZ)1804 DECL_SPECIAL(CALLNZ)
1805 {
1806     struct ureg_program *ureg = tx->ureg;
1807     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1808 
1809     if (!tx->native_integers)
1810         ureg_IF(ureg, src, tx_cond(tx));
1811     else
1812         ureg_UIF(ureg, src, tx_cond(tx));
1813     ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1814     tx_endcond(tx);
1815     ureg_ENDIF(ureg);
1816     return D3D_OK;
1817 }
1818 
DECL_SPECIAL(LOOP)1819 DECL_SPECIAL(LOOP)
1820 {
1821     struct ureg_program *ureg = tx->ureg;
1822     unsigned *label;
1823     struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1824     struct ureg_dst ctr;
1825     struct ureg_dst aL;
1826     struct ureg_dst tmp;
1827     struct ureg_src ctrx;
1828 
1829     label = tx_bgnloop(tx);
1830     ctr = tx_get_loopctr(tx, true);
1831     aL = tx_get_loopal(tx);
1832     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1833 
1834     /* src: num_iterations*/
1835     ureg_MOV(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0),
1836              ureg_scalar(src, TGSI_SWIZZLE_X));
1837     /* al: unused - start_value of al - step for al - unused */
1838     ureg_MOV(ureg, aL, src);
1839     ureg_BGNLOOP(tx->ureg, label);
1840     tmp = tx_scratch_scalar(tx);
1841     /* Initially ctr.x contains the number of iterations.
1842      * ctr.y will contain the updated value of al.
1843      * We decrease ctr.x at the end of every iteration,
1844      * and stop when it reaches 0. */
1845 
1846     if (!tx->native_integers) {
1847         /* case src and ctr contain floats */
1848         /* to avoid precision issue, we stop when ctr <= 0.5 */
1849         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1850         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1851     } else {
1852         /* case src and ctr contain integers */
1853         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1854         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1855     }
1856     ureg_BRK(ureg);
1857     tx_endcond(tx);
1858     ureg_ENDIF(ureg);
1859     return D3D_OK;
1860 }
1861 
DECL_SPECIAL(RET)1862 DECL_SPECIAL(RET)
1863 {
1864     /* RET as a last instruction could be safely ignored.
1865      * Remove it to prevent crashes/warnings in case underlying
1866      * driver doesn't implement arbitrary returns.
1867      */
1868     if (*(tx->parse_next) != NINED3DSP_END) {
1869         ureg_RET(tx->ureg);
1870     }
1871     return D3D_OK;
1872 }
1873 
DECL_SPECIAL(ENDLOOP)1874 DECL_SPECIAL(ENDLOOP)
1875 {
1876     struct ureg_program *ureg = tx->ureg;
1877     struct ureg_dst ctr = tx_get_loopctr(tx, true);
1878     struct ureg_dst al = tx_get_loopal(tx);
1879     struct ureg_dst dst_ctrx, dst_al;
1880     struct ureg_src src_ctr, al_counter;
1881 
1882     dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1883     dst_al = ureg_writemask(al, NINED3DSP_WRITEMASK_1);
1884     src_ctr = ureg_src(ctr);
1885     al_counter = ureg_scalar(ureg_src(al), TGSI_SWIZZLE_Z);
1886 
1887     /* ctr.x -= 1
1888      * al.y (aL) += step */
1889     if (!tx->native_integers) {
1890         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1891         ureg_ADD(ureg, dst_al, ureg_src(al), al_counter);
1892     } else {
1893         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1894         ureg_UADD(ureg, dst_al, ureg_src(al), al_counter);
1895     }
1896     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1897     return D3D_OK;
1898 }
1899 
DECL_SPECIAL(LABEL)1900 DECL_SPECIAL(LABEL)
1901 {
1902     unsigned k = tx->num_inst_labels;
1903     unsigned n = tx->insn.src[0].idx;
1904     assert(n < 2048);
1905     if (n >= k)
1906        tx->inst_labels = REALLOC(tx->inst_labels,
1907                                  k * sizeof(tx->inst_labels[0]),
1908                                  n * sizeof(tx->inst_labels[0]));
1909 
1910     tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1911     return D3D_OK;
1912 }
1913 
DECL_SPECIAL(SINCOS)1914 DECL_SPECIAL(SINCOS)
1915 {
1916     struct ureg_program *ureg = tx->ureg;
1917     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1918     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1919     struct ureg_dst tmp = tx_scratch_scalar(tx);
1920 
1921     assert(!(dst.WriteMask & 0xc));
1922 
1923     /* Copying to a temporary register avoids src/dst aliasing.
1924      * src is supposed to have replicated swizzle. */
1925     ureg_MOV(ureg, tmp, src);
1926 
1927     /* z undefined, w untouched */
1928     ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1929              tx_src_scalar(tmp));
1930     ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1931              tx_src_scalar(tmp));
1932     return D3D_OK;
1933 }
1934 
DECL_SPECIAL(SGN)1935 DECL_SPECIAL(SGN)
1936 {
1937     ureg_SSG(tx->ureg,
1938              tx_dst_param(tx, &tx->insn.dst[0]),
1939              tx_src_param(tx, &tx->insn.src[0]));
1940     return D3D_OK;
1941 }
1942 
DECL_SPECIAL(REP)1943 DECL_SPECIAL(REP)
1944 {
1945     struct ureg_program *ureg = tx->ureg;
1946     unsigned *label;
1947     struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1948     struct ureg_dst ctr;
1949     struct ureg_dst tmp;
1950     struct ureg_src ctrx;
1951 
1952     label = tx_bgnloop(tx);
1953     ctr = ureg_writemask(tx_get_loopctr(tx, false), NINED3DSP_WRITEMASK_0);
1954     ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1955 
1956     /* NOTE: rep must be constant, so we don't have to save the count */
1957     assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1958 
1959     /* rep: num_iterations - 0 - 0 - 0 */
1960     ureg_MOV(ureg, ctr, rep);
1961     ureg_BGNLOOP(ureg, label);
1962     tmp = tx_scratch_scalar(tx);
1963     /* Initially ctr.x contains the number of iterations.
1964      * We decrease ctr.x at the end of every iteration,
1965      * and stop when it reaches 0. */
1966 
1967     if (!tx->native_integers) {
1968         /* case src and ctr contain floats */
1969         /* to avoid precision issue, we stop when ctr <= 0.5 */
1970         ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1971         ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1972     } else {
1973         /* case src and ctr contain integers */
1974         ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1975         ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1976     }
1977     ureg_BRK(ureg);
1978     tx_endcond(tx);
1979     ureg_ENDIF(ureg);
1980 
1981     return D3D_OK;
1982 }
1983 
DECL_SPECIAL(ENDREP)1984 DECL_SPECIAL(ENDREP)
1985 {
1986     struct ureg_program *ureg = tx->ureg;
1987     struct ureg_dst ctr = tx_get_loopctr(tx, false);
1988     struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1989     struct ureg_src src_ctr = ureg_src(ctr);
1990 
1991     /* ctr.x -= 1 */
1992     if (!tx->native_integers)
1993         ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1994     else
1995         ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1996 
1997     ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1998     return D3D_OK;
1999 }
2000 
DECL_SPECIAL(ENDIF)2001 DECL_SPECIAL(ENDIF)
2002 {
2003     tx_endcond(tx);
2004     ureg_ENDIF(tx->ureg);
2005     return D3D_OK;
2006 }
2007 
DECL_SPECIAL(IF)2008 DECL_SPECIAL(IF)
2009 {
2010     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2011 
2012     if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
2013         ureg_UIF(tx->ureg, src, tx_cond(tx));
2014     else
2015         ureg_IF(tx->ureg, src, tx_cond(tx));
2016 
2017     return D3D_OK;
2018 }
2019 
2020 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)2021 sm1_insn_flags_to_tgsi_setop(BYTE flags)
2022 {
2023     switch (flags) {
2024     case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
2025     case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
2026     case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
2027     case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
2028     case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
2029     case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
2030     default:
2031         assert(!"invalid comparison flags");
2032         return TGSI_OPCODE_SGT;
2033     }
2034 }
2035 
DECL_SPECIAL(IFC)2036 DECL_SPECIAL(IFC)
2037 {
2038     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2039     struct ureg_src src[2];
2040     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2041     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2042     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2043     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2044     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2045     return D3D_OK;
2046 }
2047 
DECL_SPECIAL(ELSE)2048 DECL_SPECIAL(ELSE)
2049 {
2050     ureg_ELSE(tx->ureg, tx_elsecond(tx));
2051     return D3D_OK;
2052 }
2053 
DECL_SPECIAL(BREAKC)2054 DECL_SPECIAL(BREAKC)
2055 {
2056     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2057     struct ureg_src src[2];
2058     struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2059     src[0] = tx_src_param(tx, &tx->insn.src[0]);
2060     src[1] = tx_src_param(tx, &tx->insn.src[1]);
2061     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2062     ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2063     ureg_BRK(tx->ureg);
2064     tx_endcond(tx);
2065     ureg_ENDIF(tx->ureg);
2066     return D3D_OK;
2067 }
2068 
/* Printable names for the D3DDECLUSAGE_* values, indexed by the usage value
 * itself. Used by the DCL debug dump. The table only covers the usages
 * listed here, so callers must not index it with a value past
 * D3DDECLUSAGE_SAMPLE. */
static const char *sm1_declusage_names[] =
{
    [D3DDECLUSAGE_POSITION] = "POSITION",
    [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
    [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
    [D3DDECLUSAGE_NORMAL] = "NORMAL",
    [D3DDECLUSAGE_PSIZE] = "PSIZE",
    [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
    [D3DDECLUSAGE_TANGENT] = "TANGENT",
    [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
    [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
    [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
    [D3DDECLUSAGE_COLOR] = "COLOR",
    [D3DDECLUSAGE_FOG] = "FOG",
    [D3DDECLUSAGE_DEPTH] = "DEPTH",
    [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
};
2086 
2087 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)2088 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2089 {
2090     return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2091 }
2092 
2093 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,bool tc,struct sm1_semantic * dcl)2094 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2095                       bool tc,
2096                       struct sm1_semantic *dcl)
2097 {
2098     BYTE index = dcl->usage_idx;
2099 
2100     /* For everything that is not matching to a TGSI_SEMANTIC_****,
2101      * we match to a TGSI_SEMANTIC_GENERIC with index.
2102      *
2103      * The index can be anything UINT16 and usage_idx is BYTE,
2104      * so we can fit everything. It doesn't matter if indices
2105      * are close together or low.
2106      *
2107      *
2108      * POSITION >= 1: 10 * index + 7
2109      * COLOR >= 2: 10 * (index-1) + 8
2110      * FOG: 16
2111      * TEXCOORD[0..15]: index
2112      * BLENDWEIGHT: 10 * index + 19
2113      * BLENDINDICES: 10 * index + 20
2114      * NORMAL: 10 * index + 21
2115      * TANGENT: 10 * index + 22
2116      * BINORMAL: 10 * index + 23
2117      * TESSFACTOR: 10 * index + 24
2118      */
2119 
2120     switch (dcl->usage) {
2121     case D3DDECLUSAGE_POSITION:
2122     case D3DDECLUSAGE_POSITIONT:
2123     case D3DDECLUSAGE_DEPTH:
2124         if (index == 0) {
2125             sem->Name = TGSI_SEMANTIC_POSITION;
2126             sem->Index = 0;
2127         } else {
2128             sem->Name = TGSI_SEMANTIC_GENERIC;
2129             sem->Index = 10 * index + 7;
2130         }
2131         break;
2132     case D3DDECLUSAGE_COLOR:
2133         if (index < 2) {
2134             sem->Name = TGSI_SEMANTIC_COLOR;
2135             sem->Index = index;
2136         } else {
2137             sem->Name = TGSI_SEMANTIC_GENERIC;
2138             sem->Index = 10 * (index-1) + 8;
2139         }
2140         break;
2141     case D3DDECLUSAGE_FOG:
2142         assert(index == 0);
2143         sem->Name = TGSI_SEMANTIC_GENERIC;
2144         sem->Index = 16;
2145         break;
2146     case D3DDECLUSAGE_PSIZE:
2147         assert(index == 0);
2148         sem->Name = TGSI_SEMANTIC_PSIZE;
2149         sem->Index = 0;
2150         break;
2151     case D3DDECLUSAGE_TEXCOORD:
2152         assert(index < 16);
2153         if (index < 8 && tc)
2154             sem->Name = TGSI_SEMANTIC_TEXCOORD;
2155         else
2156             sem->Name = TGSI_SEMANTIC_GENERIC;
2157         sem->Index = index;
2158         break;
2159     case D3DDECLUSAGE_BLENDWEIGHT:
2160         sem->Name = TGSI_SEMANTIC_GENERIC;
2161         sem->Index = 10 * index + 19;
2162         break;
2163     case D3DDECLUSAGE_BLENDINDICES:
2164         sem->Name = TGSI_SEMANTIC_GENERIC;
2165         sem->Index = 10 * index + 20;
2166         break;
2167     case D3DDECLUSAGE_NORMAL:
2168         sem->Name = TGSI_SEMANTIC_GENERIC;
2169         sem->Index = 10 * index + 21;
2170         break;
2171     case D3DDECLUSAGE_TANGENT:
2172         sem->Name = TGSI_SEMANTIC_GENERIC;
2173         sem->Index = 10 * index + 22;
2174         break;
2175     case D3DDECLUSAGE_BINORMAL:
2176         sem->Name = TGSI_SEMANTIC_GENERIC;
2177         sem->Index = 10 * index + 23;
2178         break;
2179     case D3DDECLUSAGE_TESSFACTOR:
2180         sem->Name = TGSI_SEMANTIC_GENERIC;
2181         sem->Index = 10 * index + 24;
2182         break;
2183     case D3DDECLUSAGE_SAMPLE:
2184         sem->Name = TGSI_SEMANTIC_COUNT;
2185         sem->Index = 0;
2186         break;
2187     default:
2188         unreachable("Invalid DECLUSAGE.");
2189         break;
2190     }
2191 }
2192 
2193 #define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2194 #define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2195 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2196 #define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2197 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2198 d3dstt_to_tgsi_tex(BYTE sampler_type)
2199 {
2200     switch (sampler_type) {
2201     case NINED3DSTT_1D:     return TGSI_TEXTURE_1D;
2202     case NINED3DSTT_2D:     return TGSI_TEXTURE_2D;
2203     case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2204     case NINED3DSTT_CUBE:   return TGSI_TEXTURE_CUBE;
2205     default:
2206         assert(0);
2207         return TGSI_TEXTURE_UNKNOWN;
2208     }
2209 }
2210 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2211 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2212 {
2213     switch (sampler_type) {
2214     case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2215     case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2216     case NINED3DSTT_VOLUME:
2217     case NINED3DSTT_CUBE:
2218     default:
2219         assert(0);
2220         return TGSI_TEXTURE_UNKNOWN;
2221     }
2222 }
2223 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2224 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2225 {
2226     bool shadow = !!(info->sampler_mask_shadow & (1 << stage));
2227     switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2228     case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2229     case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2230     case 3: return TGSI_TEXTURE_3D;
2231     default:
2232         return TGSI_TEXTURE_CUBE;
2233     }
2234 }
2235 
2236 static const char *
sm1_sampler_type_name(BYTE sampler_type)2237 sm1_sampler_type_name(BYTE sampler_type)
2238 {
2239     switch (sampler_type) {
2240     case NINED3DSTT_1D:     return "1D";
2241     case NINED3DSTT_2D:     return "2D";
2242     case NINED3DSTT_VOLUME: return "VOLUME";
2243     case NINED3DSTT_CUBE:   return "CUBE";
2244     default:
2245         return "(D3DSTT_?)";
2246     }
2247 }
2248 
2249 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2250 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2251 {
2252     switch (sem->Name) {
2253     case TGSI_SEMANTIC_POSITION:
2254     case TGSI_SEMANTIC_NORMAL:
2255         return TGSI_INTERPOLATE_LINEAR;
2256     case TGSI_SEMANTIC_BCOLOR:
2257     case TGSI_SEMANTIC_COLOR:
2258         return TGSI_INTERPOLATE_COLOR;
2259     case TGSI_SEMANTIC_FOG:
2260     case TGSI_SEMANTIC_GENERIC:
2261     case TGSI_SEMANTIC_TEXCOORD:
2262     case TGSI_SEMANTIC_CLIPDIST:
2263     case TGSI_SEMANTIC_CLIPVERTEX:
2264         return TGSI_INTERPOLATE_PERSPECTIVE;
2265     case TGSI_SEMANTIC_EDGEFLAG:
2266     case TGSI_SEMANTIC_FACE:
2267     case TGSI_SEMANTIC_INSTANCEID:
2268     case TGSI_SEMANTIC_PCOORD:
2269     case TGSI_SEMANTIC_PRIMID:
2270     case TGSI_SEMANTIC_PSIZE:
2271     case TGSI_SEMANTIC_VERTEXID:
2272         return TGSI_INTERPOLATE_CONSTANT;
2273     default:
2274         assert(0);
2275         return TGSI_INTERPOLATE_CONSTANT;
2276     }
2277 }
2278 
DECL_SPECIAL(DCL)2279 DECL_SPECIAL(DCL)
2280 {
2281     struct ureg_program *ureg = tx->ureg;
2282     bool is_input;
2283     bool is_sampler;
2284     struct tgsi_declaration_semantic tgsi;
2285     struct sm1_semantic sem;
2286     sm1_read_semantic(tx, &sem);
2287 
2288     is_input = sem.reg.file == D3DSPR_INPUT;
2289     is_sampler =
2290         sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2291 
2292     DUMP("DCL ");
2293     sm1_dump_dst_param(&sem.reg);
2294     if (is_sampler)
2295         DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2296     else
2297     if (tx->version.major >= 3)
2298         DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2299     else
2300     if (sem.usage | sem.usage_idx)
2301         DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2302     else
2303         DUMP("\n");
2304 
2305     if (is_sampler) {
2306         const unsigned m = 1 << sem.reg.idx;
2307         ureg_DECL_sampler(ureg, sem.reg.idx);
2308         tx->info->sampler_mask |= m;
2309         tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2310             d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2311             d3dstt_to_tgsi_tex(sem.sampler_type);
2312         return D3D_OK;
2313     }
2314 
2315     sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2316     if (IS_VS) {
2317         if (is_input) {
2318             /* linkage outside of shader with vertex declaration */
2319             ureg_DECL_vs_input(ureg, sem.reg.idx);
2320             assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2321             tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2322             tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2323             /* NOTE: preserving order in case of indirect access */
2324         } else
2325         if (tx->version.major >= 3) {
2326             /* SM2 output semantic determined by file */
2327             assert(sem.reg.mask != 0);
2328             if (sem.usage == D3DDECLUSAGE_POSITIONT)
2329                 tx->info->position_t = true;
2330             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2331             assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2332             tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2333                 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2334             nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2335             if ((tx->info->process_vertices || tx->info->clip_plane_emulation > 0) &&
2336                 sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2337                 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; /* TODO: probably not good declare it twice */
2338                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2339                 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2340             }
2341 
2342             if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2343                 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2344                 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2345             }
2346         }
2347     } else {
2348         if (is_input && tx->version.major >= 3) {
2349             unsigned interp_flag;
2350             unsigned interp_location = 0;
2351             /* SM3 only, SM2 input semantic determined by file */
2352             assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2353             assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2354             /* PositionT and tessfactor forbidden */
2355             if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2356                 return D3DERR_INVALIDCALL;
2357 
2358             if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2359                 /* Position0 is forbidden (likely because vPos already does that) */
2360                 if (sem.usage == D3DDECLUSAGE_POSITION)
2361                     return D3DERR_INVALIDCALL;
2362                 /* Following code is for depth */
2363                 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2364                 return D3D_OK;
2365             }
2366 
2367             if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2368                 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2369                 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2370             interp_flag = nine_tgsi_to_interp_mode(&tgsi);
2371             /* We replace TGSI_INTERPOLATE_COLOR because some drivers don't support it,
2372              * and those who support it do the same replacement we do */
2373             if (interp_flag == TGSI_INTERPOLATE_COLOR)
2374                 interp_flag = tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2375 
2376             tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2377                 ureg, tgsi.Name, tgsi.Index,
2378                 interp_flag,
2379                 interp_location, 0, 1);
2380         } else
2381         if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2382             /* FragColor or FragDepth */
2383             assert(sem.reg.mask != 0);
2384             ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2385                                     0, 1);
2386         }
2387     }
2388     return D3D_OK;
2389 }
2390 
DECL_SPECIAL(DEF)2391 DECL_SPECIAL(DEF)
2392 {
2393     tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2394     return D3D_OK;
2395 }
2396 
DECL_SPECIAL(DEFB)2397 DECL_SPECIAL(DEFB)
2398 {
2399     tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2400     return D3D_OK;
2401 }
2402 
DECL_SPECIAL(DEFI)2403 DECL_SPECIAL(DEFI)
2404 {
2405     tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2406     return D3D_OK;
2407 }
2408 
DECL_SPECIAL(POW)2409 DECL_SPECIAL(POW)
2410 {
2411     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2412     struct ureg_src src[2] = {
2413         tx_src_param(tx, &tx->insn.src[0]),
2414         tx_src_param(tx, &tx->insn.src[1])
2415     };
2416     /* Anything^0 is 1, including 0^0.
2417      * Assume mul_zero_wins drivers already have
2418      * this behaviour. Emulate for the others. */
2419     if (tx->mul_zero_wins) {
2420         ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2421     } else {
2422         struct ureg_dst tmp = tx_scratch_scalar(tx);
2423         ureg_POW(tx->ureg, tmp, ureg_abs(src[0]), src[1]);
2424         ureg_CMP(tx->ureg, dst,
2425              ureg_negate(ureg_abs(ureg_scalar(src[1], TGSI_SWIZZLE_X))),
2426              tx_src_scalar(tmp), ureg_imm1f(tx->ureg, 1.0f));
2427     }
2428     return D3D_OK;
2429 }
2430 
2431 /* Tests results on Win 10:
2432  * NV (NVIDIA GeForce GT 635M)
2433  * AMD (AMD Radeon HD 7730M)
2434  * INTEL (Intel(R) HD Graphics 4000)
2435  * PS2 and PS3:
2436  * RCP and RSQ can generate inf on NV and AMD.
2437  * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2438  * NV: log not clamped
2439  * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2440  * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2441  * All devices have 0*anything = 0
2442  *
2443  * INTEL VS2 and VS3: same behaviour.
2444  * Some differences VS2 and VS3 for constants defined with inf/NaN.
2445  * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
2446  * VS2 seems to clamp to zero (may be test failure).
2447  * AMD VS2: unknown, VS3: very likely behaviour of PS3
2448  * NV VS2 and VS3: very likely behaviour of PS3
 * For both, Inf in VS becomes NaN in PS
2450  * "Very likely" because the test was less extensive.
2451  *
2452  * Thus all clamping can be removed for shaders 2 and 3,
2453  * as long as 0*anything = 0.
2454  * Else clamps to enforce 0*anything = 0 (anything being then
2455  * neither inf or NaN, the user being unlikely to pass them
2456  * as constant).
2457  * The status for VS1 and PS1 is unknown.
2458  */
2459 
DECL_SPECIAL(RCP)2460 DECL_SPECIAL(RCP)
2461 {
2462     struct ureg_program *ureg = tx->ureg;
2463     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2464     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2465     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2466     ureg_RCP(ureg, tmp, src);
2467     if (!tx->mul_zero_wins) {
2468         /* FLT_MAX has issues with Rayman */
2469         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2470         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2471     }
2472     return D3D_OK;
2473 }
2474 
DECL_SPECIAL(RSQ)2475 DECL_SPECIAL(RSQ)
2476 {
2477     struct ureg_program *ureg = tx->ureg;
2478     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2479     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2480     struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2481     ureg_RSQ(ureg, tmp, ureg_abs(src));
2482     if (!tx->mul_zero_wins)
2483         ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2484     return D3D_OK;
2485 }
2486 
DECL_SPECIAL(LOG)2487 DECL_SPECIAL(LOG)
2488 {
2489     struct ureg_program *ureg = tx->ureg;
2490     struct ureg_dst tmp = tx_scratch_scalar(tx);
2491     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2492     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2493     ureg_LG2(ureg, tmp, ureg_abs(src));
2494     if (tx->mul_zero_wins) {
2495         ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2496     } else {
2497         ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2498     }
2499     return D3D_OK;
2500 }
2501 
DECL_SPECIAL(LIT)2502 DECL_SPECIAL(LIT)
2503 {
2504     struct ureg_program *ureg = tx->ureg;
2505     struct ureg_dst tmp = tx_scratch(tx);
2506     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2507     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2508     ureg_LIT(ureg, tmp, src);
2509     /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2510      * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2511      * it 0^0 if src.w=0, which value is driver dependent. */
2512     ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2513              ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2514              ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2515     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2516     return D3D_OK;
2517 }
2518 
DECL_SPECIAL(NRM)2519 DECL_SPECIAL(NRM)
2520 {
2521     struct ureg_program *ureg = tx->ureg;
2522     struct ureg_dst tmp = tx_scratch_scalar(tx);
2523     struct ureg_src nrm = tx_src_scalar(tmp);
2524     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2525     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2526     ureg_DP3(ureg, tmp, src, src);
2527     ureg_RSQ(ureg, tmp, nrm);
2528     if (!tx->mul_zero_wins)
2529         ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2530     ureg_MUL(ureg, dst, src, nrm);
2531     return D3D_OK;
2532 }
2533 
DECL_SPECIAL(DP2ADD)2534 DECL_SPECIAL(DP2ADD)
2535 {
2536     struct ureg_dst tmp = tx_scratch_scalar(tx);
2537     struct ureg_src dp2 = tx_src_scalar(tmp);
2538     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2539     struct ureg_src src[3];
2540     int i;
2541     for (i = 0; i < 3; ++i)
2542         src[i] = tx_src_param(tx, &tx->insn.src[i]);
2543     assert_replicate_swizzle(&src[2]);
2544 
2545     ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2546     ureg_ADD(tx->ureg, dst, src[2], dp2);
2547 
2548     return D3D_OK;
2549 }
2550 
DECL_SPECIAL(TEXCOORD)2551 DECL_SPECIAL(TEXCOORD)
2552 {
2553     struct ureg_program *ureg = tx->ureg;
2554     const unsigned s = tx->insn.dst[0].idx;
2555     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2556 
2557     tx_texcoord_alloc(tx, s);
2558     ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2559     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2560 
2561     return D3D_OK;
2562 }
2563 
DECL_SPECIAL(TEXCOORD_ps14)2564 DECL_SPECIAL(TEXCOORD_ps14)
2565 {
2566     struct ureg_program *ureg = tx->ureg;
2567     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2568     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2569 
2570     assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2571 
2572     ureg_MOV(ureg, dst, src);
2573 
2574     return D3D_OK;
2575 }
2576 
DECL_SPECIAL(TEXKILL)2577 DECL_SPECIAL(TEXKILL)
2578 {
2579     struct ureg_src reg;
2580 
2581     if (tx->version.major > 1 || tx->version.minor > 3) {
2582         reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2583     } else {
2584         tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2585         reg = tx->regs.vT[tx->insn.dst[0].idx];
2586     }
2587     if (tx->version.major < 2)
2588         reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2589     ureg_KILL_IF(tx->ureg, reg);
2590 
2591     return D3D_OK;
2592 }
2593 
DECL_SPECIAL(TEXBEM)2594 DECL_SPECIAL(TEXBEM)
2595 {
2596     struct ureg_program *ureg = tx->ureg;
2597     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2598     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2599     struct ureg_dst tmp, tmp2, texcoord;
2600     struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2601     struct ureg_src bumpenvlscale, bumpenvloffset;
2602     const int m = tx->insn.dst[0].idx;
2603 
2604     assert(tx->version.major == 1);
2605 
2606     sample = ureg_DECL_sampler(ureg, m);
2607     tx->info->sampler_mask |= 1 << m;
2608 
2609     tx_texcoord_alloc(tx, m);
2610 
2611     tmp = tx_scratch(tx);
2612     tmp2 = tx_scratch(tx);
2613     texcoord = tx_scratch(tx);
2614     /*
2615      * Bump-env-matrix:
2616      * 00 is X
2617      * 01 is Y
2618      * 10 is Z
2619      * 11 is W
2620      */
2621     c8m = nine_special_constant_src(tx, m);
2622     c16m2 = nine_special_constant_src(tx, 8+m/2);
2623 
2624     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2625     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2626     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2627     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2628 
2629     /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2630     if (m % 2 == 0) {
2631         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2632         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2633     } else {
2634         bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2635         bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2636     }
2637 
2638     apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2639 
2640     /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R  */
2641     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2642              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2643     /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2644     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2645              NINE_APPLY_SWIZZLE(src, Y),
2646              NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2647 
2648     /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2649     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2650              NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2651     /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2652     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2653              NINE_APPLY_SWIZZLE(src, Y),
2654              NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2655 
2656     /* Now the texture coordinates are in tmp.xy */
2657 
2658     if (tx->insn.opcode == D3DSIO_TEXBEM) {
2659         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2660     } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2661         /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2662         ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2663         ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2664                  bumpenvlscale, bumpenvloffset);
2665         ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2666     }
2667 
2668     tx->info->bumpenvmat_needed = 1;
2669 
2670     return D3D_OK;
2671 }
2672 
DECL_SPECIAL(TEXREG2AR)2673 DECL_SPECIAL(TEXREG2AR)
2674 {
2675     struct ureg_program *ureg = tx->ureg;
2676     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2677     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2678     struct ureg_src sample;
2679     const int m = tx->insn.dst[0].idx;
2680     ASSERTED const int n = tx->insn.src[0].idx;
2681     assert(m >= 0 && m > n);
2682 
2683     sample = ureg_DECL_sampler(ureg, m);
2684     tx->info->sampler_mask |= 1 << m;
2685     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2686 
2687     return D3D_OK;
2688 }
2689 
DECL_SPECIAL(TEXREG2GB)2690 DECL_SPECIAL(TEXREG2GB)
2691 {
2692     struct ureg_program *ureg = tx->ureg;
2693     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2694     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2695     struct ureg_src sample;
2696     const int m = tx->insn.dst[0].idx;
2697     ASSERTED const int n = tx->insn.src[0].idx;
2698     assert(m >= 0 && m > n);
2699 
2700     sample = ureg_DECL_sampler(ureg, m);
2701     tx->info->sampler_mask |= 1 << m;
2702     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2703 
2704     return D3D_OK;
2705 }
2706 
DECL_SPECIAL(TEXM3x2PAD)2707 DECL_SPECIAL(TEXM3x2PAD)
2708 {
2709     return D3D_OK; /* this is just padding */
2710 }
2711 
DECL_SPECIAL(TEXM3x2TEX)2712 DECL_SPECIAL(TEXM3x2TEX)
2713 {
2714     struct ureg_program *ureg = tx->ureg;
2715     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2716     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2717     struct ureg_src sample;
2718     const int m = tx->insn.dst[0].idx - 1;
2719     ASSERTED const int n = tx->insn.src[0].idx;
2720     assert(m >= 0 && m > n);
2721 
2722     tx_texcoord_alloc(tx, m);
2723     tx_texcoord_alloc(tx, m+1);
2724 
2725     /* performs the matrix multiplication */
2726     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2727     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2728 
2729     sample = ureg_DECL_sampler(ureg, m + 1);
2730     tx->info->sampler_mask |= 1 << (m + 1);
2731     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2732 
2733     return D3D_OK;
2734 }
2735 
DECL_SPECIAL(TEXM3x3PAD)2736 DECL_SPECIAL(TEXM3x3PAD)
2737 {
2738     return D3D_OK; /* this is just padding */
2739 }
2740 
DECL_SPECIAL(TEXM3x3SPEC)2741 DECL_SPECIAL(TEXM3x3SPEC)
2742 {
2743     struct ureg_program *ureg = tx->ureg;
2744     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2745     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2746     struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2747     struct ureg_src sample;
2748     struct ureg_dst tmp;
2749     const int m = tx->insn.dst[0].idx - 2;
2750     ASSERTED const int n = tx->insn.src[0].idx;
2751     assert(m >= 0 && m > n);
2752 
2753     tx_texcoord_alloc(tx, m);
2754     tx_texcoord_alloc(tx, m+1);
2755     tx_texcoord_alloc(tx, m+2);
2756 
2757     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2758     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2759     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2760 
2761     sample = ureg_DECL_sampler(ureg, m + 2);
2762     tx->info->sampler_mask |= 1 << (m + 2);
2763     tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2764 
2765     /* At this step, dst = N = (u', w', z').
2766      * We want dst to be the texture sampled at (u'', w'', z''), with
2767      * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2768     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2769     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2770     /* at this step tmp.x = 1/N.N */
2771     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2772     /* at this step tmp.y = N.E */
2773     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2774     /* at this step tmp.x = N.E/N.N */
2775     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2776     ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2777     /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2778     ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2779     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2780 
2781     return D3D_OK;
2782 }
2783 
DECL_SPECIAL(TEXREG2RGB)2784 DECL_SPECIAL(TEXREG2RGB)
2785 {
2786     struct ureg_program *ureg = tx->ureg;
2787     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2788     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2789     struct ureg_src sample;
2790     const int m = tx->insn.dst[0].idx;
2791     ASSERTED const int n = tx->insn.src[0].idx;
2792     assert(m >= 0 && m > n);
2793 
2794     sample = ureg_DECL_sampler(ureg, m);
2795     tx->info->sampler_mask |= 1 << m;
2796     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2797 
2798     return D3D_OK;
2799 }
2800 
DECL_SPECIAL(TEXDP3TEX)2801 DECL_SPECIAL(TEXDP3TEX)
2802 {
2803     struct ureg_program *ureg = tx->ureg;
2804     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2805     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2806     struct ureg_dst tmp;
2807     struct ureg_src sample;
2808     const int m = tx->insn.dst[0].idx;
2809     ASSERTED const int n = tx->insn.src[0].idx;
2810     assert(m >= 0 && m > n);
2811 
2812     tx_texcoord_alloc(tx, m);
2813 
2814     tmp = tx_scratch(tx);
2815     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2816     ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2817 
2818     sample = ureg_DECL_sampler(ureg, m);
2819     tx->info->sampler_mask |= 1 << m;
2820     ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2821 
2822     return D3D_OK;
2823 }
2824 
DECL_SPECIAL(TEXM3x2DEPTH)2825 DECL_SPECIAL(TEXM3x2DEPTH)
2826 {
2827     struct ureg_program *ureg = tx->ureg;
2828     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2829     struct ureg_dst tmp;
2830     const int m = tx->insn.dst[0].idx - 1;
2831     ASSERTED const int n = tx->insn.src[0].idx;
2832     assert(m >= 0 && m > n);
2833 
2834     tx_texcoord_alloc(tx, m);
2835     tx_texcoord_alloc(tx, m+1);
2836 
2837     tmp = tx_scratch(tx);
2838 
2839     /* performs the matrix multiplication */
2840     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2841     ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2842 
2843     ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2844     /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2845     ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2846     /* res = 'w' == 0 ? 1.0 : z/w */
2847     ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2848              ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2849     /* replace the depth for depth testing with the result */
2850     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2851                                               TGSI_WRITEMASK_Z, 0, 1);
2852     ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2853     /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2854     return D3D_OK;
2855 }
2856 
DECL_SPECIAL(TEXDP3)2857 DECL_SPECIAL(TEXDP3)
2858 {
2859     struct ureg_program *ureg = tx->ureg;
2860     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2861     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2862     const int m = tx->insn.dst[0].idx;
2863     ASSERTED const int n = tx->insn.src[0].idx;
2864     assert(m >= 0 && m > n);
2865 
2866     tx_texcoord_alloc(tx, m);
2867 
2868     ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2869 
2870     return D3D_OK;
2871 }
2872 
DECL_SPECIAL(TEXM3x3)2873 DECL_SPECIAL(TEXM3x3)
2874 {
2875     struct ureg_program *ureg = tx->ureg;
2876     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2877     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2878     struct ureg_src sample;
2879     struct ureg_dst E, tmp;
2880     const int m = tx->insn.dst[0].idx - 2;
2881     ASSERTED const int n = tx->insn.src[0].idx;
2882     assert(m >= 0 && m > n);
2883 
2884     tx_texcoord_alloc(tx, m);
2885     tx_texcoord_alloc(tx, m+1);
2886     tx_texcoord_alloc(tx, m+2);
2887 
2888     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2889     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2890     ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2891 
2892     switch (tx->insn.opcode) {
2893     case D3DSIO_TEXM3x3:
2894         ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2895         break;
2896     case D3DSIO_TEXM3x3TEX:
2897         sample = ureg_DECL_sampler(ureg, m + 2);
2898         tx->info->sampler_mask |= 1 << (m + 2);
2899         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2900         break;
2901     case D3DSIO_TEXM3x3VSPEC:
2902         sample = ureg_DECL_sampler(ureg, m + 2);
2903         tx->info->sampler_mask |= 1 << (m + 2);
2904         E = tx_scratch(tx);
2905         tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2906         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2907         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2908         ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2909         /* At this step, dst = N = (u', w', z').
2910          * We want dst to be the texture sampled at (u'', w'', z''), with
2911          * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2912         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2913         ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2914         /* at this step tmp.x = 1/N.N */
2915         ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2916         /* at this step tmp.y = N.E */
2917         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2918         /* at this step tmp.x = N.E/N.N */
2919         ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2920         ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2921         /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2922         ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2923         ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2924         break;
2925     default:
2926         return D3DERR_INVALIDCALL;
2927     }
2928     return D3D_OK;
2929 }
2930 
DECL_SPECIAL(TEXDEPTH)2931 DECL_SPECIAL(TEXDEPTH)
2932 {
2933     struct ureg_program *ureg = tx->ureg;
2934     struct ureg_dst r5;
2935     struct ureg_src r5r, r5g;
2936 
2937     assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2938 
2939     /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2940      * r5 won't be used afterward, thus we can use r5.ba */
2941     r5 = tx->regs.r[5];
2942     r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2943     r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2944 
2945     ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2946     ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2947     /* r5.r = r/g */
2948     ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2949              r5r, ureg_imm1f(ureg, 1.0f));
2950     /* replace the depth for depth testing with the result */
2951     tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2952                                               TGSI_WRITEMASK_Z, 0, 1);
2953     ureg_MOV(ureg, tx->regs.oDepth, r5r);
2954 
2955     return D3D_OK;
2956 }
2957 
DECL_SPECIAL(BEM)2958 DECL_SPECIAL(BEM)
2959 {
2960     struct ureg_program *ureg = tx->ureg;
2961     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2962     struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2963     struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2964     struct ureg_src m00, m01, m10, m11, c8m;
2965     const int m = tx->insn.dst[0].idx;
2966     struct ureg_dst tmp = tx_scratch(tx);
2967     /*
2968      * Bump-env-matrix:
2969      * 00 is X
2970      * 01 is Y
2971      * 10 is Z
2972      * 11 is W
2973      */
2974     c8m = nine_special_constant_src(tx, m);
2975     m00 = NINE_APPLY_SWIZZLE(c8m, X);
2976     m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2977     m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2978     m11 = NINE_APPLY_SWIZZLE(c8m, W);
2979     /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r  */
2980     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2981              NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2982     /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2983     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2984              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2985 
2986     /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2987     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2988              NINE_APPLY_SWIZZLE(src1, X), src0);
2989     /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2990     ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2991              NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2992     ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2993 
2994     tx->info->bumpenvmat_needed = 1;
2995 
2996     return D3D_OK;
2997 }
2998 
DECL_SPECIAL(TEXLD)2999 DECL_SPECIAL(TEXLD)
3000 {
3001     struct ureg_program *ureg = tx->ureg;
3002     unsigned target;
3003     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3004     struct ureg_src src[2] = {
3005         tx_src_param(tx, &tx->insn.src[0]),
3006         tx_src_param(tx, &tx->insn.src[1])
3007     };
3008     assert(tx->insn.src[1].idx >= 0 &&
3009            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3010     target = tx->sampler_targets[tx->insn.src[1].idx];
3011 
3012     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3013         return D3D_OK;
3014 
3015     switch (tx->insn.flags) {
3016     case 0:
3017         ureg_TEX(ureg, dst, target, src[0], src[1]);
3018         break;
3019     case NINED3DSI_TEXLD_PROJECT:
3020         ureg_TXP(ureg, dst, target, src[0], src[1]);
3021         break;
3022     case NINED3DSI_TEXLD_BIAS:
3023         ureg_TXB(ureg, dst, target, src[0], src[1]);
3024         break;
3025     default:
3026         assert(0);
3027         return D3DERR_INVALIDCALL;
3028     }
3029     return D3D_OK;
3030 }
3031 
DECL_SPECIAL(TEXLD_14)3032 DECL_SPECIAL(TEXLD_14)
3033 {
3034     struct ureg_program *ureg = tx->ureg;
3035     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3036     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3037     const unsigned s = tx->insn.dst[0].idx;
3038     const unsigned t = ps1x_sampler_type(tx->info, s);
3039 
3040     tx->info->sampler_mask |= 1 << s;
3041     ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
3042 
3043     return D3D_OK;
3044 }
3045 
DECL_SPECIAL(TEX)3046 DECL_SPECIAL(TEX)
3047 {
3048     struct ureg_program *ureg = tx->ureg;
3049     const unsigned s = tx->insn.dst[0].idx;
3050     const unsigned t = ps1x_sampler_type(tx->info, s);
3051     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052     struct ureg_src src[2];
3053 
3054     tx_texcoord_alloc(tx, s);
3055 
3056     src[0] = tx->regs.vT[s];
3057     src[1] = ureg_DECL_sampler(ureg, s);
3058     tx->info->sampler_mask |= 1 << s;
3059 
3060     TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3061 
3062     return D3D_OK;
3063 }
3064 
DECL_SPECIAL(TEXLDD)3065 DECL_SPECIAL(TEXLDD)
3066 {
3067     unsigned target;
3068     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3069     struct ureg_src src[4] = {
3070         tx_src_param(tx, &tx->insn.src[0]),
3071         tx_src_param(tx, &tx->insn.src[1]),
3072         tx_src_param(tx, &tx->insn.src[2]),
3073         tx_src_param(tx, &tx->insn.src[3])
3074     };
3075     assert(tx->insn.src[1].idx >= 0 &&
3076            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3077     target = tx->sampler_targets[tx->insn.src[1].idx];
3078 
3079     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3080         return D3D_OK;
3081 
3082     ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3083     return D3D_OK;
3084 }
3085 
DECL_SPECIAL(TEXLDL)3086 DECL_SPECIAL(TEXLDL)
3087 {
3088     unsigned target;
3089     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3090     struct ureg_src src[2] = {
3091        tx_src_param(tx, &tx->insn.src[0]),
3092        tx_src_param(tx, &tx->insn.src[1])
3093     };
3094     assert(tx->insn.src[1].idx >= 0 &&
3095            tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3096     target = tx->sampler_targets[tx->insn.src[1].idx];
3097 
3098     if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3099         return D3D_OK;
3100 
3101     ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3102     return D3D_OK;
3103 }
3104 
DECL_SPECIAL(SETP)3105 DECL_SPECIAL(SETP)
3106 {
3107     const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3108     struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3109     struct ureg_src src[2] = {
3110        tx_src_param(tx, &tx->insn.src[0]),
3111        tx_src_param(tx, &tx->insn.src[1])
3112     };
3113     ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3114     return D3D_OK;
3115 }
3116 
DECL_SPECIAL(BREAKP)3117 DECL_SPECIAL(BREAKP)
3118 {
3119     struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3120     ureg_IF(tx->ureg, src, tx_cond(tx));
3121     ureg_BRK(tx->ureg);
3122     tx_endcond(tx);
3123     ureg_ENDIF(tx->ureg);
3124     return D3D_OK;
3125 }
3126 
DECL_SPECIAL(PHASE)3127 DECL_SPECIAL(PHASE)
3128 {
3129     return D3D_OK; /* we don't care about phase */
3130 }
3131 
DECL_SPECIAL(COMMENT)3132 DECL_SPECIAL(COMMENT)
3133 {
3134     return D3D_OK; /* nothing to do */
3135 }
3136 
3137 
/* Builds one struct sm1_op_info initializer.
 *
 *   op                 D3DSIO_ opcode suffix
 *   tgsi               TGSI_OPCODE_ suffix
 *   vs_min, vs_max     vertex shader version range
 *   ps_min, ps_max     fragment shader version range
 *   ndst, nsrc, handler
 *                      passed through unchanged as the last three members
 *                      (presumably operand counts and the translation
 *                      callback -- confirm against struct sm1_op_info)
 */
#define _OPI(op,tgsi,vs_min,vs_max,ps_min,ps_max,ndst,nsrc,handler) \
    { D3DSIO_##op, TGSI_OPCODE_##tgsi, \
      { vs_min, vs_max }, { ps_min, ps_max }, \
      ndst, nsrc, handler }
3140 
3141 static const struct sm1_op_info inst_table[] =
3142 {
3143     _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3144     _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3145     _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3146     _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3147     _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3148     _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3149     _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3150     _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3151     _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3152     _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3153     _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3154     _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3155     _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3156     _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3157     _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3158     _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3159     _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3160     _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3161     _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3162     _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3163 
3164     _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3165     _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3166     _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3167     _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3168     _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3169 
3170     _OPI(CALL,    CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3171     _OPI(CALLNZ,  CAL,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3172     _OPI(LOOP,    BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3173     _OPI(RET,     RET,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3174     _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3175     _OPI(LABEL,   NOP,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3176 
3177     _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3178 
3179     _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3180     _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3181     _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3182     _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3183     _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3184 
3185     _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3186     _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3187 
3188     /* More flow control */
3189     _OPI(REP,    NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3190     _OPI(ENDREP, NOP,    V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3191     _OPI(IF,     IF,     V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3192     _OPI(IFC,    IF,     V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3193     _OPI(ELSE,   ELSE,   V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3194     _OPI(ENDIF,  ENDIF,  V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3195     _OPI(BREAK,  BRK,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3196     _OPI(BREAKC, NOP,    V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3197     /* we don't write to the address register, but a normal register (copied
3198      * when needed to the address register), thus we don't use ARR */
3199     _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3200 
3201     _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3202     _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3203 
3204     _OPI(TEXCOORD,     NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3205     _OPI(TEXCOORD,     MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3206     _OPI(TEXKILL,      KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3207     _OPI(TEX,          TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3208     _OPI(TEX,          TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3209     _OPI(TEX,          TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3210     _OPI(TEXBEM,       TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3211     _OPI(TEXBEML,      TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3212     _OPI(TEXREG2AR,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3213     _OPI(TEXREG2GB,    TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3214     _OPI(TEXM3x2PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3215     _OPI(TEXM3x2TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3216     _OPI(TEXM3x3PAD,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3217     _OPI(TEXM3x3TEX,   TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3218     _OPI(TEXM3x3SPEC,  TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3219     _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3220 
3221     _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3222     _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3223     _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3224     _OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3225 
3226     _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3227 
3228     /* More tex stuff */
3229     _OPI(TEXREG2RGB,   TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3230     _OPI(TEXDP3TEX,    TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3231     _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3232     _OPI(TEXDP3,       TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3233     _OPI(TEXM3x3,      TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3234     _OPI(TEXDEPTH,     TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3235 
3236     /* Misc */
3237     _OPI(CMP,    CMP,  V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3238     _OPI(BEM,    NOP,  V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3239     _OPI(DP2ADD, NOP,  V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3240     _OPI(DSX,    DDX,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3241     _OPI(DSY,    DDY,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3242     _OPI(TEXLDD, TXD,  V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3243     _OPI(SETP,   NOP,  V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3244     _OPI(TEXLDL, TXL,  V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3245     _OPI(BREAKP, BRK,  V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3246 };
3247 
3248 static const struct sm1_op_info inst_phase =
3249     _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3250 
3251 static const struct sm1_op_info inst_comment =
3252     _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3253 
3254 static void
create_op_info_map(struct shader_translator * tx)3255 create_op_info_map(struct shader_translator *tx)
3256 {
3257     const unsigned version = (tx->version.major << 8) | tx->version.minor;
3258     unsigned i;
3259 
3260     for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3261         tx->op_info_map[i] = -1;
3262 
3263     if (tx->processor == PIPE_SHADER_VERTEX) {
3264         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3265             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3266             if (inst_table[i].vert_version.min <= version &&
3267                 inst_table[i].vert_version.max >= version)
3268                 tx->op_info_map[inst_table[i].sio] = i;
3269         }
3270     } else {
3271         for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3272             assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3273             if (inst_table[i].frag_version.min <= version &&
3274                 inst_table[i].frag_version.max >= version)
3275                 tx->op_info_map[inst_table[i].sio] = i;
3276         }
3277     }
3278 }
3279 
3280 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3281 NineTranslateInstruction_Generic(struct shader_translator *tx)
3282 {
3283     struct ureg_dst dst[1];
3284     struct ureg_src src[4];
3285     unsigned i;
3286 
3287     for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3288         dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3289     for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3290         src[i] = tx_src_param(tx, &tx->insn.src[i]);
3291 
3292     ureg_insn(tx->ureg, tx->insn.info->opcode,
3293               dst, tx->insn.ndst,
3294               src, tx->insn.nsrc, 0);
3295     return D3D_OK;
3296 }
3297 
3298 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3299 TOKEN_PEEK(struct shader_translator *tx)
3300 {
3301     return *(tx->parse);
3302 }
3303 
3304 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3305 TOKEN_NEXT(struct shader_translator *tx)
3306 {
3307     return *(tx->parse)++;
3308 }
3309 
3310 static inline void
TOKEN_JUMP(struct shader_translator * tx)3311 TOKEN_JUMP(struct shader_translator *tx)
3312 {
3313     if (tx->parse_next && tx->parse != tx->parse_next) {
3314         WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3315         tx->parse = tx->parse_next;
3316     }
3317 }
3318 
3319 static inline bool
sm1_parse_eof(struct shader_translator * tx)3320 sm1_parse_eof(struct shader_translator *tx)
3321 {
3322     return TOKEN_PEEK(tx) == NINED3DSP_END;
3323 }
3324 
3325 static void
sm1_read_version(struct shader_translator * tx)3326 sm1_read_version(struct shader_translator *tx)
3327 {
3328     const DWORD tok = TOKEN_NEXT(tx);
3329 
3330     tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3331     tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3332 
3333     switch (tok >> 16) {
3334     case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3335     case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3336     default:
3337        DBG("Invalid shader type: %x\n", tok);
3338        tx->processor = ~0;
3339        break;
3340     }
3341 }
3342 
3343 /* This is just to check if we parsed the instruction properly. */
3344 static void
sm1_parse_get_skip(struct shader_translator * tx)3345 sm1_parse_get_skip(struct shader_translator *tx)
3346 {
3347     const DWORD tok = TOKEN_PEEK(tx);
3348 
3349     if (tx->version.major >= 2) {
3350         tx->parse_next = tx->parse + 1 /* this */ +
3351             ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3352     } else {
3353         tx->parse_next = NULL; /* TODO: determine from param count */
3354     }
3355 }
3356 
3357 static void
sm1_print_comment(const char * comment,UINT size)3358 sm1_print_comment(const char *comment, UINT size)
3359 {
3360     if (!size)
3361         return;
3362     /* TODO */
3363 }
3364 
3365 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3366 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3367 {
3368     DWORD tok = TOKEN_PEEK(tx);
3369 
3370     while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3371     {
3372         const char *comment = "";
3373         UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3374         tx->parse += size + 1;
3375 
3376         if (print)
3377             sm1_print_comment(comment, size);
3378 
3379         tok = TOKEN_PEEK(tx);
3380     }
3381 }
3382 
3383 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3384 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3385 {
3386     *reg = TOKEN_NEXT(tx);
3387 
3388     if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3389     {
3390         if (tx->version.major < 2)
3391             *rel = (1 << 31) |
3392                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3393                 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT)  & D3DSP_REGTYPE_MASK) |
3394                 D3DSP_NOSWIZZLE;
3395         else
3396             *rel = TOKEN_NEXT(tx);
3397     }
3398 }
3399 
3400 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3401 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3402 {
3403     int8_t shift;
3404     dst->file =
3405         (tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT |
3406         (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3407     dst->type = TGSI_RETURN_TYPE_FLOAT;
3408     dst->idx = tok & D3DSP_REGNUM_MASK;
3409     dst->rel = NULL;
3410     dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3411     dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3412     shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3413     dst->shift = (shift & 0x7) - (shift & 0x8);
3414 }
3415 
3416 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3417 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3418 {
3419     src->file =
3420         ((tok & D3DSP_REGTYPE_MASK)  >> D3DSP_REGTYPE_SHIFT) |
3421         ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3422     src->type = TGSI_RETURN_TYPE_FLOAT;
3423     src->idx = tok & D3DSP_REGNUM_MASK;
3424     src->rel = NULL;
3425     src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3426     src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3427 
3428     switch (src->file) {
3429     case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3430     case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3431     case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3432     default:
3433         break;
3434     }
3435 }
3436 
3437 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3438 sm1_parse_immediate(struct shader_translator *tx,
3439                     struct sm1_src_param *imm)
3440 {
3441     imm->file = NINED3DSPR_IMMEDIATE;
3442     imm->idx = INT_MIN;
3443     imm->rel = NULL;
3444     imm->swizzle = NINED3DSP_NOSWIZZLE;
3445     imm->mod = 0;
3446     switch (tx->insn.opcode) {
3447     case D3DSIO_DEF:
3448         imm->type = NINED3DSPTYPE_FLOAT4;
3449         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3450         tx->parse += 4;
3451         break;
3452     case D3DSIO_DEFI:
3453         imm->type = NINED3DSPTYPE_INT4;
3454         memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3455         tx->parse += 4;
3456         break;
3457     case D3DSIO_DEFB:
3458         imm->type = NINED3DSPTYPE_BOOL;
3459         memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3460         tx->parse += 1;
3461         break;
3462     default:
3463        assert(0);
3464        break;
3465     }
3466 }
3467 
3468 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3469 sm1_read_dst_param(struct shader_translator *tx,
3470                    struct sm1_dst_param *dst,
3471                    struct sm1_src_param *rel)
3472 {
3473     DWORD tok_dst, tok_rel = 0;
3474 
3475     sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3476     sm1_parse_dst_param(dst, tok_dst);
3477     if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3478         sm1_parse_src_param(rel, tok_rel);
3479         dst->rel = rel;
3480     }
3481 }
3482 
3483 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3484 sm1_read_src_param(struct shader_translator *tx,
3485                    struct sm1_src_param *src,
3486                    struct sm1_src_param *rel)
3487 {
3488     DWORD tok_src, tok_rel = 0;
3489 
3490     sm1_parse_get_param(tx, &tok_src, &tok_rel);
3491     sm1_parse_src_param(src, tok_src);
3492     if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3493         assert(rel);
3494         sm1_parse_src_param(rel, tok_rel);
3495         src->rel = rel;
3496     }
3497 }
3498 
3499 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3500 sm1_read_semantic(struct shader_translator *tx,
3501                   struct sm1_semantic *sem)
3502 {
3503     const DWORD tok_usg = TOKEN_NEXT(tx);
3504     const DWORD tok_dst = TOKEN_NEXT(tx);
3505 
3506     sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3507     sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3508     sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3509 
3510     sm1_parse_dst_param(&sem->reg, tok_dst);
3511 }
3512 
3513 static void
sm1_parse_instruction(struct shader_translator * tx)3514 sm1_parse_instruction(struct shader_translator *tx)
3515 {
3516     struct sm1_instruction *insn = &tx->insn;
3517     HRESULT hr;
3518     DWORD tok;
3519     const struct sm1_op_info *info = NULL;
3520     unsigned i;
3521 
3522     sm1_parse_comments(tx, true);
3523     sm1_parse_get_skip(tx);
3524 
3525     tok = TOKEN_NEXT(tx);
3526 
3527     insn->opcode = tok & D3DSI_OPCODE_MASK;
3528     insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3529     insn->coissue = !!(tok & D3DSI_COISSUE);
3530     insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3531 
3532     if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3533         int k = tx->op_info_map[insn->opcode];
3534         if (k >= 0) {
3535             assert(k < ARRAY_SIZE(inst_table));
3536             info = &inst_table[k];
3537         }
3538     } else {
3539        if (insn->opcode == D3DSIO_PHASE)   info = &inst_phase;
3540        if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3541     }
3542     if (!info) {
3543        DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3544        TOKEN_JUMP(tx);
3545        return;
3546     }
3547     insn->info = info;
3548     insn->ndst = info->ndst;
3549     insn->nsrc = info->nsrc;
3550 
3551     /* check version */
3552     {
3553         unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3554         unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3555         unsigned ver = (tx->version.major << 8) | tx->version.minor;
3556         if (ver < min || ver > max) {
3557             DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3558                 min, ver, max);
3559             return;
3560         }
3561     }
3562 
3563     for (i = 0; i < insn->ndst; ++i)
3564         sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3565     if (insn->predicated)
3566         sm1_read_src_param(tx, &insn->pred, NULL);
3567     for (i = 0; i < insn->nsrc; ++i)
3568         sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3569 
3570     /* parse here so we can dump them before processing */
3571     if (insn->opcode == D3DSIO_DEF ||
3572         insn->opcode == D3DSIO_DEFI ||
3573         insn->opcode == D3DSIO_DEFB)
3574         sm1_parse_immediate(tx, &tx->insn.src[0]);
3575 
3576     sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3577     sm1_instruction_check(insn);
3578 
3579     if (insn->predicated) {
3580         tx->predicated_activated = true;
3581         if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3582             tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3583             tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3584         }
3585     }
3586 
3587     if (info->handler)
3588         hr = info->handler(tx);
3589     else
3590         hr = NineTranslateInstruction_Generic(tx);
3591     tx_apply_dst0_modifiers(tx);
3592 
3593     if (insn->predicated) {
3594         tx->predicated_activated = false;
3595         /* TODO: predicate might be allowed on outputs,
3596          * which cannot be src. Workaround it. */
3597         ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3598                  ureg_negate(tx_src_param(tx, &insn->pred)),
3599                  ureg_src(tx->regs.predicate_tmp),
3600                  ureg_src(tx->regs.predicate_dst));
3601     }
3602 
3603     if (hr != D3D_OK)
3604         tx->failure = true;
3605     tx->num_scratch = 0; /* reset */
3606 
3607     TOKEN_JUMP(tx);
3608 }
3609 
/* Capability queries. Both macros expect a `screen` pointer in scope;
 * GET_SHADER_CAP additionally expects `info` and queries for info->type. */
#define GET_CAP(n) screen->get_param(screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) \
      screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
3614 
3615 static HRESULT
tx_ctor(struct shader_translator * tx,struct pipe_screen * screen,struct nine_shader_info * info)3616 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3617 {
3618     unsigned i;
3619 
3620     memset(tx, 0, sizeof(*tx));
3621 
3622     tx->info = info;
3623 
3624     tx->byte_code = info->byte_code;
3625     tx->parse = info->byte_code;
3626 
3627     for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3628         info->input_map[i] = NINE_DECLUSAGE_NONE;
3629     info->num_inputs = 0;
3630 
3631     info->position_t = false;
3632     info->point_size = false;
3633 
3634     memset(tx->slots_used, 0, sizeof(tx->slots_used));
3635     memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3636     memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3637 
3638     tx->info->const_float_slots = 0;
3639     tx->info->const_int_slots = 0;
3640     tx->info->const_bool_slots = 0;
3641 
3642     info->sampler_mask = 0x0;
3643     info->rt_mask = 0x0;
3644 
3645     info->lconstf.data = NULL;
3646     info->lconstf.ranges = NULL;
3647 
3648     info->bumpenvmat_needed = 0;
3649 
3650     for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3651         tx->regs.rL[i] = ureg_dst_undef();
3652     }
3653     tx->regs.address = ureg_dst_undef();
3654     tx->regs.a0 = ureg_dst_undef();
3655     tx->regs.p = ureg_dst_undef();
3656     tx->regs.oDepth = ureg_dst_undef();
3657     tx->regs.vPos = ureg_src_undef();
3658     tx->regs.vFace = ureg_src_undef();
3659     for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3660         tx->regs.o[i] = ureg_dst_undef();
3661     for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3662         tx->regs.oCol[i] = ureg_dst_undef();
3663     for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3664         tx->regs.vC[i] = ureg_src_undef();
3665     for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3666         tx->regs.vT[i] = ureg_src_undef();
3667 
3668     sm1_read_version(tx);
3669 
3670     info->version = (tx->version.major << 4) | tx->version.minor;
3671 
3672     tx->num_outputs = 0;
3673 
3674     create_op_info_map(tx);
3675 
3676     tx->ureg = ureg_create(info->type);
3677     if (!tx->ureg) {
3678         return E_OUTOFMEMORY;
3679     }
3680 
3681     tx->native_integers = GET_SHADER_CAP(INTEGERS);
3682     tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3683     tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3684     tx->shift_wpos = !GET_CAP(FS_COORD_PIXEL_CENTER_INTEGER);
3685     tx->texcoord_sn = tx->want_texcoord ?
3686         TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3687     tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL);
3688     tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL);
3689     tx->no_vs_window_space = !GET_CAP(VS_WINDOW_SPACE_POSITION);
3690     tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES);
3691 
3692     if (info->emulate_features) {
3693         tx->shift_wpos = true;
3694         tx->no_vs_window_space = true;
3695         tx->mul_zero_wins = false;
3696     }
3697 
3698     if (IS_VS) {
3699         tx->num_constf_allowed = NINE_MAX_CONST_F;
3700     } else if (tx->version.major < 2) {/* IS_PS v1 */
3701         tx->num_constf_allowed = 8;
3702     } else if (tx->version.major == 2) {/* IS_PS v2 */
3703         tx->num_constf_allowed = 32;
3704     } else {/* IS_PS v3 */
3705         tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3706     }
3707 
3708     if (tx->version.major < 2) {
3709         tx->num_consti_allowed = 0;
3710         tx->num_constb_allowed = 0;
3711     } else {
3712         tx->num_consti_allowed = NINE_MAX_CONST_I;
3713         tx->num_constb_allowed = NINE_MAX_CONST_B;
3714     }
3715 
3716     if (info->swvp_on) {
3717         /* TODO: The values tx->version.major == 1 */
3718         tx->num_constf_allowed = 8192;
3719         tx->num_consti_allowed = 2048;
3720         tx->num_constb_allowed = 2048;
3721     }
3722 
3723     /* VS must always write position. Declare it here to make it the 1st output.
3724      * (Some drivers like nv50 are buggy and rely on that.)
3725      */
3726     if (IS_VS) {
3727         tx->regs.oPos_out = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3728     } else {
3729         ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3730         if (!tx->shift_wpos)
3731             ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3732     }
3733 
3734     if (tx->mul_zero_wins)
3735        ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
3736 
3737     /* Add additional definition of constants */
3738     if (info->add_constants_defs.c_combination) {
3739         unsigned i;
3740 
3741         assert(info->add_constants_defs.int_const_added);
3742         assert(info->add_constants_defs.bool_const_added);
3743         /* We only add constants that are used by the shader
3744          * and that are not defined in the shader */
3745         for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3746             if ((*info->add_constants_defs.int_const_added)[i]) {
3747                 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3748                     info->add_constants_defs.c_combination->const_i[i][0],
3749                     info->add_constants_defs.c_combination->const_i[i][1],
3750                     info->add_constants_defs.c_combination->const_i[i][2],
3751                     info->add_constants_defs.c_combination->const_i[i][3]);
3752                 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3753             }
3754         }
3755         for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3756             if ((*info->add_constants_defs.bool_const_added)[i]) {
3757                 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3758                 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3759             }
3760         }
3761     }
3762     return D3D_OK;
3763 }
3764 
3765 static void
tx_dtor(struct shader_translator * tx)3766 tx_dtor(struct shader_translator *tx)
3767 {
3768     if (tx->slot_map)
3769         FREE(tx->slot_map);
3770     if (tx->num_inst_labels)
3771         FREE(tx->inst_labels);
3772     FREE(tx->lconstf);
3773     FREE(tx->regs.r);
3774     FREE(tx);
3775 }
3776 
3777 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3778  * CONST[1].xyz = x+width/2, y+height/2, zmin */
3779 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3780 shader_add_vs_viewport_transform(struct shader_translator *tx)
3781 {
3782     struct ureg_program *ureg = tx->ureg;
3783     struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3784     struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3785     /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3786 
3787     c0 = ureg_src_dimension(c0, 4);
3788     c1 = ureg_src_dimension(c1, 4);
3789     /* TODO: find out when we need to apply the viewport transformation or not.
3790      * Likely will be XYZ vs XYZRHW in vdecl_out
3791      * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3792      * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3793      */
3794     ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3795 }
3796 
3797 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_dst dst_col,struct ureg_src src_col)3798 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_dst dst_col, struct ureg_src src_col)
3799 {
3800     struct ureg_program *ureg = tx->ureg;
3801     struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3802     struct ureg_src fog_vs, fog_color;
3803     struct ureg_dst fog_factor, depth;
3804 
3805     if (!tx->info->fog_enable) {
3806         ureg_MOV(ureg, dst_col, src_col);
3807         return;
3808     }
3809 
3810     if (tx->info->fog_mode != D3DFOG_NONE) {
3811         depth = tx_scratch_scalar(tx);
3812         if (tx->info->zfog)
3813             ureg_MOV(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3814         else /* wfog: use w. position's w contains 1/w */
3815             ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3816     }
3817 
3818     fog_color = nine_special_constant_src(tx, 12);
3819     fog_params = nine_special_constant_src(tx, 13);
3820     fog_factor = tx_scratch_scalar(tx);
3821 
3822     if (tx->info->fog_mode == D3DFOG_LINEAR) {
3823         fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3824         fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3825         ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3826         ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3827     } else if (tx->info->fog_mode == D3DFOG_EXP) {
3828         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3829         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3830         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3831         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3832     } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3833         fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3834         ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3835         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3836         ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3837         ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3838     } else {
3839         fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3840                                             TGSI_INTERPOLATE_PERSPECTIVE),
3841                                             TGSI_SWIZZLE_X);
3842         ureg_MOV(ureg, fog_factor, fog_vs);
3843     }
3844 
3845     ureg_LRP(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_XYZ),
3846              tx_src_scalar(fog_factor), src_col, fog_color);
3847     ureg_MOV(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_W), src_col);
3848 }
3849 
3850 static void
shader_add_ps_alpha_test_stage(struct shader_translator * tx,struct ureg_src src_color)3851 shader_add_ps_alpha_test_stage(struct shader_translator *tx, struct ureg_src src_color)
3852 {
3853     struct ureg_program *ureg = tx->ureg;
3854     unsigned cmp_op;
3855     struct ureg_src src[2];
3856     struct ureg_dst tmp = tx_scratch(tx);
3857     if (tx->info->alpha_test_emulation == PIPE_FUNC_ALWAYS)
3858         return;
3859     if (tx->info->alpha_test_emulation == PIPE_FUNC_NEVER) {
3860         ureg_KILL(ureg);
3861         return;
3862     }
3863     cmp_op = pipe_comp_to_tgsi_opposite(tx->info->alpha_test_emulation);
3864     src[0] = ureg_scalar(src_color, TGSI_SWIZZLE_W); /* Read color alpha channel */
3865     src[1] = ureg_scalar(nine_special_constant_src(tx, 14), TGSI_SWIZZLE_X); /* Read alphatest */
3866     ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
3867     ureg_KILL_IF(tx->ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
3868 }
3869 
parse_shader(struct shader_translator * tx)3870 static void parse_shader(struct shader_translator *tx)
3871 {
3872     struct nine_shader_info *info = tx->info;
3873 
3874     while (!sm1_parse_eof(tx) && !tx->failure)
3875         sm1_parse_instruction(tx);
3876     tx->parse++; /* for byte_size */
3877 
3878     if (tx->failure)
3879         return;
3880 
3881     if (IS_PS) {
3882         struct ureg_dst oCol0 = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0);
3883         struct ureg_dst tmp_oCol0;
3884         if (tx->version.major < 3) {
3885             tmp_oCol0 = ureg_DECL_temporary(tx->ureg);
3886             if (tx->version.major < 2) {
3887                 assert(tx->num_temp); /* there must be color output */
3888                 info->rt_mask |= 0x1;
3889                 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.r[0]));
3890             } else {
3891                 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.oCol[0]));
3892             }
3893         } else {
3894             assert(!ureg_dst_is_undef(tx->regs.oCol[0]));
3895             tmp_oCol0 = tx->regs.oCol[0];
3896         }
3897         shader_add_ps_alpha_test_stage(tx, ureg_src(tmp_oCol0));
3898         ureg_MOV(tx->ureg, oCol0, ureg_src(tmp_oCol0));
3899     }
3900 
3901     if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3902         tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3903         ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3904     }
3905 
3906     if (info->position_t) {
3907         if (tx->no_vs_window_space) {
3908             ERR("POSITIONT is not yet implemented for your device.\n");
3909         } else {
3910             ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
3911         }
3912     }
3913 
3914     if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3915         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3916         ureg_MAX(tx->ureg, ureg_writemask(tx->regs.oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3917         ureg_MIN(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3918         info->point_size = true;
3919     } else if (IS_VS && tx->always_output_pointsize) {
3920         struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3921         ureg_MOV(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), nine_special_constant_src(tx, 8));
3922         info->point_size = true;
3923     }
3924 
3925     if (IS_VS && tx->info->clip_plane_emulation > 0) {
3926         struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
3927         int num_clipdist = ffs(tx->info->clip_plane_emulation);
3928         int i;
3929         /* TODO: handle undefined channels of oPos (w is not always written to I think. default is 1) *
3930          * Note in d3d9 it's not possible to output clipvert, so we don't need to check
3931          * for its existence */
3932         clipdist[0] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 0, ((1 << num_clipdist) - 1) & 0xf, 0, 1);
3933         if (num_clipdist >= 5)
3934             clipdist[1] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 1, ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
3935         ureg_property(tx->ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
3936         for (i = 0; i < num_clipdist; i++) {
3937             assert(!ureg_dst_is_undef(clipdist[i>>2]));
3938             if (!(tx->info->clip_plane_emulation & (1 << i)))
3939                 ureg_MOV(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(tx->ureg, 0.f));
3940             else
3941                 ureg_DP4(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
3942                          ureg_src(tx->regs.oPos), nine_special_constant_src(tx, i));
3943         }
3944 
3945         ureg_MOV(tx->ureg, tx->regs.oPos_out, ureg_src(tx->regs.oPos));
3946     }
3947 
3948     if (info->process_vertices)
3949         shader_add_vs_viewport_transform(tx);
3950 
3951     ureg_END(tx->ureg);
3952 }
3953 
3954 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS        (1 << 2)
3955 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS        (1 << 3)
3956 #define NINE_SHADER_DEBUG_OPTION_DUMP_NIR         (1 << 4)
3957 #define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI        (1 << 5)
3958 
3959 static const struct debug_named_value nine_shader_debug_options[] = {
3960     { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3961     { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3962     { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3963     { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
3964     DEBUG_NAMED_VALUE_END /* must be last */
3965 };
3966 
3967 static inline bool
nine_shader_get_debug_flag(uint64_t flag)3968 nine_shader_get_debug_flag(uint64_t flag)
3969 {
3970     static uint64_t flags = 0;
3971     static bool first_run = true;
3972 
3973     if (unlikely(first_run)) {
3974         first_run = false;
3975         flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3976 
3977         // Check old TGSI dump envvar too
3978         if (debug_get_bool_option("NINE_TGSI_DUMP", false)) {
3979             flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3980         }
3981     }
3982 
3983     return !!(flags & flag);
3984 }
3985 
3986 static void
nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state * state,const struct tgsi_token * tgsi_tokens,struct pipe_screen * screen)3987 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3988                                      struct pipe_screen *screen)
3989 {
3990     struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3991 
3992     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3993         nir_print_shader(nir, stdout);
3994     }
3995 
3996     state->type = PIPE_SHADER_IR_NIR;
3997     state->tokens = NULL;
3998     state->ir.nir = nir;
3999     memset(&state->stream_output, 0, sizeof(state->stream_output));
4000 }
4001 
4002 static void *
nine_ureg_create_shader(struct ureg_program * ureg,struct pipe_context * pipe,const struct pipe_stream_output_info * so)4003 nine_ureg_create_shader(struct ureg_program                  *ureg,
4004                         struct pipe_context                  *pipe,
4005                         const struct pipe_stream_output_info   *so)
4006 {
4007     struct pipe_shader_state state;
4008     const struct tgsi_token *tgsi_tokens;
4009     struct pipe_screen *screen = pipe->screen;
4010 
4011     tgsi_tokens = ureg_finalize(ureg);
4012     if (!tgsi_tokens)
4013         return NULL;
4014 
4015     assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
4016     enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
4017 
4018     bool use_nir = true;
4019 
4020     /* Allow user to override preferred IR, this is very useful for debugging */
4021     if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
4022         use_nir = false;
4023     if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
4024         use_nir = false;
4025 
4026     DUMP("shader type: %s, selected IR: %s\n",
4027          shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
4028          use_nir ? "NIR" : "TGSI");
4029 
4030     if (use_nir) {
4031         nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
4032     } else {
4033         pipe_shader_state_from_tgsi(&state, tgsi_tokens);
4034     }
4035 
4036     assert(state.tokens || state.ir.nir);
4037 
4038     if (so)
4039         state.stream_output = *so;
4040 
4041     switch (shader_type) {
4042     case PIPE_SHADER_VERTEX:
4043         return pipe->create_vs_state(pipe, &state);
4044     case PIPE_SHADER_FRAGMENT:
4045         return pipe->create_fs_state(pipe, &state);
4046     default:
4047         unreachable("unsupported shader type");
4048     }
4049 }
4050 
4051 
/* Create a shader object from the ureg program (optionally with stream
 * output info), then destroy the program whether or not creation succeeded.
 * Returns whatever nine_ureg_create_shader() returned. */
void *
nine_create_shader_with_so_and_destroy(struct ureg_program                   *p,
                                       struct pipe_context                *pipe,
                                       const struct pipe_stream_output_info *so)
{
    void *cso = nine_ureg_create_shader(p, pipe, so);

    ureg_destroy(p);
    return cso;
}
4061 
4062 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)4063 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
4064 {
4065     struct shader_translator *tx;
4066     HRESULT hr = D3D_OK;
4067     const unsigned processor = info->type;
4068     struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
4069     unsigned *const_ranges = NULL;
4070 
4071     user_assert(processor != ~0, D3DERR_INVALIDCALL);
4072 
4073     tx = MALLOC_STRUCT(shader_translator);
4074     if (!tx)
4075         return E_OUTOFMEMORY;
4076 
4077     info->emulate_features = device->driver_caps.shader_emulate_features;
4078 
4079     if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4080         hr = E_OUTOFMEMORY;
4081         goto out;
4082     }
4083     tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4084 
4085     assert(IS_VS || !info->swvp_on);
4086 
4087     if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
4088         hr = D3DERR_INVALIDCALL;
4089         DBG("Unsupported shader version: %u.%u !\n",
4090             tx->version.major, tx->version.minor);
4091         goto out;
4092     }
4093     if (tx->processor != processor) {
4094         hr = D3DERR_INVALIDCALL;
4095         DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
4096         goto out;
4097     }
4098     DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
4099          tx->version.major, tx->version.minor);
4100 
4101     parse_shader(tx);
4102 
4103     if (tx->failure) {
4104         /* For VS shaders, we print the warning later,
4105          * we first try with swvp. */
4106         if (IS_PS)
4107             ERR("Encountered buggy shader\n");
4108         ureg_destroy(tx->ureg);
4109         hr = D3DERR_INVALIDCALL;
4110         goto out;
4111     }
4112 
4113     /* Recompile after compacting constant slots if possible */
4114     if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
4115         unsigned *slot_map;
4116         unsigned c;
4117         int i, j, num_ranges, prev;
4118 
4119         DBG("Recompiling shader for constant compaction\n");
4120         ureg_destroy(tx->ureg);
4121 
4122         if (tx->num_inst_labels)
4123             FREE(tx->inst_labels);
4124         FREE(tx->lconstf);
4125         FREE(tx->regs.r);
4126 
4127         num_ranges = 0;
4128         prev = -2;
4129         for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4130             if (tx->slots_used[i]) {
4131                 if (prev != i - 1)
4132                     num_ranges++;
4133                 prev = i;
4134             }
4135         }
4136         slot_map = MALLOC(NINE_MAX_CONST_ALL_VS * sizeof(unsigned));
4137         const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4138         if (!slot_map || !const_ranges) {
4139             hr = E_OUTOFMEMORY;
4140             goto out;
4141         }
4142         c = 0;
4143         j = -1;
4144         prev = -2;
4145         for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4146             if (tx->slots_used[i]) {
4147                 if (prev != i - 1)
4148                     j++;
4149                 /* Initialize first slot of the range */
4150                 if (!const_ranges[2*j+1])
4151                     const_ranges[2*j] = i;
4152                 const_ranges[2*j+1]++;
4153                 prev = i;
4154                 slot_map[i] = c++;
4155             }
4156         }
4157 
4158         if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4159             hr = E_OUTOFMEMORY;
4160             goto out;
4161         }
4162         tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4163         tx->slot_map = slot_map;
4164         parse_shader(tx);
4165         assert(!tx->failure);
4166 #if !defined(NDEBUG)
4167         i = 0;
4168         j = 0;
4169         while (const_ranges[i*2+1] != 0) {
4170             j += const_ranges[i*2+1];
4171             i++;
4172         }
4173         assert(j == tx->num_slots);
4174 #endif
4175     }
4176 
4177     /* record local constants */
4178     if (tx->num_lconstf && tx->indirect_const_access) {
4179         struct nine_range *ranges;
4180         float *data;
4181         int *indices;
4182         unsigned i, k, n;
4183 
4184         hr = E_OUTOFMEMORY;
4185 
4186         data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4187         if (!data)
4188             goto out;
4189         info->lconstf.data = data;
4190 
4191         indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4192         if (!indices)
4193             goto out;
4194 
4195         /* lazy sort, num_lconstf should be small */
4196         for (n = 0; n < tx->num_lconstf; ++n) {
4197             for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4198                 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4199                     k = i;
4200             }
4201             indices[n] = tx->lconstf[k].idx;
4202             memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4203             tx->lconstf[k].idx = INT_MAX;
4204         }
4205 
4206         /* count ranges */
4207         for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4208             if (indices[i] != indices[i - 1] + 1)
4209                 ++n;
4210         ranges = MALLOC(n * sizeof(ranges[0]));
4211         if (!ranges) {
4212             FREE(indices);
4213             goto out;
4214         }
4215         info->lconstf.ranges = ranges;
4216 
4217         k = 0;
4218         ranges[k].bgn = indices[0];
4219         for (i = 1; i < tx->num_lconstf; ++i) {
4220             if (indices[i] != indices[i - 1] + 1) {
4221                 ranges[k].next = &ranges[k + 1];
4222                 ranges[k].end = indices[i - 1] + 1;
4223                 ++k;
4224                 ranges[k].bgn = indices[i];
4225             }
4226         }
4227         ranges[k].end = indices[i - 1] + 1;
4228         ranges[k].next = NULL;
4229         assert(n == (k + 1));
4230 
4231         FREE(indices);
4232         hr = D3D_OK;
4233     }
4234 
4235     /* r500 */
4236     if (info->const_float_slots > device->max_vs_const_f &&
4237         (info->const_int_slots || info->const_bool_slots) &&
4238         !info->swvp_on)
4239         ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4240 
4241 
4242     if (tx->indirect_const_access) { /* vs only */
4243         info->const_float_slots = device->max_vs_const_f;
4244         tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4245     }
4246 
4247     if (!info->swvp_on) {
4248         info->const_used_size = sizeof(float[4]) * tx->num_slots;
4249         if (tx->num_slots)
4250             ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4251     } else {
4252          ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4253          ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4254          ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4255          ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4256     }
4257 
4258     if (info->process_vertices)
4259         ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4260 
4261     if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4262         const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4263         tgsi_dump(toks, 0);
4264         ureg_free_tokens(toks);
4265     }
4266 
4267     if (info->process_vertices) {
4268         NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4269                                                     tx->output_info,
4270                                                     tx->num_outputs,
4271                                                     &(info->so));
4272         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4273     } else
4274         info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4275     if (!info->cso) {
4276         hr = D3DERR_DRIVERINTERNALERROR;
4277         FREE(info->lconstf.data);
4278         FREE(info->lconstf.ranges);
4279         goto out;
4280     }
4281 
4282     info->const_ranges = const_ranges;
4283     const_ranges = NULL;
4284     info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4285 out:
4286     if (const_ranges)
4287         FREE(const_ranges);
4288     tx_dtor(tx);
4289     return hr;
4290 }
4291