1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37
38 #define DBG_CHANNEL DBG_SHADER
39
40 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
41
42
43 struct shader_translator;
44
45 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
46
47 static inline const char *d3dsio_to_string(unsigned opcode);
48
49
/* SM1 shader kind identifiers.
 * NOTE(review): presumably compared against the version token - confirm
 * against the parser. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of shader_translator::cond_labels and ::loop_labels / ::rL. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values of sm1_src_param::type; they select the valid member of the
 * immediate union (see sm1_dump_immediate). */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4   1
#define NINED3DSPTYPE_BOOL   2

/* Extra register file value, one past D3DSPR_PREDICATE, marking a parameter
 * that carries an immediate. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK  D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* sm1_instruction::flags values for D3DSIO_TEX (see sm1_dump_instruction). */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS    0x2

/* Per-component bits of a destination write mask (x, y, z, w). */
#define NINED3DSP_WRITEMASK_0   0x1
#define NINED3DSP_WRITEMASK_1   0x2
#define NINED3DSP_WRITEMASK_2   0x4
#define NINED3DSP_WRITEMASK_3   0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: two bits per component selecting x, y, z, w in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma separated TGSI_SWIZZLE_x arguments. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* Constant register 'index' with dimension 0. */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, index), 0)

/* Replicate component 's' of 'src' to all four components. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* Destination modifier bits as stored in sm1_dst_param::mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
105
/*
 * Source modifiers and where they are available:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 */
#define NINED3DSPSM_NONE    (D3DSPSM_NONE    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG     (D3DSPSM_NEG     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS    (D3DSPSM_BIAS    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN    (D3DSPSM_SIGN    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP    (D3DSPSM_COMP    >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2      (D3DSPSM_X2      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG   (D3DSPSM_X2NEG   >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ      (D3DSPSM_DZ      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW      (D3DSPSM_DW      >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS     (D3DSPSM_ABS     >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG  (D3DSPSM_ABSNEG  >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT     (D3DSPSM_NOT     >> D3DSP_SRCMOD_SHIFT)
135
136 static const char *sm1_mod_str[] =
137 {
138 [NINED3DSPSM_NONE] = "",
139 [NINED3DSPSM_NEG] = "-",
140 [NINED3DSPSM_BIAS] = "bias",
141 [NINED3DSPSM_BIASNEG] = "biasneg",
142 [NINED3DSPSM_SIGN] = "sign",
143 [NINED3DSPSM_SIGNNEG] = "signneg",
144 [NINED3DSPSM_COMP] = "comp",
145 [NINED3DSPSM_X2] = "x2",
146 [NINED3DSPSM_X2NEG] = "x2neg",
147 [NINED3DSPSM_DZ] = "dz",
148 [NINED3DSPSM_DW] = "dw",
149 [NINED3DSPSM_ABS] = "abs",
150 [NINED3DSPSM_ABSNEG] = "-abs",
151 [NINED3DSPSM_NOT] = "not"
152 };
153
154 static void
sm1_dump_writemask(BYTE mask)155 sm1_dump_writemask(BYTE mask)
156 {
157 if (mask & 1) DUMP("x"); else DUMP("_");
158 if (mask & 2) DUMP("y"); else DUMP("_");
159 if (mask & 4) DUMP("z"); else DUMP("_");
160 if (mask & 8) DUMP("w"); else DUMP("_");
161 }
162
163 static void
sm1_dump_swizzle(BYTE s)164 sm1_dump_swizzle(BYTE s)
165 {
166 char c[4] = { 'x', 'y', 'z', 'w' };
167 DUMP("%c%c%c%c",
168 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
169 }
170
171 static const char sm1_file_char[] =
172 {
173 [D3DSPR_TEMP] = 'r',
174 [D3DSPR_INPUT] = 'v',
175 [D3DSPR_CONST] = 'c',
176 [D3DSPR_ADDR] = 'A',
177 [D3DSPR_RASTOUT] = 'R',
178 [D3DSPR_ATTROUT] = 'D',
179 [D3DSPR_OUTPUT] = 'o',
180 [D3DSPR_CONSTINT] = 'I',
181 [D3DSPR_COLOROUT] = 'C',
182 [D3DSPR_DEPTHOUT] = 'D',
183 [D3DSPR_SAMPLER] = 's',
184 [D3DSPR_CONST2] = 'c',
185 [D3DSPR_CONST3] = 'c',
186 [D3DSPR_CONST4] = 'c',
187 [D3DSPR_CONSTBOOL] = 'B',
188 [D3DSPR_LOOP] = 'L',
189 [D3DSPR_TEMPFLOAT16] = 'h',
190 [D3DSPR_MISCTYPE] = 'M',
191 [D3DSPR_LABEL] = 'X',
192 [D3DSPR_PREDICATE] = 'p'
193 };
194
195 static void
sm1_dump_reg(BYTE file,INT index)196 sm1_dump_reg(BYTE file, INT index)
197 {
198 switch (file) {
199 case D3DSPR_LOOP:
200 DUMP("aL");
201 break;
202 case D3DSPR_COLOROUT:
203 DUMP("oC%i", index);
204 break;
205 case D3DSPR_DEPTHOUT:
206 DUMP("oDepth");
207 break;
208 case D3DSPR_RASTOUT:
209 DUMP("oRast%i", index);
210 break;
211 case D3DSPR_CONSTINT:
212 DUMP("iconst[%i]", index);
213 break;
214 case D3DSPR_CONSTBOOL:
215 DUMP("bconst[%i]", index);
216 break;
217 default:
218 DUMP("%c%i", sm1_file_char[file], index);
219 break;
220 }
221 }
222
223 struct sm1_src_param
224 {
225 INT idx;
226 struct sm1_src_param *rel;
227 BYTE file;
228 BYTE swizzle;
229 BYTE mod;
230 BYTE type;
231 union {
232 DWORD d[4];
233 float f[4];
234 int i[4];
235 BOOL b;
236 } imm;
237 };
238 static void
239 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
240
241 struct sm1_dst_param
242 {
243 INT idx;
244 struct sm1_src_param *rel;
245 BYTE file;
246 BYTE mask;
247 BYTE mod;
248 int8_t shift; /* sint4 */
249 BYTE type;
250 };
251
252 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)253 assert_replicate_swizzle(const struct ureg_src *reg)
254 {
255 assert(reg->SwizzleY == reg->SwizzleX &&
256 reg->SwizzleZ == reg->SwizzleX &&
257 reg->SwizzleW == reg->SwizzleX);
258 }
259
260 static void
sm1_dump_immediate(const struct sm1_src_param * param)261 sm1_dump_immediate(const struct sm1_src_param *param)
262 {
263 switch (param->type) {
264 case NINED3DSPTYPE_FLOAT4:
265 DUMP("{ %f %f %f %f }",
266 param->imm.f[0], param->imm.f[1],
267 param->imm.f[2], param->imm.f[3]);
268 break;
269 case NINED3DSPTYPE_INT4:
270 DUMP("{ %i %i %i %i }",
271 param->imm.i[0], param->imm.i[1],
272 param->imm.i[2], param->imm.i[3]);
273 break;
274 case NINED3DSPTYPE_BOOL:
275 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
276 break;
277 default:
278 assert(0);
279 break;
280 }
281 }
282
283 static void
sm1_dump_src_param(const struct sm1_src_param * param)284 sm1_dump_src_param(const struct sm1_src_param *param)
285 {
286 if (param->file == NINED3DSPR_IMMEDIATE) {
287 assert(!param->mod &&
288 !param->rel &&
289 param->swizzle == NINED3DSP_NOSWIZZLE);
290 sm1_dump_immediate(param);
291 return;
292 }
293
294 if (param->mod)
295 DUMP("%s(", sm1_mod_str[param->mod]);
296 if (param->rel) {
297 DUMP("%c[", sm1_file_char[param->file]);
298 sm1_dump_src_param(param->rel);
299 DUMP("+%i]", param->idx);
300 } else {
301 sm1_dump_reg(param->file, param->idx);
302 }
303 if (param->mod)
304 DUMP(")");
305 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
306 DUMP(".");
307 sm1_dump_swizzle(param->swizzle);
308 }
309 }
310
311 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)312 sm1_dump_dst_param(const struct sm1_dst_param *param)
313 {
314 if (param->mod & NINED3DSPDM_SATURATE)
315 DUMP("sat ");
316 if (param->mod & NINED3DSPDM_PARTIALP)
317 DUMP("pp ");
318 if (param->mod & NINED3DSPDM_CENTROID)
319 DUMP("centroid ");
320 if (param->shift < 0)
321 DUMP("/%u ", 1 << -param->shift);
322 if (param->shift > 0)
323 DUMP("*%u ", 1 << param->shift);
324
325 if (param->rel) {
326 DUMP("%c[", sm1_file_char[param->file]);
327 sm1_dump_src_param(param->rel);
328 DUMP("+%i]", param->idx);
329 } else {
330 sm1_dump_reg(param->file, param->idx);
331 }
332 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
333 DUMP(".");
334 sm1_dump_writemask(param->mask);
335 }
336 }
337
338 struct sm1_semantic
339 {
340 struct sm1_dst_param reg;
341 BYTE sampler_type;
342 D3DDECLUSAGE usage;
343 BYTE usage_idx;
344 };
345
346 struct sm1_op_info
347 {
348 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
349 * should be ignored completely */
350 unsigned sio;
351 unsigned opcode; /* TGSI_OPCODE_x */
352
353 /* versions are still set even handler is set */
354 struct {
355 unsigned min;
356 unsigned max;
357 } vert_version, frag_version;
358
359 /* number of regs parsed outside of special handler */
360 unsigned ndst;
361 unsigned nsrc;
362
363 /* some instructions don't map perfectly, so use a special handler */
364 translate_instruction_func handler;
365 };
366
367 struct sm1_instruction
368 {
369 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
370 BYTE flags;
371 BOOL coissue;
372 BOOL predicated;
373 BYTE ndst;
374 BYTE nsrc;
375 struct sm1_src_param src[4];
376 struct sm1_src_param src_rel[4];
377 struct sm1_src_param pred;
378 struct sm1_src_param dst_rel[1];
379 struct sm1_dst_param dst[1];
380
381 struct sm1_op_info *info;
382 };
383
384 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
386 {
387 unsigned i;
388
389 /* no info stored for these: */
390 if (insn->opcode == D3DSIO_DCL)
391 return;
392 for (i = 0; i < indent; ++i)
393 DUMP(" ");
394
395 if (insn->predicated) {
396 DUMP("@");
397 sm1_dump_src_param(&insn->pred);
398 DUMP(" ");
399 }
400 DUMP("%s", d3dsio_to_string(insn->opcode));
401 if (insn->flags) {
402 switch (insn->opcode) {
403 case D3DSIO_TEX:
404 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
405 break;
406 default:
407 DUMP("_%x", insn->flags);
408 break;
409 }
410 }
411 if (insn->coissue)
412 DUMP("_co");
413 DUMP(" ");
414
415 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
416 sm1_dump_dst_param(&insn->dst[i]);
417 DUMP(" ");
418 }
419
420 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
421 sm1_dump_src_param(&insn->src[i]);
422 DUMP(" ");
423 }
424 if (insn->opcode == D3DSIO_DEF ||
425 insn->opcode == D3DSIO_DEFI ||
426 insn->opcode == D3DSIO_DEFB)
427 sm1_dump_immediate(&insn->src[0]);
428
429 DUMP("\n");
430 }
431
432 struct sm1_local_const
433 {
434 INT idx;
435 struct ureg_src reg;
436 float f[4]; /* for indirect addressing of float constants */
437 };
438
439 struct shader_translator
440 {
441 const DWORD *byte_code;
442 const DWORD *parse;
443 const DWORD *parse_next;
444
445 struct ureg_program *ureg;
446
447 /* shader version */
448 struct {
449 BYTE major;
450 BYTE minor;
451 } version;
452 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */
453 unsigned num_constf_allowed;
454 unsigned num_consti_allowed;
455 unsigned num_constb_allowed;
456
457 boolean native_integers;
458 boolean inline_subroutines;
459 boolean want_texcoord;
460 boolean shift_wpos;
461 boolean wpos_is_sysval;
462 boolean face_is_sysval_integer;
463 unsigned texcoord_sn;
464
465 struct sm1_instruction insn; /* current instruction */
466
467 struct {
468 struct ureg_dst *r;
469 struct ureg_dst oPos;
470 struct ureg_dst oPos_out; /* the real output when doing streamout */
471 struct ureg_dst oFog;
472 struct ureg_dst oPts;
473 struct ureg_dst oCol[4];
474 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
475 struct ureg_dst oDepth;
476 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
477 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
478 struct ureg_src vPos;
479 struct ureg_src vFace;
480 struct ureg_src s;
481 struct ureg_dst p;
482 struct ureg_dst address;
483 struct ureg_dst a0;
484 struct ureg_dst tS[8]; /* texture stage registers */
485 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
486 struct ureg_dst t[5]; /* scratch TEMPs */
487 struct ureg_src vC[2]; /* PS color in */
488 struct ureg_src vT[8]; /* PS texcoord in */
489 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
490 } regs;
491 unsigned num_temp; /* ARRAY_SIZE(regs.r) */
492 unsigned num_scratch;
493 unsigned loop_depth;
494 unsigned loop_depth_max;
495 unsigned cond_depth;
496 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
497 unsigned cond_labels[NINE_MAX_COND_DEPTH];
498 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
499
500 unsigned *inst_labels; /* LABEL op */
501 unsigned num_inst_labels;
502
503 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
504
505 struct sm1_local_const *lconstf;
506 unsigned num_lconstf;
507 struct sm1_local_const *lconsti;
508 unsigned num_lconsti;
509 struct sm1_local_const *lconstb;
510 unsigned num_lconstb;
511
512 boolean indirect_const_access;
513 boolean failure;
514
515 struct nine_vs_output_info output_info[16];
516 int num_outputs;
517
518 struct nine_shader_info *info;
519
520 int16_t op_info_map[D3DSIO_BREAKP + 1];
521 };
522
/* Shader stage tests; both expect a 'tx' translator pointer in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* Mark the translation as failed and return from a void function when
 * 'cond' holds. Expands to a complete if statement, so call sites do not
 * need a trailing semicolon. Expects 'tx' in scope. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
530
531 static void
sm1_instruction_check(const struct sm1_instruction * insn)532 sm1_instruction_check(const struct sm1_instruction *insn)
533 {
534 if (insn->opcode == D3DSIO_CRS)
535 {
536 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
537 {
538 DBG("CRS.mask.w\n");
539 }
540 }
541 }
542
543 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)544 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
545 int mask, int output_index)
546 {
547 tx->output_info[tx->num_outputs].output_semantic = Usage;
548 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
549 tx->output_info[tx->num_outputs].mask = mask;
550 tx->output_info[tx->num_outputs].output_index = output_index;
551 tx->num_outputs++;
552 }
553
554 static boolean
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)555 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
556 {
557 INT i;
558
559 if (index < 0 || index >= tx->num_constf_allowed) {
560 tx->failure = TRUE;
561 return FALSE;
562 }
563 for (i = 0; i < tx->num_lconstf; ++i) {
564 if (tx->lconstf[i].idx == index) {
565 *src = tx->lconstf[i].reg;
566 return TRUE;
567 }
568 }
569 return FALSE;
570 }
571 static boolean
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)572 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
573 {
574 int i;
575
576 if (index < 0 || index >= tx->num_consti_allowed) {
577 tx->failure = TRUE;
578 return FALSE;
579 }
580 for (i = 0; i < tx->num_lconsti; ++i) {
581 if (tx->lconsti[i].idx == index) {
582 *src = tx->lconsti[i].reg;
583 return TRUE;
584 }
585 }
586 return FALSE;
587 }
588 static boolean
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)589 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
590 {
591 int i;
592
593 if (index < 0 || index >= tx->num_constb_allowed) {
594 tx->failure = TRUE;
595 return FALSE;
596 }
597 for (i = 0; i < tx->num_lconstb; ++i) {
598 if (tx->lconstb[i].idx == index) {
599 *src = tx->lconstb[i].reg;
600 return TRUE;
601 }
602 }
603 return FALSE;
604 }
605
606 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])607 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
608 {
609 unsigned n;
610
611 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
612
613 for (n = 0; n < tx->num_lconstf; ++n)
614 if (tx->lconstf[n].idx == index)
615 break;
616 if (n == tx->num_lconstf) {
617 if ((n % 8) == 0) {
618 tx->lconstf = REALLOC(tx->lconstf,
619 (n + 0) * sizeof(tx->lconstf[0]),
620 (n + 8) * sizeof(tx->lconstf[0]));
621 assert(tx->lconstf);
622 }
623 tx->num_lconstf++;
624 }
625 tx->lconstf[n].idx = index;
626 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
627
628 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
629 }
630 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])631 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
632 {
633 unsigned n;
634
635 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
636
637 for (n = 0; n < tx->num_lconsti; ++n)
638 if (tx->lconsti[n].idx == index)
639 break;
640 if (n == tx->num_lconsti) {
641 if ((n % 8) == 0) {
642 tx->lconsti = REALLOC(tx->lconsti,
643 (n + 0) * sizeof(tx->lconsti[0]),
644 (n + 8) * sizeof(tx->lconsti[0]));
645 assert(tx->lconsti);
646 }
647 tx->num_lconsti++;
648 }
649
650 tx->lconsti[n].idx = index;
651 tx->lconsti[n].reg = tx->native_integers ?
652 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
653 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
654 }
655 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)656 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
657 {
658 unsigned n;
659
660 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
661
662 for (n = 0; n < tx->num_lconstb; ++n)
663 if (tx->lconstb[n].idx == index)
664 break;
665 if (n == tx->num_lconstb) {
666 if ((n % 8) == 0) {
667 tx->lconstb = REALLOC(tx->lconstb,
668 (n + 0) * sizeof(tx->lconstb[0]),
669 (n + 8) * sizeof(tx->lconstb[0]));
670 assert(tx->lconstb);
671 }
672 tx->num_lconstb++;
673 }
674
675 tx->lconstb[n].idx = index;
676 tx->lconstb[n].reg = tx->native_integers ?
677 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
678 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
679 }
680
681 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)682 tx_scratch(struct shader_translator *tx)
683 {
684 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
685 tx->failure = TRUE;
686 return tx->regs.t[0];
687 }
688 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
689 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
690 return tx->regs.t[tx->num_scratch++];
691 }
692
693 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)694 tx_scratch_scalar(struct shader_translator *tx)
695 {
696 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
697 }
698
699 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)700 tx_src_scalar(struct ureg_dst dst)
701 {
702 struct ureg_src src = ureg_src(dst);
703 int c = ffs(dst.WriteMask) - 1;
704 if (dst.WriteMask == (1 << c))
705 src = ureg_scalar(src, c);
706 return src;
707 }
708
709 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)710 tx_temp_alloc(struct shader_translator *tx, INT idx)
711 {
712 assert(idx >= 0);
713 if (idx >= tx->num_temp) {
714 unsigned k = tx->num_temp;
715 unsigned n = idx + 1;
716 tx->regs.r = REALLOC(tx->regs.r,
717 k * sizeof(tx->regs.r[0]),
718 n * sizeof(tx->regs.r[0]));
719 for (; k < n; ++k)
720 tx->regs.r[k] = ureg_dst_undef();
721 tx->num_temp = n;
722 }
723 if (ureg_dst_is_undef(tx->regs.r[idx]))
724 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
725 }
726
727 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)728 tx_addr_alloc(struct shader_translator *tx, INT idx)
729 {
730 assert(idx == 0);
731 if (ureg_dst_is_undef(tx->regs.address))
732 tx->regs.address = ureg_DECL_address(tx->ureg);
733 if (ureg_dst_is_undef(tx->regs.a0))
734 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
735 }
736
737 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
738 * the projection should be applied on the texture. It doesn't
739 * apply on texkill.
740 * The doc is very imprecise here (it says the projection is done
741 * before rasterization, thus in vs, which seems wrong since ps instructions
742 * are affected differently)
743 * For now we only apply to the ps TEX instruction and TEXBEM.
744 * Perhaps some other instructions would need it */
745 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)746 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
747 struct ureg_src src, INT idx)
748 {
749 struct ureg_dst tmp;
750 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
751
752 /* no projection */
753 if (dim == 1) {
754 ureg_MOV(tx->ureg, dst, src);
755 } else {
756 tmp = tx_scratch_scalar(tx);
757 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
758 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
759 }
760 }
761
762 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)763 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
764 unsigned target, struct ureg_src src0,
765 struct ureg_src src1, INT idx)
766 {
767 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
768 struct ureg_dst tmp;
769
770 /* dim == 1: no projection
771 * Looks like must be disabled when it makes no
772 * sense according the texture dimensions
773 */
774 if (dim == 1 || dim <= target) {
775 ureg_TEX(tx->ureg, dst, target, src0, src1);
776 } else if (dim == 4) {
777 ureg_TXP(tx->ureg, dst, target, src0, src1);
778 } else {
779 tmp = tx_scratch(tx);
780 apply_ps1x_projection(tx, tmp, src0, idx);
781 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
782 }
783 }
784
785 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)786 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
787 {
788 assert(IS_PS);
789 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
790 if (ureg_src_is_undef(tx->regs.vT[idx]))
791 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
792 TGSI_INTERPOLATE_PERSPECTIVE);
793 }
794
795 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)796 tx_bgnloop(struct shader_translator *tx)
797 {
798 tx->loop_depth++;
799 if (tx->loop_depth_max < tx->loop_depth)
800 tx->loop_depth_max = tx->loop_depth;
801 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
802 return &tx->loop_labels[tx->loop_depth - 1];
803 }
804
805 static inline unsigned *
tx_endloop(struct shader_translator * tx)806 tx_endloop(struct shader_translator *tx)
807 {
808 assert(tx->loop_depth);
809 tx->loop_depth--;
810 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
811 ureg_get_instruction_number(tx->ureg));
812 return &tx->loop_labels[tx->loop_depth];
813 }
814
815 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,boolean loop_or_rep)816 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
817 {
818 const unsigned l = tx->loop_depth - 1;
819
820 if (!tx->loop_depth)
821 {
822 DBG("loop counter requested outside of loop\n");
823 return ureg_dst_undef();
824 }
825
826 if (ureg_dst_is_undef(tx->regs.rL[l])) {
827 /* loop or rep ctr creation */
828 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
829 tx->loop_or_rep[l] = loop_or_rep;
830 }
831 /* loop - rep - endloop - endrep not allowed */
832 assert(tx->loop_or_rep[l] == loop_or_rep);
833
834 return tx->regs.rL[l];
835 }
836
837 static struct ureg_src
tx_get_loopal(struct shader_translator * tx)838 tx_get_loopal(struct shader_translator *tx)
839 {
840 int loop_level = tx->loop_depth - 1;
841
842 while (loop_level >= 0) {
843 /* handle loop - rep - endrep - endloop case */
844 if (tx->loop_or_rep[loop_level])
845 /* the value is in the loop counter y component (nine implementation) */
846 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
847 loop_level--;
848 }
849
850 DBG("aL counter requested outside of loop\n");
851 return ureg_src_undef();
852 }
853
854 static inline unsigned *
tx_cond(struct shader_translator * tx)855 tx_cond(struct shader_translator *tx)
856 {
857 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
858 tx->cond_depth++;
859 return &tx->cond_labels[tx->cond_depth - 1];
860 }
861
862 static inline unsigned *
tx_elsecond(struct shader_translator * tx)863 tx_elsecond(struct shader_translator *tx)
864 {
865 assert(tx->cond_depth);
866 return &tx->cond_labels[tx->cond_depth - 1];
867 }
868
869 static inline void
tx_endcond(struct shader_translator * tx)870 tx_endcond(struct shader_translator *tx)
871 {
872 assert(tx->cond_depth);
873 tx->cond_depth--;
874 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
875 ureg_get_instruction_number(tx->ureg));
876 }
877
878 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)879 nine_ureg_dst_register(unsigned file, int index)
880 {
881 return ureg_dst(ureg_src_register(file, index));
882 }
883
884 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)885 nine_get_position_input(struct shader_translator *tx)
886 {
887 struct ureg_program *ureg = tx->ureg;
888
889 if (tx->wpos_is_sysval)
890 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
891 else
892 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
893 0, TGSI_INTERPOLATE_LINEAR);
894 }
895
896 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)897 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
898 {
899 struct ureg_program *ureg = tx->ureg;
900 struct ureg_src src;
901 struct ureg_dst tmp;
902
903 switch (param->file)
904 {
905 case D3DSPR_TEMP:
906 assert(!param->rel);
907 tx_temp_alloc(tx, param->idx);
908 src = ureg_src(tx->regs.r[param->idx]);
909 break;
910 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
911 case D3DSPR_ADDR:
912 assert(!param->rel);
913 if (IS_VS) {
914 assert(param->idx == 0);
915 /* the address register (vs only) must be
916 * assigned before use */
917 assert(!ureg_dst_is_undef(tx->regs.a0));
918 /* Round to lowest for vs1.1 (contrary to the doc), else
919 * round to nearest */
920 if (tx->version.major < 2 && tx->version.minor < 2)
921 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
922 else
923 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
924 src = ureg_src(tx->regs.address);
925 } else {
926 if (tx->version.major < 2 && tx->version.minor < 4) {
927 /* no subroutines, so should be defined */
928 src = ureg_src(tx->regs.tS[param->idx]);
929 } else {
930 tx_texcoord_alloc(tx, param->idx);
931 src = tx->regs.vT[param->idx];
932 }
933 }
934 break;
935 case D3DSPR_INPUT:
936 if (IS_VS) {
937 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
938 } else {
939 if (tx->version.major < 3) {
940 assert(!param->rel);
941 src = ureg_DECL_fs_input_cyl_centroid(
942 ureg, TGSI_SEMANTIC_COLOR, param->idx,
943 TGSI_INTERPOLATE_COLOR, 0,
944 tx->info->force_color_in_centroid ?
945 TGSI_INTERPOLATE_LOC_CENTROID : 0,
946 0, 1);
947 } else {
948 if(param->rel) {
949 /* Copy all inputs (non consecutive)
950 * to temp array (consecutive).
951 * This is not good for performance.
952 * A better way would be to have inputs
953 * consecutive (would need implement alternative
954 * way to match vs outputs and ps inputs).
955 * However even with the better way, the temp array
956 * copy would need to be used if some inputs
957 * are not GENERIC or if they have different
958 * interpolation flag. */
959 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
960 int i;
961 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
962 for (i = 0; i < 10; i++) {
963 if (!ureg_src_is_undef(tx->regs.v[i]))
964 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
965 else
966 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
967 }
968 }
969 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
970 } else {
971 assert(param->idx < ARRAY_SIZE(tx->regs.v));
972 src = tx->regs.v[param->idx];
973 }
974 }
975 }
976 break;
977 case D3DSPR_PREDICATE:
978 assert(!"D3DSPR_PREDICATE");
979 break;
980 case D3DSPR_SAMPLER:
981 assert(param->mod == NINED3DSPSM_NONE);
982 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
983 assert(!param->rel);
984 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
985 break;
986 case D3DSPR_CONST:
987 assert(!param->rel || IS_VS);
988 if (param->rel)
989 tx->indirect_const_access = TRUE;
990 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
991 if (!param->rel)
992 nine_info_mark_const_f_used(tx->info, param->idx);
993 /* vswp constant handling: we use two buffers
994 * to fit all the float constants. The special handling
995 * doesn't need to be elsewhere, because all the instructions
996 * accessing the constants directly are VS1, and swvp
997 * is VS >= 2 */
998 if (IS_VS && tx->info->swvp_on) {
999 if (!param->rel) {
1000 if (param->idx < 4096) {
1001 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1002 src = ureg_src_dimension(src, 0);
1003 } else {
1004 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
1005 src = ureg_src_dimension(src, 1);
1006 }
1007 } else {
1008 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
1009 src = ureg_src_dimension(src, 0);
1010 }
1011 } else
1012 src = NINE_CONSTANT_SRC(param->idx);
1013 }
1014 if (!IS_VS && tx->version.major < 2) {
1015 /* ps 1.X clamps constants */
1016 tmp = tx_scratch(tx);
1017 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1018 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1019 src = ureg_src(tmp);
1020 }
1021 break;
1022 case D3DSPR_CONST2:
1023 case D3DSPR_CONST3:
1024 case D3DSPR_CONST4:
1025 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1026 assert(!"CONST2/3/4");
1027 src = ureg_imm1f(ureg, 0.0f);
1028 break;
1029 case D3DSPR_CONSTINT:
1030 /* relative adressing only possible for float constants in vs */
1031 assert(!param->rel);
1032 if (!tx_lconsti(tx, &src, param->idx)) {
1033 nine_info_mark_const_i_used(tx->info, param->idx);
1034 if (IS_VS && tx->info->swvp_on) {
1035 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1036 src = ureg_src_dimension(src, 2);
1037 } else
1038 src = NINE_CONSTANT_SRC(tx->info->const_i_base + param->idx);
1039 }
1040 break;
1041 case D3DSPR_CONSTBOOL:
1042 assert(!param->rel);
1043 if (!tx_lconstb(tx, &src, param->idx)) {
1044 char r = param->idx / 4;
1045 char s = param->idx & 3;
1046 nine_info_mark_const_b_used(tx->info, param->idx);
1047 if (IS_VS && tx->info->swvp_on) {
1048 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
1049 src = ureg_src_dimension(src, 3);
1050 } else
1051 src = NINE_CONSTANT_SRC(tx->info->const_b_base + r);
1052 src = ureg_swizzle(src, s, s, s, s);
1053 }
1054 break;
1055 case D3DSPR_LOOP:
1056 if (ureg_dst_is_undef(tx->regs.address))
1057 tx->regs.address = ureg_DECL_address(ureg);
1058 if (!tx->native_integers)
1059 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1060 else
1061 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1062 src = ureg_src(tx->regs.address);
1063 break;
1064 case D3DSPR_MISCTYPE:
1065 switch (param->idx) {
1066 case D3DSMO_POSITION:
1067 if (ureg_src_is_undef(tx->regs.vPos))
1068 tx->regs.vPos = nine_get_position_input(tx);
1069 if (tx->shift_wpos) {
1070 /* TODO: do this only once */
1071 struct ureg_dst wpos = tx_scratch(tx);
1072 ureg_ADD(ureg, wpos, tx->regs.vPos,
1073 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1074 src = ureg_src(wpos);
1075 } else {
1076 src = tx->regs.vPos;
1077 }
1078 break;
1079 case D3DSMO_FACE:
1080 if (ureg_src_is_undef(tx->regs.vFace)) {
1081 if (tx->face_is_sysval_integer) {
1082 tmp = ureg_DECL_temporary(ureg);
1083 tx->regs.vFace =
1084 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1085
1086 /* convert bool to float */
1087 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1088 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1089 tx->regs.vFace = ureg_src(tmp);
1090 } else {
1091 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1092 TGSI_SEMANTIC_FACE, 0,
1093 TGSI_INTERPOLATE_CONSTANT);
1094 }
1095 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1096 }
1097 src = tx->regs.vFace;
1098 break;
1099 default:
1100 assert(!"invalid src D3DSMO");
1101 break;
1102 }
1103 assert(!param->rel);
1104 break;
1105 case D3DSPR_TEMPFLOAT16:
1106 break;
1107 default:
1108 assert(!"invalid src D3DSPR");
1109 }
1110 if (param->rel)
1111 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1112
1113 switch (param->mod) {
1114 case NINED3DSPSM_DW:
1115 tmp = tx_scratch(tx);
1116 /* NOTE: app is not allowed to read w with this modifier */
1117 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1118 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1119 src = ureg_src(tmp);
1120 break;
1121 case NINED3DSPSM_DZ:
1122 tmp = tx_scratch(tx);
1123 /* NOTE: app is not allowed to read z with this modifier */
1124 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1125 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1126 src = ureg_src(tmp);
1127 break;
1128 default:
1129 break;
1130 }
1131
1132 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1133 src = ureg_swizzle(src,
1134 (param->swizzle >> 0) & 0x3,
1135 (param->swizzle >> 2) & 0x3,
1136 (param->swizzle >> 4) & 0x3,
1137 (param->swizzle >> 6) & 0x3);
1138
1139 switch (param->mod) {
1140 case NINED3DSPSM_ABS:
1141 src = ureg_abs(src);
1142 break;
1143 case NINED3DSPSM_ABSNEG:
1144 src = ureg_negate(ureg_abs(src));
1145 break;
1146 case NINED3DSPSM_NEG:
1147 src = ureg_negate(src);
1148 break;
1149 case NINED3DSPSM_BIAS:
1150 tmp = tx_scratch(tx);
1151 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1152 src = ureg_src(tmp);
1153 break;
1154 case NINED3DSPSM_BIASNEG:
1155 tmp = tx_scratch(tx);
1156 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1157 src = ureg_src(tmp);
1158 break;
1159 case NINED3DSPSM_NOT:
1160 if (tx->native_integers) {
1161 tmp = tx_scratch(tx);
1162 ureg_NOT(ureg, tmp, src);
1163 src = ureg_src(tmp);
1164 break;
1165 }
1166 /* fall through */
1167 case NINED3DSPSM_COMP:
1168 tmp = tx_scratch(tx);
1169 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1170 src = ureg_src(tmp);
1171 break;
1172 case NINED3DSPSM_DZ:
1173 case NINED3DSPSM_DW:
1174 /* Already handled*/
1175 break;
1176 case NINED3DSPSM_SIGN:
1177 tmp = tx_scratch(tx);
1178 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1179 src = ureg_src(tmp);
1180 break;
1181 case NINED3DSPSM_SIGNNEG:
1182 tmp = tx_scratch(tx);
1183 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1184 src = ureg_src(tmp);
1185 break;
1186 case NINED3DSPSM_X2:
1187 tmp = tx_scratch(tx);
1188 ureg_ADD(ureg, tmp, src, src);
1189 src = ureg_src(tmp);
1190 break;
1191 case NINED3DSPSM_X2NEG:
1192 tmp = tx_scratch(tx);
1193 ureg_ADD(ureg, tmp, src, src);
1194 src = ureg_negate(ureg_src(tmp));
1195 break;
1196 default:
1197 assert(param->mod == NINED3DSPSM_NONE);
1198 break;
1199 }
1200
1201 return src;
1202 }
1203
1204 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1205 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1206 {
1207 struct ureg_dst dst;
1208
1209 switch (param->file)
1210 {
1211 case D3DSPR_TEMP:
1212 assert(!param->rel);
1213 tx_temp_alloc(tx, param->idx);
1214 dst = tx->regs.r[param->idx];
1215 break;
1216 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1217 case D3DSPR_ADDR:
1218 assert(!param->rel);
1219 if (tx->version.major < 2 && !IS_VS) {
1220 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1221 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1222 dst = tx->regs.tS[param->idx];
1223 } else
1224 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1225 tx_texcoord_alloc(tx, param->idx);
1226 dst = ureg_dst(tx->regs.vT[param->idx]);
1227 } else {
1228 tx_addr_alloc(tx, param->idx);
1229 dst = tx->regs.a0;
1230 }
1231 break;
1232 case D3DSPR_RASTOUT:
1233 assert(!param->rel);
1234 switch (param->idx) {
1235 case 0:
1236 if (ureg_dst_is_undef(tx->regs.oPos))
1237 tx->regs.oPos =
1238 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1239 dst = tx->regs.oPos;
1240 break;
1241 case 1:
1242 if (ureg_dst_is_undef(tx->regs.oFog))
1243 tx->regs.oFog =
1244 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1245 dst = tx->regs.oFog;
1246 break;
1247 case 2:
1248 if (ureg_dst_is_undef(tx->regs.oPts))
1249 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1250 dst = tx->regs.oPts;
1251 break;
1252 default:
1253 assert(0);
1254 break;
1255 }
1256 break;
1257 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1258 case D3DSPR_OUTPUT:
1259 if (tx->version.major < 3) {
1260 assert(!param->rel);
1261 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1262 } else {
1263 assert(!param->rel); /* TODO */
1264 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1265 dst = tx->regs.o[param->idx];
1266 }
1267 break;
1268 case D3DSPR_ATTROUT: /* VS */
1269 case D3DSPR_COLOROUT: /* PS */
1270 assert(param->idx >= 0 && param->idx < 4);
1271 assert(!param->rel);
1272 tx->info->rt_mask |= 1 << param->idx;
1273 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1274 /* ps < 3: oCol[0] will have fog blending afterward */
1275 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1276 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1277 } else {
1278 tx->regs.oCol[param->idx] =
1279 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1280 }
1281 }
1282 dst = tx->regs.oCol[param->idx];
1283 if (IS_VS && tx->version.major < 3)
1284 dst = ureg_saturate(dst);
1285 break;
1286 case D3DSPR_DEPTHOUT:
1287 assert(!param->rel);
1288 if (ureg_dst_is_undef(tx->regs.oDepth))
1289 tx->regs.oDepth =
1290 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1291 TGSI_WRITEMASK_Z, 0, 1);
1292 dst = tx->regs.oDepth; /* XXX: must write .z component */
1293 break;
1294 case D3DSPR_PREDICATE:
1295 assert(!"D3DSPR_PREDICATE");
1296 break;
1297 case D3DSPR_TEMPFLOAT16:
1298 DBG("unhandled D3DSPR: %u\n", param->file);
1299 break;
1300 default:
1301 assert(!"invalid dst D3DSPR");
1302 break;
1303 }
1304 if (param->rel)
1305 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1306
1307 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1308 dst = ureg_writemask(dst, param->mask);
1309 if (param->mod & NINED3DSPDM_SATURATE)
1310 dst = ureg_saturate(dst);
1311
1312 return dst;
1313 }
1314
1315 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1316 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1317 {
1318 if (param->shift) {
1319 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1320 return tx->regs.tdst;
1321 }
1322 return _tx_dst_param(tx, param);
1323 }
1324
1325 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1326 tx_apply_dst0_modifiers(struct shader_translator *tx)
1327 {
1328 struct ureg_dst rdst;
1329 float f;
1330
1331 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1332 return;
1333 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1334
1335 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1336
1337 if (tx->insn.dst[0].shift < 0)
1338 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1339 else
1340 f = 1 << tx->insn.dst[0].shift;
1341
1342 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1343 }
1344
1345 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1346 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1347 {
1348 struct ureg_src src;
1349
1350 assert(!param->shift);
1351 assert(!(param->mod & NINED3DSPDM_SATURATE));
1352
1353 switch (param->file) {
1354 case D3DSPR_INPUT:
1355 if (IS_VS) {
1356 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1357 } else {
1358 assert(!param->rel);
1359 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1360 src = tx->regs.v[param->idx];
1361 }
1362 break;
1363 default:
1364 src = ureg_src(tx_dst_param(tx, param));
1365 break;
1366 }
1367 if (param->rel)
1368 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1369
1370 if (!param->mask)
1371 WARN("mask is 0, using identity swizzle\n");
1372
1373 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1374 char s[4];
1375 int n;
1376 int c;
1377 for (n = 0, c = 0; c < 4; ++c)
1378 if (param->mask & (1 << c))
1379 s[n++] = c;
1380 assert(n);
1381 for (c = n; c < 4; ++c)
1382 s[c] = s[n - 1];
1383 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1384 }
1385 return src;
1386 }
1387
1388 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1389 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1390 {
1391 struct ureg_program *ureg = tx->ureg;
1392 struct ureg_dst dst;
1393 struct ureg_src src[2];
1394 struct sm1_src_param *src_mat = &tx->insn.src[1];
1395 unsigned i;
1396
1397 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1398 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1399
1400 for (i = 0; i < n; i++)
1401 {
1402 const unsigned m = (1 << i);
1403
1404 src[1] = tx_src_param(tx, src_mat);
1405 src_mat->idx++;
1406
1407 if (!(dst.WriteMask & m))
1408 continue;
1409
1410 /* XXX: src == dst case ? */
1411
1412 switch (k) {
1413 case 3:
1414 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1415 break;
1416 case 4:
1417 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1418 break;
1419 default:
1420 DBG("invalid operation: M%ux%u\n", m, n);
1421 break;
1422 }
1423 }
1424
1425 return D3D_OK;
1426 }
1427
/* Expands to a zero pair, i.e. two comma-separated zero values. */
#define VNOTSUPPORTED   0, 0
/* Pack a (major, minor) version pair into one integer: major in the bits
 * above the low byte, minor OR-ed in below. */
#define V(major, minor) (((major) << 8) | (minor))
1430
1431 static inline const char *
d3dsio_to_string(unsigned opcode)1432 d3dsio_to_string( unsigned opcode )
1433 {
1434 static const char *names[] = {
1435 "NOP",
1436 "MOV",
1437 "ADD",
1438 "SUB",
1439 "MAD",
1440 "MUL",
1441 "RCP",
1442 "RSQ",
1443 "DP3",
1444 "DP4",
1445 "MIN",
1446 "MAX",
1447 "SLT",
1448 "SGE",
1449 "EXP",
1450 "LOG",
1451 "LIT",
1452 "DST",
1453 "LRP",
1454 "FRC",
1455 "M4x4",
1456 "M4x3",
1457 "M3x4",
1458 "M3x3",
1459 "M3x2",
1460 "CALL",
1461 "CALLNZ",
1462 "LOOP",
1463 "RET",
1464 "ENDLOOP",
1465 "LABEL",
1466 "DCL",
1467 "POW",
1468 "CRS",
1469 "SGN",
1470 "ABS",
1471 "NRM",
1472 "SINCOS",
1473 "REP",
1474 "ENDREP",
1475 "IF",
1476 "IFC",
1477 "ELSE",
1478 "ENDIF",
1479 "BREAK",
1480 "BREAKC",
1481 "MOVA",
1482 "DEFB",
1483 "DEFI",
1484 NULL,
1485 NULL,
1486 NULL,
1487 NULL,
1488 NULL,
1489 NULL,
1490 NULL,
1491 NULL,
1492 NULL,
1493 NULL,
1494 NULL,
1495 NULL,
1496 NULL,
1497 NULL,
1498 NULL,
1499 "TEXCOORD",
1500 "TEXKILL",
1501 "TEX",
1502 "TEXBEM",
1503 "TEXBEML",
1504 "TEXREG2AR",
1505 "TEXREG2GB",
1506 "TEXM3x2PAD",
1507 "TEXM3x2TEX",
1508 "TEXM3x3PAD",
1509 "TEXM3x3TEX",
1510 NULL,
1511 "TEXM3x3SPEC",
1512 "TEXM3x3VSPEC",
1513 "EXPP",
1514 "LOGP",
1515 "CND",
1516 "DEF",
1517 "TEXREG2RGB",
1518 "TEXDP3TEX",
1519 "TEXM3x2DEPTH",
1520 "TEXDP3",
1521 "TEXM3x3",
1522 "TEXDEPTH",
1523 "CMP",
1524 "BEM",
1525 "DP2ADD",
1526 "DSX",
1527 "DSY",
1528 "TEXLDD",
1529 "SETP",
1530 "TEXLDL",
1531 "BREAKP"
1532 };
1533
1534 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1535
1536 switch (opcode) {
1537 case D3DSIO_PHASE: return "PHASE";
1538 case D3DSIO_COMMENT: return "COMMENT";
1539 case D3DSIO_END: return "END";
1540 default:
1541 return NULL;
1542 }
1543 }
1544
1545 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1546 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1547 (inst).vert_version.max | \
1548 (inst).frag_version.min | \
1549 (inst).frag_version.max)
1550
1551 #define SPECIAL(name) \
1552 NineTranslateInstruction_##name
1553
1554 #define DECL_SPECIAL(name) \
1555 static HRESULT \
1556 NineTranslateInstruction_##name( struct shader_translator *tx )
1557
1558 static HRESULT
1559 NineTranslateInstruction_Generic(struct shader_translator *);
1560
DECL_SPECIAL(NOP)1561 DECL_SPECIAL(NOP)
1562 {
1563 /* Nothing to do. NOP was used to avoid hangs
1564 * with very old d3d drivers. */
1565 return D3D_OK;
1566 }
1567
DECL_SPECIAL(SUB)1568 DECL_SPECIAL(SUB)
1569 {
1570 struct ureg_program *ureg = tx->ureg;
1571 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1572 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1573 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1574
1575 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1576 return D3D_OK;
1577 }
1578
DECL_SPECIAL(ABS)1579 DECL_SPECIAL(ABS)
1580 {
1581 struct ureg_program *ureg = tx->ureg;
1582 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1583 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1584
1585 ureg_MOV(ureg, dst, ureg_abs(src));
1586 return D3D_OK;
1587 }
1588
DECL_SPECIAL(XPD)1589 DECL_SPECIAL(XPD)
1590 {
1591 struct ureg_program *ureg = tx->ureg;
1592 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1593 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1594 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1595
1596 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1597 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1598 TGSI_SWIZZLE_X, 0),
1599 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1600 TGSI_SWIZZLE_Y, 0));
1601 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1602 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1603 TGSI_SWIZZLE_Y, 0),
1604 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1605 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1606 ureg_src(dst));
1607 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1608 ureg_imm1f(ureg, 1));
1609 return D3D_OK;
1610 }
1611
DECL_SPECIAL(M4x4)1612 DECL_SPECIAL(M4x4)
1613 {
1614 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1615 }
1616
DECL_SPECIAL(M4x3)1617 DECL_SPECIAL(M4x3)
1618 {
1619 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1620 }
1621
DECL_SPECIAL(M3x4)1622 DECL_SPECIAL(M3x4)
1623 {
1624 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1625 }
1626
DECL_SPECIAL(M3x3)1627 DECL_SPECIAL(M3x3)
1628 {
1629 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1630 }
1631
DECL_SPECIAL(M3x2)1632 DECL_SPECIAL(M3x2)
1633 {
1634 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1635 }
1636
DECL_SPECIAL(CMP)1637 DECL_SPECIAL(CMP)
1638 {
1639 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1640 tx_src_param(tx, &tx->insn.src[0]),
1641 tx_src_param(tx, &tx->insn.src[2]),
1642 tx_src_param(tx, &tx->insn.src[1]));
1643 return D3D_OK;
1644 }
1645
DECL_SPECIAL(CND)1646 DECL_SPECIAL(CND)
1647 {
1648 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1649 struct ureg_dst cgt;
1650 struct ureg_src cnd;
1651
1652 /* the coissue flag was a tip for compilers to advise to
1653 * execute two operations at the same time, in cases
1654 * the two executions had same dst with different channels.
1655 * It has no effect on current hw. However it seems CND
1656 * is affected. The handling of this very specific case
1657 * handled below mimick wine behaviour */
1658 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1659 ureg_MOV(tx->ureg,
1660 dst, tx_src_param(tx, &tx->insn.src[1]));
1661 return D3D_OK;
1662 }
1663
1664 cnd = tx_src_param(tx, &tx->insn.src[0]);
1665 cgt = tx_scratch(tx);
1666
1667 if (tx->version.major == 1 && tx->version.minor < 4)
1668 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1669
1670 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1671
1672 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1673 tx_src_param(tx, &tx->insn.src[1]),
1674 tx_src_param(tx, &tx->insn.src[2]));
1675 return D3D_OK;
1676 }
1677
DECL_SPECIAL(CALL)1678 DECL_SPECIAL(CALL)
1679 {
1680 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1681 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1682 return D3D_OK;
1683 }
1684
DECL_SPECIAL(CALLNZ)1685 DECL_SPECIAL(CALLNZ)
1686 {
1687 struct ureg_program *ureg = tx->ureg;
1688 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1689
1690 if (!tx->native_integers)
1691 ureg_IF(ureg, src, tx_cond(tx));
1692 else
1693 ureg_UIF(ureg, src, tx_cond(tx));
1694 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1695 tx_endcond(tx);
1696 ureg_ENDIF(ureg);
1697 return D3D_OK;
1698 }
1699
DECL_SPECIAL(LOOP)1700 DECL_SPECIAL(LOOP)
1701 {
1702 struct ureg_program *ureg = tx->ureg;
1703 unsigned *label;
1704 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1705 struct ureg_dst ctr;
1706 struct ureg_dst tmp;
1707 struct ureg_src ctrx;
1708
1709 label = tx_bgnloop(tx);
1710 ctr = tx_get_loopctr(tx, TRUE);
1711 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1712
1713 /* src: num_iterations - start_value of al - step for al - 0 */
1714 ureg_MOV(ureg, ctr, src);
1715 ureg_BGNLOOP(tx->ureg, label);
1716 tmp = tx_scratch_scalar(tx);
1717 /* Initially ctr.x contains the number of iterations.
1718 * ctr.y will contain the updated value of al.
1719 * We decrease ctr.x at the end of every iteration,
1720 * and stop when it reaches 0. */
1721
1722 if (!tx->native_integers) {
1723 /* case src and ctr contain floats */
1724 /* to avoid precision issue, we stop when ctr <= 0.5 */
1725 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1726 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1727 } else {
1728 /* case src and ctr contain integers */
1729 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1730 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1731 }
1732 ureg_BRK(ureg);
1733 tx_endcond(tx);
1734 ureg_ENDIF(ureg);
1735 return D3D_OK;
1736 }
1737
DECL_SPECIAL(RET)1738 DECL_SPECIAL(RET)
1739 {
1740 ureg_RET(tx->ureg);
1741 return D3D_OK;
1742 }
1743
DECL_SPECIAL(ENDLOOP)1744 DECL_SPECIAL(ENDLOOP)
1745 {
1746 struct ureg_program *ureg = tx->ureg;
1747 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1748 struct ureg_dst dst_ctrx, dst_al;
1749 struct ureg_src src_ctr, al_counter;
1750
1751 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1752 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1753 src_ctr = ureg_src(ctr);
1754 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1755
1756 /* ctr.x -= 1
1757 * ctr.y (aL) += step */
1758 if (!tx->native_integers) {
1759 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1760 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1761 } else {
1762 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1763 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1764 }
1765 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1766 return D3D_OK;
1767 }
1768
DECL_SPECIAL(LABEL)1769 DECL_SPECIAL(LABEL)
1770 {
1771 unsigned k = tx->num_inst_labels;
1772 unsigned n = tx->insn.src[0].idx;
1773 assert(n < 2048);
1774 if (n >= k)
1775 tx->inst_labels = REALLOC(tx->inst_labels,
1776 k * sizeof(tx->inst_labels[0]),
1777 n * sizeof(tx->inst_labels[0]));
1778
1779 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1780 return D3D_OK;
1781 }
1782
DECL_SPECIAL(SINCOS)1783 DECL_SPECIAL(SINCOS)
1784 {
1785 struct ureg_program *ureg = tx->ureg;
1786 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1787 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1788
1789 assert(!(dst.WriteMask & 0xc));
1790
1791 /* z undefined, w untouched */
1792 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1793 ureg_scalar(src, TGSI_SWIZZLE_X));
1794 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1795 ureg_scalar(src, TGSI_SWIZZLE_X));
1796 return D3D_OK;
1797 }
1798
DECL_SPECIAL(SGN)1799 DECL_SPECIAL(SGN)
1800 {
1801 ureg_SSG(tx->ureg,
1802 tx_dst_param(tx, &tx->insn.dst[0]),
1803 tx_src_param(tx, &tx->insn.src[0]));
1804 return D3D_OK;
1805 }
1806
DECL_SPECIAL(REP)1807 DECL_SPECIAL(REP)
1808 {
1809 struct ureg_program *ureg = tx->ureg;
1810 unsigned *label;
1811 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1812 struct ureg_dst ctr;
1813 struct ureg_dst tmp;
1814 struct ureg_src ctrx;
1815
1816 label = tx_bgnloop(tx);
1817 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1818 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1819
1820 /* NOTE: rep must be constant, so we don't have to save the count */
1821 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1822
1823 /* rep: num_iterations - 0 - 0 - 0 */
1824 ureg_MOV(ureg, ctr, rep);
1825 ureg_BGNLOOP(ureg, label);
1826 tmp = tx_scratch_scalar(tx);
1827 /* Initially ctr.x contains the number of iterations.
1828 * We decrease ctr.x at the end of every iteration,
1829 * and stop when it reaches 0. */
1830
1831 if (!tx->native_integers) {
1832 /* case src and ctr contain floats */
1833 /* to avoid precision issue, we stop when ctr <= 0.5 */
1834 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1835 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1836 } else {
1837 /* case src and ctr contain integers */
1838 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1839 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1840 }
1841 ureg_BRK(ureg);
1842 tx_endcond(tx);
1843 ureg_ENDIF(ureg);
1844
1845 return D3D_OK;
1846 }
1847
DECL_SPECIAL(ENDREP)1848 DECL_SPECIAL(ENDREP)
1849 {
1850 struct ureg_program *ureg = tx->ureg;
1851 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1852 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1853 struct ureg_src src_ctr = ureg_src(ctr);
1854
1855 /* ctr.x -= 1 */
1856 if (!tx->native_integers)
1857 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1858 else
1859 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1860
1861 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1862 return D3D_OK;
1863 }
1864
DECL_SPECIAL(ENDIF)1865 DECL_SPECIAL(ENDIF)
1866 {
1867 tx_endcond(tx);
1868 ureg_ENDIF(tx->ureg);
1869 return D3D_OK;
1870 }
1871
DECL_SPECIAL(IF)1872 DECL_SPECIAL(IF)
1873 {
1874 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1875
1876 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1877 ureg_UIF(tx->ureg, src, tx_cond(tx));
1878 else
1879 ureg_IF(tx->ureg, src, tx_cond(tx));
1880
1881 return D3D_OK;
1882 }
1883
1884 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)1885 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1886 {
1887 switch (flags) {
1888 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1889 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1890 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1891 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1892 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1893 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1894 default:
1895 assert(!"invalid comparison flags");
1896 return TGSI_OPCODE_SGT;
1897 }
1898 }
1899
DECL_SPECIAL(IFC)1900 DECL_SPECIAL(IFC)
1901 {
1902 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1903 struct ureg_src src[2];
1904 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1905 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1906 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1907 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1908 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1909 return D3D_OK;
1910 }
1911
DECL_SPECIAL(ELSE)1912 DECL_SPECIAL(ELSE)
1913 {
1914 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1915 return D3D_OK;
1916 }
1917
DECL_SPECIAL(BREAKC)1918 DECL_SPECIAL(BREAKC)
1919 {
1920 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1921 struct ureg_src src[2];
1922 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1923 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1924 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1925 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
1926 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1927 ureg_BRK(tx->ureg);
1928 tx_endcond(tx);
1929 ureg_ENDIF(tx->ureg);
1930 return D3D_OK;
1931 }
1932
1933 static const char *sm1_declusage_names[] =
1934 {
1935 [D3DDECLUSAGE_POSITION] = "POSITION",
1936 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1937 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1938 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1939 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1940 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1941 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1942 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1943 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1944 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1945 [D3DDECLUSAGE_COLOR] = "COLOR",
1946 [D3DDECLUSAGE_FOG] = "FOG",
1947 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1948 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1949 };
1950
1951 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)1952 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1953 {
1954 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1955 }
1956
1957 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,boolean tc,struct sm1_semantic * dcl)1958 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1959 boolean tc,
1960 struct sm1_semantic *dcl)
1961 {
1962 BYTE index = dcl->usage_idx;
1963
1964 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1965 * we match to a TGSI_SEMANTIC_GENERIC with index.
1966 *
1967 * The index can be anything UINT16 and usage_idx is BYTE,
1968 * so we can fit everything. It doesn't matter if indices
1969 * are close together or low.
1970 *
1971 *
1972 * POSITION >= 1: 10 * index + 6
1973 * COLOR >= 2: 10 * (index-1) + 7
1974 * TEXCOORD[0..15]: index
1975 * BLENDWEIGHT: 10 * index + 18
1976 * BLENDINDICES: 10 * index + 19
1977 * NORMAL: 10 * index + 20
1978 * TANGENT: 10 * index + 21
1979 * BINORMAL: 10 * index + 22
1980 * TESSFACTOR: 10 * index + 23
1981 */
1982
1983 switch (dcl->usage) {
1984 case D3DDECLUSAGE_POSITION:
1985 case D3DDECLUSAGE_POSITIONT:
1986 case D3DDECLUSAGE_DEPTH:
1987 if (index == 0) {
1988 sem->Name = TGSI_SEMANTIC_POSITION;
1989 sem->Index = 0;
1990 } else {
1991 sem->Name = TGSI_SEMANTIC_GENERIC;
1992 sem->Index = 10 * index + 6;
1993 }
1994 break;
1995 case D3DDECLUSAGE_COLOR:
1996 if (index < 2) {
1997 sem->Name = TGSI_SEMANTIC_COLOR;
1998 sem->Index = index;
1999 } else {
2000 sem->Name = TGSI_SEMANTIC_GENERIC;
2001 sem->Index = 10 * (index-1) + 7;
2002 }
2003 break;
2004 case D3DDECLUSAGE_FOG:
2005 assert(index == 0);
2006 sem->Name = TGSI_SEMANTIC_FOG;
2007 sem->Index = 0;
2008 break;
2009 case D3DDECLUSAGE_PSIZE:
2010 assert(index == 0);
2011 sem->Name = TGSI_SEMANTIC_PSIZE;
2012 sem->Index = 0;
2013 break;
2014 case D3DDECLUSAGE_TEXCOORD:
2015 assert(index < 16);
2016 if (index < 8 && tc)
2017 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2018 else
2019 sem->Name = TGSI_SEMANTIC_GENERIC;
2020 sem->Index = index;
2021 break;
2022 case D3DDECLUSAGE_BLENDWEIGHT:
2023 sem->Name = TGSI_SEMANTIC_GENERIC;
2024 sem->Index = 10 * index + 18;
2025 break;
2026 case D3DDECLUSAGE_BLENDINDICES:
2027 sem->Name = TGSI_SEMANTIC_GENERIC;
2028 sem->Index = 10 * index + 19;
2029 break;
2030 case D3DDECLUSAGE_NORMAL:
2031 sem->Name = TGSI_SEMANTIC_GENERIC;
2032 sem->Index = 10 * index + 20;
2033 break;
2034 case D3DDECLUSAGE_TANGENT:
2035 sem->Name = TGSI_SEMANTIC_GENERIC;
2036 sem->Index = 10 * index + 21;
2037 break;
2038 case D3DDECLUSAGE_BINORMAL:
2039 sem->Name = TGSI_SEMANTIC_GENERIC;
2040 sem->Index = 10 * index + 22;
2041 break;
2042 case D3DDECLUSAGE_TESSFACTOR:
2043 sem->Name = TGSI_SEMANTIC_GENERIC;
2044 sem->Index = 10 * index + 23;
2045 break;
2046 case D3DDECLUSAGE_SAMPLE:
2047 sem->Name = TGSI_SEMANTIC_COUNT;
2048 sem->Index = 0;
2049 break;
2050 default:
2051 unreachable("Invalid DECLUSAGE.");
2052 break;
2053 }
2054 }
2055
2056 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2057 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2058 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2059 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2060 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2061 d3dstt_to_tgsi_tex(BYTE sampler_type)
2062 {
2063 switch (sampler_type) {
2064 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2065 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2066 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2067 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2068 default:
2069 assert(0);
2070 return TGSI_TEXTURE_UNKNOWN;
2071 }
2072 }
2073 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2074 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2075 {
2076 switch (sampler_type) {
2077 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2078 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2079 case NINED3DSTT_VOLUME:
2080 case NINED3DSTT_CUBE:
2081 default:
2082 assert(0);
2083 return TGSI_TEXTURE_UNKNOWN;
2084 }
2085 }
2086 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2087 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2088 {
2089 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2090 case 1: return TGSI_TEXTURE_1D;
2091 case 0: return TGSI_TEXTURE_2D;
2092 case 3: return TGSI_TEXTURE_3D;
2093 default:
2094 return TGSI_TEXTURE_CUBE;
2095 }
2096 }
2097
2098 static const char *
sm1_sampler_type_name(BYTE sampler_type)2099 sm1_sampler_type_name(BYTE sampler_type)
2100 {
2101 switch (sampler_type) {
2102 case NINED3DSTT_1D: return "1D";
2103 case NINED3DSTT_2D: return "2D";
2104 case NINED3DSTT_VOLUME: return "VOLUME";
2105 case NINED3DSTT_CUBE: return "CUBE";
2106 default:
2107 return "(D3DSTT_?)";
2108 }
2109 }
2110
2111 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2112 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2113 {
2114 switch (sem->Name) {
2115 case TGSI_SEMANTIC_POSITION:
2116 case TGSI_SEMANTIC_NORMAL:
2117 return TGSI_INTERPOLATE_LINEAR;
2118 case TGSI_SEMANTIC_BCOLOR:
2119 case TGSI_SEMANTIC_COLOR:
2120 return TGSI_INTERPOLATE_COLOR;
2121 case TGSI_SEMANTIC_FOG:
2122 case TGSI_SEMANTIC_GENERIC:
2123 case TGSI_SEMANTIC_TEXCOORD:
2124 case TGSI_SEMANTIC_CLIPDIST:
2125 case TGSI_SEMANTIC_CLIPVERTEX:
2126 return TGSI_INTERPOLATE_PERSPECTIVE;
2127 case TGSI_SEMANTIC_EDGEFLAG:
2128 case TGSI_SEMANTIC_FACE:
2129 case TGSI_SEMANTIC_INSTANCEID:
2130 case TGSI_SEMANTIC_PCOORD:
2131 case TGSI_SEMANTIC_PRIMID:
2132 case TGSI_SEMANTIC_PSIZE:
2133 case TGSI_SEMANTIC_VERTEXID:
2134 return TGSI_INTERPOLATE_CONSTANT;
2135 default:
2136 assert(0);
2137 return TGSI_INTERPOLATE_CONSTANT;
2138 }
2139 }
2140
/*
 * DCL: translate a declaration instruction.
 *
 * Reads the declaration's semantic with sm1_read_semantic(), dumps it to the
 * shader debug channel and then, depending on what is being declared:
 *  - sampler: declares the TGSI sampler, sets its bit in info->sampler_mask
 *    and stores the TGSI texture target in tx->sampler_targets[] (shadow
 *    variant when the bit is set in info->sampler_mask_shadow);
 *  - vertex shader input: declares the input and records its usage in
 *    info->input_map[] / info->num_inputs;
 *  - vertex shader output (version.major >= 3): declares a masked output in
 *    tx->regs.o[] and records it with nine_record_outputs(); position (when
 *    info->process_vertices is set) and point size outputs are redirected to
 *    temporaries;
 *  - pixel shader input (version.major >= 3): declares the interpolated
 *    fragment input in tx->regs.v[].
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL for a pixel shader input declared
 * with POSITIONT, TESSFACTOR or POSITION usage.
 */
DECL_SPECIAL(DCL)
{
    struct ureg_program *ureg = tx->ureg;
    boolean is_input;
    boolean is_sampler;
    struct tgsi_declaration_semantic tgsi;
    struct sm1_semantic sem;
    sm1_read_semantic(tx, &sem);

    is_input = sem.reg.file == D3DSPR_INPUT;
    is_sampler =
        sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;

    /* Debug dump of the declaration: the register, then the sampler type,
     * the named usage (SM3+), the raw usage numbers, or nothing. */
    DUMP("DCL ");
    sm1_dump_dst_param(&sem.reg);
    if (is_sampler)
        DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
    else
    if (tx->version.major >= 3)
        DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
    else
    if (sem.usage | sem.usage_idx)
        DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
    else
        DUMP("\n");

    if (is_sampler) {
        /* NOTE(review): sem.reg.idx is used as a shift count and as an index
         * into tx->sampler_targets[] without a range check visible here;
         * confirm it is validated before this point. */
        const unsigned m = 1 << sem.reg.idx;
        ureg_DECL_sampler(ureg, sem.reg.idx);
        tx->info->sampler_mask |= m;
        tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
            d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
            d3dstt_to_tgsi_tex(sem.sampler_type);
        return D3D_OK;
    }

    sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
    if (IS_VS) {
        if (is_input) {
            /* linkage outside of shader with vertex declaration */
            ureg_DECL_vs_input(ureg, sem.reg.idx);
            assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
            tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
            tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
            /* NOTE: preserving order in case of indirect access */
        } else
        if (tx->version.major >= 3) {
            /* SM2 output semantic determined by file */
            assert(sem.reg.mask != 0);
            if (sem.usage == D3DDECLUSAGE_POSITIONT)
                tx->info->position_t = TRUE;
            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
            assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
            tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
                ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
            nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
            /* With process_vertices, keep the real POSITION0 output in
             * oPos_out and let the shader write to a temporary instead. */
            if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
                tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
                tx->regs.oPos = tx->regs.o[sem.reg.idx];
            }

            /* Point size is likewise written to a temporary (oPts). */
            if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
                tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
                tx->regs.oPts = tx->regs.o[sem.reg.idx];
            }
        }
    } else {
        if (is_input && tx->version.major >= 3) {
            unsigned interp_location = 0;
            /* SM3 only, SM2 input semantic determined by file */
            assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
            assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
            /* PositionT and tessfactor forbidden */
            if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
                return D3DERR_INVALIDCALL;

            if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
                /* Position0 is forbidden (likely because vPos already does that) */
                if (sem.usage == D3DDECLUSAGE_POSITION)
                    return D3DERR_INVALIDCALL;
                /* Following code is for depth */
                tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
                return D3D_OK;
            }

            /* Centroid sampling when the declaration asks for it, or for
             * colors when force_color_in_centroid is set. */
            if (sem.reg.mod & NINED3DSPDM_CENTROID ||
                (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
                interp_location = TGSI_INTERPOLATE_LOC_CENTROID;

            tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
                ureg, tgsi.Name, tgsi.Index,
                nine_tgsi_to_interp_mode(&tgsi),
                0, /* cylwrap */
                interp_location, 0, 1);
        } else
        /* The "&& 0" makes this branch unreachable: it is disabled code. */
        if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
            /* FragColor or FragDepth */
            assert(sem.reg.mask != 0);
            ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
                                    0, 1);
        }
    }
    return D3D_OK;
}
2246
DECL_SPECIAL(DEF)2247 DECL_SPECIAL(DEF)
2248 {
2249 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2250 return D3D_OK;
2251 }
2252
DECL_SPECIAL(DEFB)2253 DECL_SPECIAL(DEFB)
2254 {
2255 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2256 return D3D_OK;
2257 }
2258
DECL_SPECIAL(DEFI)2259 DECL_SPECIAL(DEFI)
2260 {
2261 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2262 return D3D_OK;
2263 }
2264
DECL_SPECIAL(POW)2265 DECL_SPECIAL(POW)
2266 {
2267 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2268 struct ureg_src src[2] = {
2269 tx_src_param(tx, &tx->insn.src[0]),
2270 tx_src_param(tx, &tx->insn.src[1])
2271 };
2272 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2273 return D3D_OK;
2274 }
2275
DECL_SPECIAL(RSQ)2276 DECL_SPECIAL(RSQ)
2277 {
2278 struct ureg_program *ureg = tx->ureg;
2279 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2280 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2281 struct ureg_dst tmp = tx_scratch(tx);
2282 ureg_RSQ(ureg, tmp, ureg_abs(src));
2283 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2284 return D3D_OK;
2285 }
2286
DECL_SPECIAL(LOG)2287 DECL_SPECIAL(LOG)
2288 {
2289 struct ureg_program *ureg = tx->ureg;
2290 struct ureg_dst tmp = tx_scratch_scalar(tx);
2291 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2292 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2293 ureg_LG2(ureg, tmp, ureg_abs(src));
2294 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2295 return D3D_OK;
2296 }
2297
DECL_SPECIAL(LIT)2298 DECL_SPECIAL(LIT)
2299 {
2300 struct ureg_program *ureg = tx->ureg;
2301 struct ureg_dst tmp = tx_scratch(tx);
2302 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2303 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2304 ureg_LIT(ureg, tmp, src);
2305 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2306 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2307 * it 0^0 if src.w=0, which value is driver dependent. */
2308 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2309 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2310 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2311 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2312 return D3D_OK;
2313 }
2314
DECL_SPECIAL(NRM)2315 DECL_SPECIAL(NRM)
2316 {
2317 struct ureg_program *ureg = tx->ureg;
2318 struct ureg_dst tmp = tx_scratch_scalar(tx);
2319 struct ureg_src nrm = tx_src_scalar(tmp);
2320 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2321 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2322 ureg_DP3(ureg, tmp, src, src);
2323 ureg_RSQ(ureg, tmp, nrm);
2324 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2325 ureg_MUL(ureg, dst, src, nrm);
2326 return D3D_OK;
2327 }
2328
DECL_SPECIAL(DP2ADD)2329 DECL_SPECIAL(DP2ADD)
2330 {
2331 struct ureg_dst tmp = tx_scratch_scalar(tx);
2332 struct ureg_src dp2 = tx_src_scalar(tmp);
2333 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2334 struct ureg_src src[3];
2335 int i;
2336 for (i = 0; i < 3; ++i)
2337 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2338 assert_replicate_swizzle(&src[2]);
2339
2340 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2341 ureg_ADD(tx->ureg, dst, src[2], dp2);
2342
2343 return D3D_OK;
2344 }
2345
DECL_SPECIAL(TEXCOORD)2346 DECL_SPECIAL(TEXCOORD)
2347 {
2348 struct ureg_program *ureg = tx->ureg;
2349 const unsigned s = tx->insn.dst[0].idx;
2350 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2351
2352 tx_texcoord_alloc(tx, s);
2353 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2354 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2355
2356 return D3D_OK;
2357 }
2358
DECL_SPECIAL(TEXCOORD_ps14)2359 DECL_SPECIAL(TEXCOORD_ps14)
2360 {
2361 struct ureg_program *ureg = tx->ureg;
2362 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2363 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2364
2365 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2366
2367 ureg_MOV(ureg, dst, src);
2368
2369 return D3D_OK;
2370 }
2371
DECL_SPECIAL(TEXKILL)2372 DECL_SPECIAL(TEXKILL)
2373 {
2374 struct ureg_src reg;
2375
2376 if (tx->version.major > 1 || tx->version.minor > 3) {
2377 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2378 } else {
2379 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2380 reg = tx->regs.vT[tx->insn.dst[0].idx];
2381 }
2382 if (tx->version.major < 2)
2383 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2384 ureg_KILL_IF(tx->ureg, reg);
2385
2386 return D3D_OK;
2387 }
2388
/*
 * TEXBEM / TEXBEML: bump-environment-mapped texture lookup (ps 1.x only,
 * asserted below). Both D3DSIO_TEXBEM and D3DSIO_TEXBEML are routed here by
 * inst_table.
 *
 * With m the destination stage and n the source stage, the texture
 * coordinates of stage m are perturbed by the 2x2 bump matrix of stage m
 * applied to the red/green channels of t(n), and sampler m is sampled at the
 * result. For TEXBEML the sample is additionally multiplied by
 * t(n).b * lscale + loffset.
 *
 * The bump matrix is read from float constant 8 + m; luminance scale/offset
 * come from float constant 8 + 8 + m / 2. Sets info->bumpenvmat_needed and
 * the sampler's bit in info->sampler_mask. Always returns D3D_OK.
 */
DECL_SPECIAL(TEXBEM)
{
    struct ureg_program *ureg = tx->ureg;
    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
    struct ureg_dst tmp, tmp2, texcoord;
    struct ureg_src sample, m00, m01, m10, m11;
    struct ureg_src bumpenvlscale, bumpenvloffset;
    const int m = tx->insn.dst[0].idx;
    const int n = tx->insn.src[0].idx;

    assert(tx->version.major == 1);

    sample = ureg_DECL_sampler(ureg, m);
    tx->info->sampler_mask |= 1 << m;

    tx_texcoord_alloc(tx, m);

    tmp = tx_scratch(tx);
    tmp2 = tx_scratch(tx);
    texcoord = tx_scratch(tx);
    /*
     * Bump-env-matrix:
     * 00 is X
     * 01 is Y
     * 10 is Z
     * 11 is W
     */
    /* NOTE(review): only the luminance constant (8 + 8 + m/2) is marked as
     * used here, not the matrix constant 8 + m read just below; confirm
     * that is intended. */
    nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
    m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
    m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
    m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
    m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);

    /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
    if (m % 2 == 0) {
        bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
        bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
    } else {
        bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
        bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
    }

    /* texcoord = stage m coordinates after apply_ps1x_projection() */
    apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);

    /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
    /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
             NINE_APPLY_SWIZZLE(ureg_src(tmp), X));

    /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
    /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
    ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
             NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
             NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));

    /* Now the texture coordinates are in tmp.xy */

    /* Any other opcode emits no texture fetch at all. */
    if (tx->insn.opcode == D3DSIO_TEXBEM) {
        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
    } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
        /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
        ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
        ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
                 bumpenvlscale, bumpenvloffset);
        ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
    }

    tx->info->bumpenvmat_needed = 1;

    return D3D_OK;
}
2465
DECL_SPECIAL(TEXREG2AR)2466 DECL_SPECIAL(TEXREG2AR)
2467 {
2468 struct ureg_program *ureg = tx->ureg;
2469 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2470 struct ureg_src sample;
2471 const int m = tx->insn.dst[0].idx;
2472 const int n = tx->insn.src[0].idx;
2473 assert(m >= 0 && m > n);
2474
2475 sample = ureg_DECL_sampler(ureg, m);
2476 tx->info->sampler_mask |= 1 << m;
2477 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2478
2479 return D3D_OK;
2480 }
2481
DECL_SPECIAL(TEXREG2GB)2482 DECL_SPECIAL(TEXREG2GB)
2483 {
2484 struct ureg_program *ureg = tx->ureg;
2485 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2486 struct ureg_src sample;
2487 const int m = tx->insn.dst[0].idx;
2488 const int n = tx->insn.src[0].idx;
2489 assert(m >= 0 && m > n);
2490
2491 sample = ureg_DECL_sampler(ureg, m);
2492 tx->info->sampler_mask |= 1 << m;
2493 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2494
2495 return D3D_OK;
2496 }
2497
/* TEXM3x2PAD: emits nothing and reports success. The texcoord row of this
 * stage is consumed by the TEXM3x2TEX / TEXM3x2DEPTH handler of the
 * following stage, which reads vT[m] and vT[m+1] itself. */
DECL_SPECIAL(TEXM3x2PAD)
{
    return D3D_OK; /* this is just padding */
}
2502
DECL_SPECIAL(TEXM3x2TEX)2503 DECL_SPECIAL(TEXM3x2TEX)
2504 {
2505 struct ureg_program *ureg = tx->ureg;
2506 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2507 struct ureg_src sample;
2508 const int m = tx->insn.dst[0].idx - 1;
2509 const int n = tx->insn.src[0].idx;
2510 assert(m >= 0 && m > n);
2511
2512 tx_texcoord_alloc(tx, m);
2513 tx_texcoord_alloc(tx, m+1);
2514
2515 /* performs the matrix multiplication */
2516 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2517 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2518
2519 sample = ureg_DECL_sampler(ureg, m + 1);
2520 tx->info->sampler_mask |= 1 << (m + 1);
2521 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2522
2523 return D3D_OK;
2524 }
2525
/* TEXM3x3PAD: emits nothing and reports success. The texcoord rows of the
 * padding stages are consumed by the closing TEXM3x3* handler, which reads
 * vT[m], vT[m+1] and vT[m+2] itself. */
DECL_SPECIAL(TEXM3x3PAD)
{
    return D3D_OK; /* this is just padding */
}
2530
DECL_SPECIAL(TEXM3x3SPEC)2531 DECL_SPECIAL(TEXM3x3SPEC)
2532 {
2533 struct ureg_program *ureg = tx->ureg;
2534 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2535 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2536 struct ureg_src sample;
2537 struct ureg_dst tmp;
2538 const int m = tx->insn.dst[0].idx - 2;
2539 const int n = tx->insn.src[0].idx;
2540 assert(m >= 0 && m > n);
2541
2542 tx_texcoord_alloc(tx, m);
2543 tx_texcoord_alloc(tx, m+1);
2544 tx_texcoord_alloc(tx, m+2);
2545
2546 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2547 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2548 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2549
2550 sample = ureg_DECL_sampler(ureg, m + 2);
2551 tx->info->sampler_mask |= 1 << (m + 2);
2552 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2553
2554 /* At this step, dst = N = (u', w', z').
2555 * We want dst to be the texture sampled at (u'', w'', z''), with
2556 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2557 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2558 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2559 /* at this step tmp.x = 1/N.N */
2560 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2561 /* at this step tmp.y = N.E */
2562 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2563 /* at this step tmp.x = N.E/N.N */
2564 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2565 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2566 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2567 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2568 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2569
2570 return D3D_OK;
2571 }
2572
DECL_SPECIAL(TEXREG2RGB)2573 DECL_SPECIAL(TEXREG2RGB)
2574 {
2575 struct ureg_program *ureg = tx->ureg;
2576 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2577 struct ureg_src sample;
2578 const int m = tx->insn.dst[0].idx;
2579 const int n = tx->insn.src[0].idx;
2580 assert(m >= 0 && m > n);
2581
2582 sample = ureg_DECL_sampler(ureg, m);
2583 tx->info->sampler_mask |= 1 << m;
2584 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2585
2586 return D3D_OK;
2587 }
2588
DECL_SPECIAL(TEXDP3TEX)2589 DECL_SPECIAL(TEXDP3TEX)
2590 {
2591 struct ureg_program *ureg = tx->ureg;
2592 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2593 struct ureg_dst tmp;
2594 struct ureg_src sample;
2595 const int m = tx->insn.dst[0].idx;
2596 const int n = tx->insn.src[0].idx;
2597 assert(m >= 0 && m > n);
2598
2599 tx_texcoord_alloc(tx, m);
2600
2601 tmp = tx_scratch(tx);
2602 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2603 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2604
2605 sample = ureg_DECL_sampler(ureg, m);
2606 tx->info->sampler_mask |= 1 << m;
2607 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2608
2609 return D3D_OK;
2610 }
2611
DECL_SPECIAL(TEXM3x2DEPTH)2612 DECL_SPECIAL(TEXM3x2DEPTH)
2613 {
2614 struct ureg_program *ureg = tx->ureg;
2615 struct ureg_dst tmp;
2616 const int m = tx->insn.dst[0].idx - 1;
2617 const int n = tx->insn.src[0].idx;
2618 assert(m >= 0 && m > n);
2619
2620 tx_texcoord_alloc(tx, m);
2621 tx_texcoord_alloc(tx, m+1);
2622
2623 tmp = tx_scratch(tx);
2624
2625 /* performs the matrix multiplication */
2626 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2627 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2628
2629 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2630 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2631 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2632 /* res = 'w' == 0 ? 1.0 : z/w */
2633 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2634 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2635 /* replace the depth for depth testing with the result */
2636 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2637 TGSI_WRITEMASK_Z, 0, 1);
2638 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2639 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2640 return D3D_OK;
2641 }
2642
DECL_SPECIAL(TEXDP3)2643 DECL_SPECIAL(TEXDP3)
2644 {
2645 struct ureg_program *ureg = tx->ureg;
2646 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2647 const int m = tx->insn.dst[0].idx;
2648 const int n = tx->insn.src[0].idx;
2649 assert(m >= 0 && m > n);
2650
2651 tx_texcoord_alloc(tx, m);
2652
2653 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2654
2655 return D3D_OK;
2656 }
2657
/*
 * TEXM3x3: final row of a 3x3 matrix multiply; the handler switches on
 * tx->insn.opcode to serve D3DSIO_TEXM3x3, D3DSIO_TEXM3x3TEX and
 * D3DSIO_TEXM3x3VSPEC.
 *
 * With m two stages before the destination stage and n the source stage,
 * dst.xyz = (dot3(vT[m], t(n)), dot3(vT[m+1], t(n)), dot3(vT[m+2], t(n))).
 * Then, per opcode:
 *  - TEXM3x3:      dst.w = 1.0, no texture lookup;
 *  - TEXM3x3TEX:   dst = sample(stage m+2, dst);
 *  - TEXM3x3VSPEC: dst = sample(stage m+2, 2 * (N.E / N.N) * N - E), where
 *                  N = dst.xyz and E is assembled from the w components of
 *                  the three texcoords.
 *
 * Returns D3D_OK, or D3DERR_INVALIDCALL for any other opcode (the three DP3
 * instructions have already been emitted by then).
 */
DECL_SPECIAL(TEXM3x3)
{
    struct ureg_program *ureg = tx->ureg;
    struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
    struct ureg_src sample;
    struct ureg_dst E, tmp;
    const int m = tx->insn.dst[0].idx - 2;
    const int n = tx->insn.src[0].idx;
    assert(m >= 0 && m > n);

    tx_texcoord_alloc(tx, m);
    tx_texcoord_alloc(tx, m+1);
    tx_texcoord_alloc(tx, m+2);

    /* performs the matrix multiplication, one row per component */
    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
    ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));

    switch (tx->insn.opcode) {
    case D3DSIO_TEXM3x3:
        ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
        break;
    case D3DSIO_TEXM3x3TEX:
        sample = ureg_DECL_sampler(ureg, m + 2);
        tx->info->sampler_mask |= 1 << (m + 2);
        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
        break;
    case D3DSIO_TEXM3x3VSPEC:
        sample = ureg_DECL_sampler(ureg, m + 2);
        tx->info->sampler_mask |= 1 << (m + 2);
        E = tx_scratch(tx);
        tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
        /* E = (vT[m].w, vT[m+1].w, vT[m+2].w) */
        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
        ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
        /* At this step, dst = N = (u', w', z').
         * We want dst to be the texture sampled at (u'', w'', z''), with
         * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
        ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
        ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
        /* at this step tmp.x = 1/N.N */
        ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
        /* at this step tmp.y = N.E */
        ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
        /* at this step tmp.x = N.E/N.N */
        ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
        ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
        /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
        ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
        ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
        break;
    default:
        return D3DERR_INVALIDCALL;
    }
    return D3D_OK;
}
2714
DECL_SPECIAL(TEXDEPTH)2715 DECL_SPECIAL(TEXDEPTH)
2716 {
2717 struct ureg_program *ureg = tx->ureg;
2718 struct ureg_dst r5;
2719 struct ureg_src r5r, r5g;
2720
2721 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2722
2723 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2724 * r5 won't be used afterward, thus we can use r5.ba */
2725 r5 = tx->regs.r[5];
2726 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2727 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2728
2729 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2730 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2731 /* r5.r = r/g */
2732 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2733 r5r, ureg_imm1f(ureg, 1.0f));
2734 /* replace the depth for depth testing with the result */
2735 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2736 TGSI_WRITEMASK_Z, 0, 1);
2737 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2738
2739 return D3D_OK;
2740 }
2741
DECL_SPECIAL(BEM)2742 DECL_SPECIAL(BEM)
2743 {
2744 struct ureg_program *ureg = tx->ureg;
2745 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2746 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2747 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2748 struct ureg_src m00, m01, m10, m11;
2749 const int m = tx->insn.dst[0].idx;
2750 struct ureg_dst tmp;
2751 /*
2752 * Bump-env-matrix:
2753 * 00 is X
2754 * 01 is Y
2755 * 10 is Z
2756 * 11 is W
2757 */
2758 nine_info_mark_const_f_used(tx->info, 8 + m);
2759 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2760 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2761 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2762 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2763 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2764 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2765 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2766 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2767 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2768 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2769
2770 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2771 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2772 NINE_APPLY_SWIZZLE(src1, X), src0);
2773 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2774 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2775 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2776 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2777
2778 tx->info->bumpenvmat_needed = 1;
2779
2780 return D3D_OK;
2781 }
2782
DECL_SPECIAL(TEXLD)2783 DECL_SPECIAL(TEXLD)
2784 {
2785 struct ureg_program *ureg = tx->ureg;
2786 unsigned target;
2787 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2788 struct ureg_src src[2] = {
2789 tx_src_param(tx, &tx->insn.src[0]),
2790 tx_src_param(tx, &tx->insn.src[1])
2791 };
2792 assert(tx->insn.src[1].idx >= 0 &&
2793 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2794 target = tx->sampler_targets[tx->insn.src[1].idx];
2795
2796 switch (tx->insn.flags) {
2797 case 0:
2798 ureg_TEX(ureg, dst, target, src[0], src[1]);
2799 break;
2800 case NINED3DSI_TEXLD_PROJECT:
2801 ureg_TXP(ureg, dst, target, src[0], src[1]);
2802 break;
2803 case NINED3DSI_TEXLD_BIAS:
2804 ureg_TXB(ureg, dst, target, src[0], src[1]);
2805 break;
2806 default:
2807 assert(0);
2808 return D3DERR_INVALIDCALL;
2809 }
2810 return D3D_OK;
2811 }
2812
DECL_SPECIAL(TEXLD_14)2813 DECL_SPECIAL(TEXLD_14)
2814 {
2815 struct ureg_program *ureg = tx->ureg;
2816 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2817 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2818 const unsigned s = tx->insn.dst[0].idx;
2819 const unsigned t = ps1x_sampler_type(tx->info, s);
2820
2821 tx->info->sampler_mask |= 1 << s;
2822 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2823
2824 return D3D_OK;
2825 }
2826
DECL_SPECIAL(TEX)2827 DECL_SPECIAL(TEX)
2828 {
2829 struct ureg_program *ureg = tx->ureg;
2830 const unsigned s = tx->insn.dst[0].idx;
2831 const unsigned t = ps1x_sampler_type(tx->info, s);
2832 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2833 struct ureg_src src[2];
2834
2835 tx_texcoord_alloc(tx, s);
2836
2837 src[0] = tx->regs.vT[s];
2838 src[1] = ureg_DECL_sampler(ureg, s);
2839 tx->info->sampler_mask |= 1 << s;
2840
2841 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2842
2843 return D3D_OK;
2844 }
2845
DECL_SPECIAL(TEXLDD)2846 DECL_SPECIAL(TEXLDD)
2847 {
2848 unsigned target;
2849 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2850 struct ureg_src src[4] = {
2851 tx_src_param(tx, &tx->insn.src[0]),
2852 tx_src_param(tx, &tx->insn.src[1]),
2853 tx_src_param(tx, &tx->insn.src[2]),
2854 tx_src_param(tx, &tx->insn.src[3])
2855 };
2856 assert(tx->insn.src[1].idx >= 0 &&
2857 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2858 target = tx->sampler_targets[tx->insn.src[1].idx];
2859
2860 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2861 return D3D_OK;
2862 }
2863
DECL_SPECIAL(TEXLDL)2864 DECL_SPECIAL(TEXLDL)
2865 {
2866 unsigned target;
2867 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2868 struct ureg_src src[2] = {
2869 tx_src_param(tx, &tx->insn.src[0]),
2870 tx_src_param(tx, &tx->insn.src[1])
2871 };
2872 assert(tx->insn.src[1].idx >= 0 &&
2873 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2874 target = tx->sampler_targets[tx->insn.src[1].idx];
2875
2876 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2877 return D3D_OK;
2878 }
2879
/* SETP: not implemented. Defers to STUB() with D3DERR_INVALIDCALL
 * (STUB is defined elsewhere; presumably it returns that code - confirm). */
DECL_SPECIAL(SETP)
{
    STUB(D3DERR_INVALIDCALL);
}
2884
/* BREAKP: not implemented. Defers to STUB() with D3DERR_INVALIDCALL
 * (STUB is defined elsewhere; presumably it returns that code - confirm). */
DECL_SPECIAL(BREAKP)
{
    STUB(D3DERR_INVALIDCALL);
}
2889
/* PHASE: accepted but ignored; nothing is emitted and D3D_OK is returned. */
DECL_SPECIAL(PHASE)
{
    return D3D_OK; /* we don't care about phase */
}
2894
/* COMMENT: accepted but ignored; nothing is emitted and D3D_OK is returned. */
DECL_SPECIAL(COMMENT)
{
    return D3D_OK; /* nothing to do */
}
2899
2900
/* Build one struct sm1_op_info initializer for inst_table.
 *   o        - D3D opcode suffix, pasted onto D3DSIO_
 *   t        - TGSI opcode suffix, pasted onto TGSI_OPCODE_
 *   vv1, vv2 - first version pair (presumably the min/max vertex shader
 *              version the entry applies to - confirm against sm1_op_info)
 *   pv1, pv2 - second version pair (presumably min/max pixel shader version)
 *   d, s     - presumably the number of destination / source parameters
 *   h        - handler, either SPECIAL(x) or NULL in the table entries
 */
#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
    { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
2903
2904 struct sm1_op_info inst_table[] =
2905 {
2906 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
2907 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2908 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2909 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
2910 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2911 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2912 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2913 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2914 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2915 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2916 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2917 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2918 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2919 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2920 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2921 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2922 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2923 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2924 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2925 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2926
2927 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2928 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2929 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2930 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2931 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2932
2933 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2934 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2935 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2936 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2937 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2938 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2939
2940 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2941
2942 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2943 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
2944 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2945 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
2946 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2947
2948 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2949 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2950
2951 /* More flow control */
2952 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2953 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2954 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2955 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2956 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2957 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2958 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2959 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2960 /* we don't write to the address register, but a normal register (copied
2961 * when needed to the address register), thus we don't use ARR */
2962 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2963
2964 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2965 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2966
2967 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2968 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2969 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2970 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2971 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2972 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2973 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2974 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2975 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2976 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2977 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2978 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2979 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2980 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2981 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2982 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2983
2984 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2985 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2986 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2987 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2988
2989 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2990
2991 /* More tex stuff */
2992 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2993 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2994 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2995 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2996 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2997 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2998
2999 /* Misc */
3000 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3001 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3002 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3003 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3004 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3005 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3006 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3007 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3008 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3009 };
3010
/* Op info for D3DSIO_PHASE. sm1_parse_instruction selects this entry by opcode
 * when the opcode value does not index into tx->op_info_map. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3013
/* Op info for D3DSIO_COMMENT. sm1_parse_instruction selects this entry by
 * opcode when the opcode value does not index into tx->op_info_map. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3016
3017 static void
create_op_info_map(struct shader_translator * tx)3018 create_op_info_map(struct shader_translator *tx)
3019 {
3020 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3021 unsigned i;
3022
3023 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3024 tx->op_info_map[i] = -1;
3025
3026 if (tx->processor == PIPE_SHADER_VERTEX) {
3027 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3028 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3029 if (inst_table[i].vert_version.min <= version &&
3030 inst_table[i].vert_version.max >= version)
3031 tx->op_info_map[inst_table[i].sio] = i;
3032 }
3033 } else {
3034 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3035 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3036 if (inst_table[i].frag_version.min <= version &&
3037 inst_table[i].frag_version.max >= version)
3038 tx->op_info_map[inst_table[i].sio] = i;
3039 }
3040 }
3041 }
3042
3043 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3044 NineTranslateInstruction_Generic(struct shader_translator *tx)
3045 {
3046 struct ureg_dst dst[1];
3047 struct ureg_src src[4];
3048 unsigned i;
3049
3050 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3051 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3052 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3053 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3054
3055 ureg_insn(tx->ureg, tx->insn.info->opcode,
3056 dst, tx->insn.ndst,
3057 src, tx->insn.nsrc, 0);
3058 return D3D_OK;
3059 }
3060
3061 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3062 TOKEN_PEEK(struct shader_translator *tx)
3063 {
3064 return *(tx->parse);
3065 }
3066
3067 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3068 TOKEN_NEXT(struct shader_translator *tx)
3069 {
3070 return *(tx->parse)++;
3071 }
3072
3073 static inline void
TOKEN_JUMP(struct shader_translator * tx)3074 TOKEN_JUMP(struct shader_translator *tx)
3075 {
3076 if (tx->parse_next && tx->parse != tx->parse_next) {
3077 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3078 tx->parse = tx->parse_next;
3079 }
3080 }
3081
3082 static inline boolean
sm1_parse_eof(struct shader_translator * tx)3083 sm1_parse_eof(struct shader_translator *tx)
3084 {
3085 return TOKEN_PEEK(tx) == NINED3DSP_END;
3086 }
3087
3088 static void
sm1_read_version(struct shader_translator * tx)3089 sm1_read_version(struct shader_translator *tx)
3090 {
3091 const DWORD tok = TOKEN_NEXT(tx);
3092
3093 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3094 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3095
3096 switch (tok >> 16) {
3097 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3098 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3099 default:
3100 DBG("Invalid shader type: %x\n", tok);
3101 tx->processor = ~0;
3102 break;
3103 }
3104 }
3105
3106 /* This is just to check if we parsed the instruction properly. */
3107 static void
sm1_parse_get_skip(struct shader_translator * tx)3108 sm1_parse_get_skip(struct shader_translator *tx)
3109 {
3110 const DWORD tok = TOKEN_PEEK(tx);
3111
3112 if (tx->version.major >= 2) {
3113 tx->parse_next = tx->parse + 1 /* this */ +
3114 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3115 } else {
3116 tx->parse_next = NULL; /* TODO: determine from param count */
3117 }
3118 }
3119
3120 static void
sm1_print_comment(const char * comment,UINT size)3121 sm1_print_comment(const char *comment, UINT size)
3122 {
3123 if (!size)
3124 return;
3125 /* TODO */
3126 }
3127
3128 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3129 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3130 {
3131 DWORD tok = TOKEN_PEEK(tx);
3132
3133 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3134 {
3135 const char *comment = "";
3136 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3137 tx->parse += size + 1;
3138
3139 if (print)
3140 sm1_print_comment(comment, size);
3141
3142 tok = TOKEN_PEEK(tx);
3143 }
3144 }
3145
3146 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3147 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3148 {
3149 *reg = TOKEN_NEXT(tx);
3150
3151 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3152 {
3153 if (tx->version.major < 2)
3154 *rel = (1 << 31) |
3155 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3156 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3157 D3DSP_NOSWIZZLE;
3158 else
3159 *rel = TOKEN_NEXT(tx);
3160 }
3161 }
3162
3163 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3164 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3165 {
3166 int8_t shift;
3167 dst->file =
3168 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3169 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3170 dst->type = TGSI_RETURN_TYPE_FLOAT;
3171 dst->idx = tok & D3DSP_REGNUM_MASK;
3172 dst->rel = NULL;
3173 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3174 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3175 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3176 dst->shift = (shift & 0x7) - (shift & 0x8);
3177 }
3178
3179 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3180 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3181 {
3182 src->file =
3183 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3184 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3185 src->type = TGSI_RETURN_TYPE_FLOAT;
3186 src->idx = tok & D3DSP_REGNUM_MASK;
3187 src->rel = NULL;
3188 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3189 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3190
3191 switch (src->file) {
3192 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3193 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3194 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3195 default:
3196 break;
3197 }
3198 }
3199
3200 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3201 sm1_parse_immediate(struct shader_translator *tx,
3202 struct sm1_src_param *imm)
3203 {
3204 imm->file = NINED3DSPR_IMMEDIATE;
3205 imm->idx = INT_MIN;
3206 imm->rel = NULL;
3207 imm->swizzle = NINED3DSP_NOSWIZZLE;
3208 imm->mod = 0;
3209 switch (tx->insn.opcode) {
3210 case D3DSIO_DEF:
3211 imm->type = NINED3DSPTYPE_FLOAT4;
3212 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3213 tx->parse += 4;
3214 break;
3215 case D3DSIO_DEFI:
3216 imm->type = NINED3DSPTYPE_INT4;
3217 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3218 tx->parse += 4;
3219 break;
3220 case D3DSIO_DEFB:
3221 imm->type = NINED3DSPTYPE_BOOL;
3222 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3223 tx->parse += 1;
3224 break;
3225 default:
3226 assert(0);
3227 break;
3228 }
3229 }
3230
3231 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3232 sm1_read_dst_param(struct shader_translator *tx,
3233 struct sm1_dst_param *dst,
3234 struct sm1_src_param *rel)
3235 {
3236 DWORD tok_dst, tok_rel = 0;
3237
3238 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3239 sm1_parse_dst_param(dst, tok_dst);
3240 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3241 sm1_parse_src_param(rel, tok_rel);
3242 dst->rel = rel;
3243 }
3244 }
3245
3246 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3247 sm1_read_src_param(struct shader_translator *tx,
3248 struct sm1_src_param *src,
3249 struct sm1_src_param *rel)
3250 {
3251 DWORD tok_src, tok_rel = 0;
3252
3253 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3254 sm1_parse_src_param(src, tok_src);
3255 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3256 assert(rel);
3257 sm1_parse_src_param(rel, tok_rel);
3258 src->rel = rel;
3259 }
3260 }
3261
3262 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3263 sm1_read_semantic(struct shader_translator *tx,
3264 struct sm1_semantic *sem)
3265 {
3266 const DWORD tok_usg = TOKEN_NEXT(tx);
3267 const DWORD tok_dst = TOKEN_NEXT(tx);
3268
3269 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3270 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3271 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3272
3273 sm1_parse_dst_param(&sem->reg, tok_dst);
3274 }
3275
3276 static void
sm1_parse_instruction(struct shader_translator * tx)3277 sm1_parse_instruction(struct shader_translator *tx)
3278 {
3279 struct sm1_instruction *insn = &tx->insn;
3280 HRESULT hr;
3281 DWORD tok;
3282 struct sm1_op_info *info = NULL;
3283 unsigned i;
3284
3285 sm1_parse_comments(tx, TRUE);
3286 sm1_parse_get_skip(tx);
3287
3288 tok = TOKEN_NEXT(tx);
3289
3290 insn->opcode = tok & D3DSI_OPCODE_MASK;
3291 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3292 insn->coissue = !!(tok & D3DSI_COISSUE);
3293 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3294
3295 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3296 int k = tx->op_info_map[insn->opcode];
3297 if (k >= 0) {
3298 assert(k < ARRAY_SIZE(inst_table));
3299 info = &inst_table[k];
3300 }
3301 } else {
3302 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3303 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3304 }
3305 if (!info) {
3306 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3307 TOKEN_JUMP(tx);
3308 return;
3309 }
3310 insn->info = info;
3311 insn->ndst = info->ndst;
3312 insn->nsrc = info->nsrc;
3313
3314 assert(!insn->predicated && "TODO: predicated instructions");
3315
3316 /* check version */
3317 {
3318 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3319 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3320 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3321 if (ver < min || ver > max) {
3322 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3323 min, ver, max);
3324 return;
3325 }
3326 }
3327
3328 for (i = 0; i < insn->ndst; ++i)
3329 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3330 if (insn->predicated)
3331 sm1_read_src_param(tx, &insn->pred, NULL);
3332 for (i = 0; i < insn->nsrc; ++i)
3333 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3334
3335 /* parse here so we can dump them before processing */
3336 if (insn->opcode == D3DSIO_DEF ||
3337 insn->opcode == D3DSIO_DEFI ||
3338 insn->opcode == D3DSIO_DEFB)
3339 sm1_parse_immediate(tx, &tx->insn.src[0]);
3340
3341 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3342 sm1_instruction_check(insn);
3343
3344 if (info->handler)
3345 hr = info->handler(tx);
3346 else
3347 hr = NineTranslateInstruction_Generic(tx);
3348 tx_apply_dst0_modifiers(tx);
3349
3350 if (hr != D3D_OK)
3351 tx->failure = TRUE;
3352 tx->num_scratch = 0; /* reset */
3353
3354 TOKEN_JUMP(tx);
3355 }
3356
3357 static void
tx_ctor(struct shader_translator * tx,struct nine_shader_info * info)3358 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3359 {
3360 unsigned i;
3361
3362 tx->info = info;
3363
3364 tx->byte_code = info->byte_code;
3365 tx->parse = info->byte_code;
3366
3367 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3368 info->input_map[i] = NINE_DECLUSAGE_NONE;
3369 info->num_inputs = 0;
3370
3371 info->position_t = FALSE;
3372 info->point_size = FALSE;
3373
3374 tx->info->const_float_slots = 0;
3375 tx->info->const_int_slots = 0;
3376 tx->info->const_bool_slots = 0;
3377
3378 info->sampler_mask = 0x0;
3379 info->rt_mask = 0x0;
3380
3381 info->lconstf.data = NULL;
3382 info->lconstf.ranges = NULL;
3383
3384 info->bumpenvmat_needed = 0;
3385
3386 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3387 tx->regs.rL[i] = ureg_dst_undef();
3388 }
3389 tx->regs.address = ureg_dst_undef();
3390 tx->regs.a0 = ureg_dst_undef();
3391 tx->regs.p = ureg_dst_undef();
3392 tx->regs.oDepth = ureg_dst_undef();
3393 tx->regs.vPos = ureg_src_undef();
3394 tx->regs.vFace = ureg_src_undef();
3395 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3396 tx->regs.o[i] = ureg_dst_undef();
3397 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3398 tx->regs.oCol[i] = ureg_dst_undef();
3399 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3400 tx->regs.vC[i] = ureg_src_undef();
3401 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3402 tx->regs.vT[i] = ureg_src_undef();
3403
3404 sm1_read_version(tx);
3405
3406 info->version = (tx->version.major << 4) | tx->version.minor;
3407
3408 tx->num_outputs = 0;
3409
3410 create_op_info_map(tx);
3411 }
3412
3413 static void
tx_dtor(struct shader_translator * tx)3414 tx_dtor(struct shader_translator *tx)
3415 {
3416 if (tx->num_inst_labels)
3417 FREE(tx->inst_labels);
3418 FREE(tx->lconstf);
3419 FREE(tx->regs.r);
3420 FREE(tx);
3421 }
3422
3423 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3424 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3425 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3426 shader_add_vs_viewport_transform(struct shader_translator *tx)
3427 {
3428 struct ureg_program *ureg = tx->ureg;
3429 struct ureg_src c0 = NINE_CONSTANT_SRC(0);
3430 struct ureg_src c1 = NINE_CONSTANT_SRC(1);
3431 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3432
3433 c0 = ureg_src_dimension(c0, 4);
3434 c1 = ureg_src_dimension(c1, 4);
3435 /* TODO: find out when we need to apply the viewport transformation or not.
3436 * Likely will be XYZ vs XYZRHW in vdecl_out
3437 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3438 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3439 */
3440 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3441 }
3442
3443 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_src src_col)3444 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3445 {
3446 struct ureg_program *ureg = tx->ureg;
3447 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3448 struct ureg_src fog_end, fog_coeff, fog_density;
3449 struct ureg_src fog_vs, depth, fog_color;
3450 struct ureg_dst fog_factor;
3451
3452 if (!tx->info->fog_enable) {
3453 ureg_MOV(ureg, oCol0, src_col);
3454 return;
3455 }
3456
3457 if (tx->info->fog_mode != D3DFOG_NONE) {
3458 depth = nine_get_position_input(tx);
3459 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3460 }
3461
3462 nine_info_mark_const_f_used(tx->info, 33);
3463 fog_color = NINE_CONSTANT_SRC(32);
3464 fog_factor = tx_scratch_scalar(tx);
3465
3466 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3467 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3468 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3469 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
3470 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3471 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3472 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3473 ureg_MUL(ureg, fog_factor, depth, fog_density);
3474 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3475 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3476 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3477 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3478 ureg_MUL(ureg, fog_factor, depth, fog_density);
3479 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3480 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3481 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3482 } else {
3483 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3484 TGSI_INTERPOLATE_PERSPECTIVE),
3485 TGSI_SWIZZLE_X);
3486 ureg_MOV(ureg, fog_factor, fog_vs);
3487 }
3488
3489 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3490 tx_src_scalar(fog_factor), src_col, fog_color);
3491 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3492 }
3493
/* Capability queries. Both macros expect a variable named `screen` in scope;
 * GET_SHADER_CAP additionally expects `info` and queries for info->type. */
#define GET_CAP(n) screen->get_param(screen, PIPE_CAP_##n)
#define GET_SHADER_CAP(n) screen->get_shader_param(screen, info->type, PIPE_SHADER_CAP_##n)
3498
3499 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)3500 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3501 {
3502 struct shader_translator *tx;
3503 HRESULT hr = D3D_OK;
3504 const unsigned processor = info->type;
3505 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3506
3507 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3508
3509 tx = CALLOC_STRUCT(shader_translator);
3510 if (!tx)
3511 return E_OUTOFMEMORY;
3512 tx_ctor(tx, info);
3513
3514 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3515 hr = D3DERR_INVALIDCALL;
3516 DBG("Unsupported shader version: %u.%u !\n",
3517 tx->version.major, tx->version.minor);
3518 goto out;
3519 }
3520 if (tx->processor != processor) {
3521 hr = D3DERR_INVALIDCALL;
3522 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3523 goto out;
3524 }
3525 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3526 tx->version.major, tx->version.minor);
3527
3528 tx->ureg = ureg_create(processor);
3529 if (!tx->ureg) {
3530 hr = E_OUTOFMEMORY;
3531 goto out;
3532 }
3533
3534 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3535 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3536 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3537 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3538 tx->texcoord_sn = tx->want_texcoord ?
3539 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3540 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3541 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3542
3543 if (IS_VS) {
3544 tx->num_constf_allowed = NINE_MAX_CONST_F;
3545 } else if (tx->version.major < 2) {/* IS_PS v1 */
3546 tx->num_constf_allowed = 8;
3547 } else if (tx->version.major == 2) {/* IS_PS v2 */
3548 tx->num_constf_allowed = 32;
3549 } else {/* IS_PS v3 */
3550 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3551 }
3552
3553 if (tx->version.major < 2) {
3554 tx->num_consti_allowed = 0;
3555 tx->num_constb_allowed = 0;
3556 } else {
3557 tx->num_consti_allowed = NINE_MAX_CONST_I;
3558 tx->num_constb_allowed = NINE_MAX_CONST_B;
3559 }
3560
3561 if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
3562 tx->num_constf_allowed = 8192;
3563 tx->num_consti_allowed = 2048;
3564 tx->num_constb_allowed = 2048;
3565 }
3566
3567 /* VS must always write position. Declare it here to make it the 1st output.
3568 * (Some drivers like nv50 are buggy and rely on that.)
3569 */
3570 if (IS_VS) {
3571 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3572 } else {
3573 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3574 if (!tx->shift_wpos)
3575 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3576 }
3577
3578 if (GET_CAP(TGSI_MUL_ZERO_WINS))
3579 ureg_property(tx->ureg, TGSI_PROPERTY_MUL_ZERO_WINS, 1);
3580
3581 while (!sm1_parse_eof(tx) && !tx->failure)
3582 sm1_parse_instruction(tx);
3583 tx->parse++; /* for byte_size */
3584
3585 if (tx->failure) {
3586 /* For VS shaders, we print the warning later,
3587 * we first try with swvp. */
3588 if (IS_PS)
3589 ERR("Encountered buggy shader\n");
3590 ureg_destroy(tx->ureg);
3591 hr = D3DERR_INVALIDCALL;
3592 goto out;
3593 }
3594
3595 if (IS_PS && tx->version.major < 3) {
3596 if (tx->version.major < 2) {
3597 assert(tx->num_temp); /* there must be color output */
3598 info->rt_mask |= 0x1;
3599 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3600 } else {
3601 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3602 }
3603 }
3604
3605 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3606 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3607 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3608 }
3609
3610 if (info->position_t)
3611 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3612
3613 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3614 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3615 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3616 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3617 info->point_size = TRUE;
3618 }
3619
3620 if (info->process_vertices)
3621 shader_add_vs_viewport_transform(tx);
3622
3623 ureg_END(tx->ureg);
3624
3625 /* record local constants */
3626 if (tx->num_lconstf && tx->indirect_const_access) {
3627 struct nine_range *ranges;
3628 float *data;
3629 int *indices;
3630 unsigned i, k, n;
3631
3632 hr = E_OUTOFMEMORY;
3633
3634 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3635 if (!data)
3636 goto out;
3637 info->lconstf.data = data;
3638
3639 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3640 if (!indices)
3641 goto out;
3642
3643 /* lazy sort, num_lconstf should be small */
3644 for (n = 0; n < tx->num_lconstf; ++n) {
3645 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3646 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3647 k = i;
3648 }
3649 indices[n] = tx->lconstf[k].idx;
3650 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3651 tx->lconstf[k].idx = INT_MAX;
3652 }
3653
3654 /* count ranges */
3655 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3656 if (indices[i] != indices[i - 1] + 1)
3657 ++n;
3658 ranges = MALLOC(n * sizeof(ranges[0]));
3659 if (!ranges) {
3660 FREE(indices);
3661 goto out;
3662 }
3663 info->lconstf.ranges = ranges;
3664
3665 k = 0;
3666 ranges[k].bgn = indices[0];
3667 for (i = 1; i < tx->num_lconstf; ++i) {
3668 if (indices[i] != indices[i - 1] + 1) {
3669 ranges[k].next = &ranges[k + 1];
3670 ranges[k].end = indices[i - 1] + 1;
3671 ++k;
3672 ranges[k].bgn = indices[i];
3673 }
3674 }
3675 ranges[k].end = indices[i - 1] + 1;
3676 ranges[k].next = NULL;
3677 assert(n == (k + 1));
3678
3679 FREE(indices);
3680 hr = D3D_OK;
3681 }
3682
3683 /* r500 */
3684 if (info->const_float_slots > device->max_vs_const_f &&
3685 (info->const_int_slots || info->const_bool_slots) &&
3686 (!IS_VS || !info->swvp_on))
3687 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3688
3689
3690 if (tx->indirect_const_access) /* vs only */
3691 info->const_float_slots = device->max_vs_const_f;
3692
3693 if (!IS_VS || !info->swvp_on) {
3694 unsigned s, slot_max;
3695 unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3696
3697 slot_max = info->const_bool_slots > 0 ?
3698 max_const_f + NINE_MAX_CONST_I
3699 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3700 info->const_int_slots > 0 ?
3701 max_const_f + info->const_int_slots :
3702 info->const_float_slots;
3703
3704 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3705
3706 for (s = 0; s < slot_max; s++)
3707 ureg_DECL_constant(tx->ureg, s);
3708 } else {
3709 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3710 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3711 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3712 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3713 }
3714
3715 if (info->process_vertices)
3716 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3717
3718 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3719 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
3720 tgsi_dump(toks, 0);
3721 ureg_free_tokens(toks);
3722 }
3723
3724 if (info->process_vertices) {
3725 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3726 tx->output_info,
3727 tx->num_outputs,
3728 &(info->so));
3729 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3730 } else
3731 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3732 if (!info->cso) {
3733 hr = D3DERR_DRIVERINTERNALERROR;
3734 FREE(info->lconstf.data);
3735 FREE(info->lconstf.ranges);
3736 goto out;
3737 }
3738
3739 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3740 out:
3741 tx_dtor(tx);
3742 return hr;
3743 }
3744