1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/macros.h"
32 #include "util/u_memory.h"
33 #include "util/u_inlines.h"
34 #include "pipe/p_shader_tokens.h"
35 #include "tgsi/tgsi_ureg.h"
36 #include "tgsi/tgsi_dump.h"
37
/* Debug channel used by the DUMP output emitted from this file. */
#define DBG_CHANNEL DBG_SHADER

/* printf-style dump helper: forwards its arguments to _nine_debug_printf on
 * DBG_CHANNEL, passing NULL as the second argument. */
#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)


struct shader_translator;

/* Signature of a per-instruction translation handler (see the `handler`
 * member of struct sm1_op_info). */
typedef HRESULT (*translate_instruction_func)(struct shader_translator *);

/* Forward declaration; used by sm1_dump_instruction to print the opcode. */
static inline const char *d3dsio_to_string(unsigned opcode);
48
49
/* Tags for SM1 vertex (VS) and pixel (PS) shaders. */
#define NINED3D_SM1_VS 0xfffe
#define NINED3D_SM1_PS 0xffff

/* Sizes of the cond_labels[] and loop_labels[] / rL[] arrays in
 * struct shader_translator. */
#define NINE_MAX_COND_DEPTH 64
#define NINE_MAX_LOOP_DEPTH 64

#define NINED3DSP_END 0x0000ffff

/* Values of sm1_src_param::type; sm1_dump_immediate uses them to select the
 * member of the `imm` union to print. */
#define NINED3DSPTYPE_FLOAT4 0
#define NINED3DSPTYPE_INT4 1
#define NINED3DSPTYPE_BOOL 2

/* Register-file value used for immediates: one past D3DSPR_PREDICATE. */
#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)

#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
#define NINED3DSP_WRITEMASK_SHIFT 16

#define NINED3DSHADER_INST_PREDICATED (1 << 28)

/* Comparison operator codes. */
#define NINED3DSHADER_REL_OP_GT 1
#define NINED3DSHADER_REL_OP_EQ 2
#define NINED3DSHADER_REL_OP_GE 3
#define NINED3DSHADER_REL_OP_LT 4
#define NINED3DSHADER_REL_OP_NE 5
#define NINED3DSHADER_REL_OP_LE 6

/* Position of the 8-bit opcode-specific flags field. */
#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)

/* Instruction flag values for D3DSIO_TEX; sm1_dump_instruction prints
 * PROJECT as a "p" suffix and any other non-zero flag value as "b". */
#define NINED3DSI_TEXLD_PROJECT 0x1
#define NINED3DSI_TEXLD_BIAS 0x2

/* Per-component write mask bits (component 0..3) and their union. */
#define NINED3DSP_WRITEMASK_0 0x1
#define NINED3DSP_WRITEMASK_1 0x2
#define NINED3DSP_WRITEMASK_2 0x4
#define NINED3DSP_WRITEMASK_3 0x8
#define NINED3DSP_WRITEMASK_ALL 0xf

/* Identity swizzle: 2 bits per component selecting 0, 1, 2, 3 in order. */
#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))

/* Expands to four comma-separated TGSI_SWIZZLE_* tokens. */
#define NINE_SWIZZLE4(x,y,z,w) \
    TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w

/* ureg source for entry `index` of TGSI_FILE_CONSTANT. */
#define NINE_CONSTANT_SRC(index) \
    ureg_src_register(TGSI_FILE_CONSTANT, index)

/* Replicate component `s` of `src` into all four channels. */
#define NINE_APPLY_SWIZZLE(src, s) \
    ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))

/* Constant `index` with component `s` replicated into all four channels. */
#define NINE_CONSTANT_SRC_SWIZZLE(index, s) \
    NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s)

/* D3D destination modifiers shifted down by D3DSP_DSTMOD_SHIFT;
 * sm1_dump_dst_param tests them as bits of sm1_dst_param::mod. */
#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
105
/*
 * Source modifiers and the shader versions in which they are available:
 *
 * NEG     all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
 * BIAS    <= PS 1.4 (x-0.5)
 * BIASNEG <= PS 1.4 (-(x-0.5))
 * SIGN    <= PS 1.4 (2(x-0.5))
 * SIGNNEG <= PS 1.4 (-2(x-0.5))
 * COMP    <= PS 1.4 (1-x)
 * X2       = PS 1.4 (2x)
 * X2NEG    = PS 1.4 (-2x)
 * DZ      <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
 * DW      <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
 * ABS     >= SM 3.0 (abs(x))
 * ABSNEG  >= SM 3.0 (-abs(x))
 * NOT     >= SM 2.0 predication only
 *
 * Each NINED3DSPSM_* value is the corresponding D3DSPSM_* value shifted down
 * by D3DSP_SRCMOD_SHIFT; these values index sm1_mod_str[].
 */
#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
135
/* Printable name of each source modifier, indexed by NINED3DSPSM_* value.
 * sm1_dump_src_param prints the entry followed by "(" when the modifier is
 * non-zero. */
static const char *sm1_mod_str[] =
{
    [NINED3DSPSM_NONE] = "",
    [NINED3DSPSM_NEG] = "-",
    [NINED3DSPSM_BIAS] = "bias",
    [NINED3DSPSM_BIASNEG] = "biasneg",
    [NINED3DSPSM_SIGN] = "sign",
    [NINED3DSPSM_SIGNNEG] = "signneg",
    [NINED3DSPSM_COMP] = "comp",
    [NINED3DSPSM_X2] = "x2",
    [NINED3DSPSM_X2NEG] = "x2neg",
    [NINED3DSPSM_DZ] = "dz",
    [NINED3DSPSM_DW] = "dw",
    [NINED3DSPSM_ABS] = "abs",
    [NINED3DSPSM_ABSNEG] = "-abs",
    [NINED3DSPSM_NOT] = "not"
};
153
154 static void
sm1_dump_writemask(BYTE mask)155 sm1_dump_writemask(BYTE mask)
156 {
157 if (mask & 1) DUMP("x"); else DUMP("_");
158 if (mask & 2) DUMP("y"); else DUMP("_");
159 if (mask & 4) DUMP("z"); else DUMP("_");
160 if (mask & 8) DUMP("w"); else DUMP("_");
161 }
162
163 static void
sm1_dump_swizzle(BYTE s)164 sm1_dump_swizzle(BYTE s)
165 {
166 char c[4] = { 'x', 'y', 'z', 'w' };
167 DUMP("%c%c%c%c",
168 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
169 }
170
/* One-character prefix for each D3DSPR_* register file, used by the dump
 * helpers. Note that 'D' is shared by ATTROUT and DEPTHOUT and 'c' by all
 * four CONST files. The table ends at D3DSPR_PREDICATE, so it has no entry
 * for NINED3DSPR_IMMEDIATE. */
static const char sm1_file_char[] =
{
    [D3DSPR_TEMP] = 'r',
    [D3DSPR_INPUT] = 'v',
    [D3DSPR_CONST] = 'c',
    [D3DSPR_ADDR] = 'A',
    [D3DSPR_RASTOUT] = 'R',
    [D3DSPR_ATTROUT] = 'D',
    [D3DSPR_OUTPUT] = 'o',
    [D3DSPR_CONSTINT] = 'I',
    [D3DSPR_COLOROUT] = 'C',
    [D3DSPR_DEPTHOUT] = 'D',
    [D3DSPR_SAMPLER] = 's',
    [D3DSPR_CONST2] = 'c',
    [D3DSPR_CONST3] = 'c',
    [D3DSPR_CONST4] = 'c',
    [D3DSPR_CONSTBOOL] = 'B',
    [D3DSPR_LOOP] = 'L',
    [D3DSPR_TEMPFLOAT16] = 'h',
    [D3DSPR_MISCTYPE] = 'M',
    [D3DSPR_LABEL] = 'X',
    [D3DSPR_PREDICATE] = 'p'
};
194
195 static void
sm1_dump_reg(BYTE file,INT index)196 sm1_dump_reg(BYTE file, INT index)
197 {
198 switch (file) {
199 case D3DSPR_LOOP:
200 DUMP("aL");
201 break;
202 case D3DSPR_COLOROUT:
203 DUMP("oC%i", index);
204 break;
205 case D3DSPR_DEPTHOUT:
206 DUMP("oDepth");
207 break;
208 case D3DSPR_RASTOUT:
209 DUMP("oRast%i", index);
210 break;
211 case D3DSPR_CONSTINT:
212 DUMP("iconst[%i]", index);
213 break;
214 case D3DSPR_CONSTBOOL:
215 DUMP("bconst[%i]", index);
216 break;
217 default:
218 DUMP("%c%i", sm1_file_char[file], index);
219 break;
220 }
221 }
222
/* Decoded source parameter of an SM1 instruction. */
struct sm1_src_param
{
    INT idx; /* register index (offset when `rel` is set) */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file; /* D3DSPR_* or NINED3DSPR_IMMEDIATE */
    BYTE swizzle; /* 2 bits per channel; NINED3DSP_NOSWIZZLE is identity */
    BYTE mod; /* NINED3DSPSM_* source modifier */
    BYTE type; /* NINED3DSPTYPE_*: selects the valid member of imm */
    union {
        DWORD d[4];
        float f[4];
        int i[4];
        BOOL b;
    } imm; /* immediate value, dumped when file == NINED3DSPR_IMMEDIATE */
};
static void
sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
240
/* Decoded destination parameter of an SM1 instruction. */
struct sm1_dst_param
{
    INT idx; /* register index (offset when `rel` is set) */
    struct sm1_src_param *rel; /* relative-addressing register, or NULL */
    BYTE file; /* D3DSPR_* register file */
    BYTE mask; /* write mask, NINED3DSP_WRITEMASK_* bits */
    BYTE mod; /* bitmask of NINED3DSPDM_* destination modifiers */
    int8_t shift; /* sint4 */
    BYTE type;
};
251
252 static inline void
assert_replicate_swizzle(const struct ureg_src * reg)253 assert_replicate_swizzle(const struct ureg_src *reg)
254 {
255 assert(reg->SwizzleY == reg->SwizzleX &&
256 reg->SwizzleZ == reg->SwizzleX &&
257 reg->SwizzleW == reg->SwizzleX);
258 }
259
260 static void
sm1_dump_immediate(const struct sm1_src_param * param)261 sm1_dump_immediate(const struct sm1_src_param *param)
262 {
263 switch (param->type) {
264 case NINED3DSPTYPE_FLOAT4:
265 DUMP("{ %f %f %f %f }",
266 param->imm.f[0], param->imm.f[1],
267 param->imm.f[2], param->imm.f[3]);
268 break;
269 case NINED3DSPTYPE_INT4:
270 DUMP("{ %i %i %i %i }",
271 param->imm.i[0], param->imm.i[1],
272 param->imm.i[2], param->imm.i[3]);
273 break;
274 case NINED3DSPTYPE_BOOL:
275 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
276 break;
277 default:
278 assert(0);
279 break;
280 }
281 }
282
283 static void
sm1_dump_src_param(const struct sm1_src_param * param)284 sm1_dump_src_param(const struct sm1_src_param *param)
285 {
286 if (param->file == NINED3DSPR_IMMEDIATE) {
287 assert(!param->mod &&
288 !param->rel &&
289 param->swizzle == NINED3DSP_NOSWIZZLE);
290 sm1_dump_immediate(param);
291 return;
292 }
293
294 if (param->mod)
295 DUMP("%s(", sm1_mod_str[param->mod]);
296 if (param->rel) {
297 DUMP("%c[", sm1_file_char[param->file]);
298 sm1_dump_src_param(param->rel);
299 DUMP("+%i]", param->idx);
300 } else {
301 sm1_dump_reg(param->file, param->idx);
302 }
303 if (param->mod)
304 DUMP(")");
305 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
306 DUMP(".");
307 sm1_dump_swizzle(param->swizzle);
308 }
309 }
310
311 static void
sm1_dump_dst_param(const struct sm1_dst_param * param)312 sm1_dump_dst_param(const struct sm1_dst_param *param)
313 {
314 if (param->mod & NINED3DSPDM_SATURATE)
315 DUMP("sat ");
316 if (param->mod & NINED3DSPDM_PARTIALP)
317 DUMP("pp ");
318 if (param->mod & NINED3DSPDM_CENTROID)
319 DUMP("centroid ");
320 if (param->shift < 0)
321 DUMP("/%u ", 1 << -param->shift);
322 if (param->shift > 0)
323 DUMP("*%u ", 1 << param->shift);
324
325 if (param->rel) {
326 DUMP("%c[", sm1_file_char[param->file]);
327 sm1_dump_src_param(param->rel);
328 DUMP("+%i]", param->idx);
329 } else {
330 sm1_dump_reg(param->file, param->idx);
331 }
332 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
333 DUMP(".");
334 sm1_dump_writemask(param->mask);
335 }
336 }
337
/* A register together with its declared usage; filled in by
 * sm1_read_semantic. */
struct sm1_semantic
{
    struct sm1_dst_param reg;
    BYTE sampler_type;
    D3DDECLUSAGE usage;
    BYTE usage_idx;
};
345
/* Static description of one SM1 opcode: its TGSI equivalent, the shader
 * versions supporting it, its operand counts and an optional handler. */
struct sm1_op_info
{
    /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
     * should be ignored completely */
    unsigned sio;
    unsigned opcode; /* TGSI_OPCODE_x */

    /* versions are still set even handler is set */
    struct {
        unsigned min;
        unsigned max;
    } vert_version, frag_version;

    /* number of regs parsed outside of special handler */
    unsigned ndst;
    unsigned nsrc;

    /* some instructions don't map perfectly, so use a special handler */
    translate_instruction_func handler;
};
366
/* One decoded SM1 instruction: opcode, flags and operand storage. */
struct sm1_instruction
{
    D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
    BYTE flags; /* opcode-specific flags, e.g. NINED3DSI_TEXLD_* for TEX */
    BOOL coissue;
    BOOL predicated; /* when set, `pred` is dumped as an "@pred" prefix */
    BYTE ndst; /* number of valid entries in dst[] */
    BYTE nsrc; /* number of valid entries in src[] */
    struct sm1_src_param src[4];
    struct sm1_src_param src_rel[4];
    struct sm1_src_param pred;
    struct sm1_src_param dst_rel[1];
    struct sm1_dst_param dst[1];

    struct sm1_op_info *info;
};
383
384 static void
sm1_dump_instruction(struct sm1_instruction * insn,unsigned indent)385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
386 {
387 unsigned i;
388
389 /* no info stored for these: */
390 if (insn->opcode == D3DSIO_DCL)
391 return;
392 for (i = 0; i < indent; ++i)
393 DUMP(" ");
394
395 if (insn->predicated) {
396 DUMP("@");
397 sm1_dump_src_param(&insn->pred);
398 DUMP(" ");
399 }
400 DUMP("%s", d3dsio_to_string(insn->opcode));
401 if (insn->flags) {
402 switch (insn->opcode) {
403 case D3DSIO_TEX:
404 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
405 break;
406 default:
407 DUMP("_%x", insn->flags);
408 break;
409 }
410 }
411 if (insn->coissue)
412 DUMP("_co");
413 DUMP(" ");
414
415 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
416 sm1_dump_dst_param(&insn->dst[i]);
417 DUMP(" ");
418 }
419
420 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
421 sm1_dump_src_param(&insn->src[i]);
422 DUMP(" ");
423 }
424 if (insn->opcode == D3DSIO_DEF ||
425 insn->opcode == D3DSIO_DEFI ||
426 insn->opcode == D3DSIO_DEFB)
427 sm1_dump_immediate(&insn->src[0]);
428
429 DUMP("\n");
430 }
431
/* A constant defined inside the shader: its constant index and the TGSI
 * immediate register that holds its value. */
struct sm1_local_const
{
    INT idx;
    struct ureg_src reg;
    float f[4]; /* for indirect addressing of float constants */
};
438
/* State of one SM1 -> TGSI shader translation: the input token stream, the
 * ureg program being built, the registers declared so far, control-flow
 * nesting stacks and the shader-local constants. */
struct shader_translator
{
    const DWORD *byte_code;
    const DWORD *parse;
    const DWORD *parse_next;

    struct ureg_program *ureg;

    /* shader version */
    struct {
        BYTE major;
        BYTE minor;
    } version;
    unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
    /* upper bounds (exclusive) on the float / int / bool constant indices */
    unsigned num_constf_allowed;
    unsigned num_consti_allowed;
    unsigned num_constb_allowed;

    boolean native_integers;
    boolean inline_subroutines;
    boolean lower_preds;
    boolean want_texcoord;
    boolean shift_wpos;
    boolean wpos_is_sysval;
    boolean face_is_sysval_integer;
    unsigned texcoord_sn;

    struct sm1_instruction insn; /* current instruction */

    struct {
        struct ureg_dst *r;
        struct ureg_dst oPos;
        struct ureg_dst oPos_out; /* the real output when doing streamout */
        struct ureg_dst oFog;
        struct ureg_dst oPts;
        struct ureg_dst oCol[4];
        struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
        struct ureg_dst oDepth;
        struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
        struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
        struct ureg_src vPos;
        struct ureg_src vFace;
        struct ureg_src s;
        struct ureg_dst p;
        struct ureg_dst address;
        struct ureg_dst a0;
        struct ureg_dst tS[8]; /* texture stage registers */
        struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
        struct ureg_dst t[5]; /* scratch TEMPs */
        struct ureg_src vC[2]; /* PS color in */
        struct ureg_src vT[8]; /* PS texcoord in */
        struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */
    } regs;
    unsigned num_temp; /* ARRAY_SIZE(regs.r) */
    unsigned num_scratch; /* number of regs.t[] entries handed out by tx_scratch */
    unsigned loop_depth;
    unsigned loop_depth_max;
    unsigned cond_depth;
    unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
    unsigned cond_labels[NINE_MAX_COND_DEPTH];
    boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */

    unsigned *inst_labels; /* LABEL op */
    unsigned num_inst_labels;

    unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */

    /* shader-local constants; each array grows in steps of 8 entries */
    struct sm1_local_const *lconstf;
    unsigned num_lconstf;
    struct sm1_local_const *lconsti;
    unsigned num_lconsti;
    struct sm1_local_const *lconstb;
    unsigned num_lconstb;

    boolean indirect_const_access;
    boolean failure; /* set when the translation hits an unrecoverable error */

    struct nine_vs_output_info output_info[16];
    int num_outputs; /* number of output_info[] entries in use */

    struct nine_shader_info *info;

    int16_t op_info_map[D3DSIO_BREAKP + 1];
};
523
/* Shader stage tests; both expect a `tx` translator pointer in scope. */
#define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)

/* If `cond` holds, flag the translation as failed and return from the
 * enclosing void function. Expects a `tx` translator pointer in scope and
 * expands to a complete if statement, so call sites need no semicolon. */
#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}

static void
sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
531
532 static void
sm1_instruction_check(const struct sm1_instruction * insn)533 sm1_instruction_check(const struct sm1_instruction *insn)
534 {
535 if (insn->opcode == D3DSIO_CRS)
536 {
537 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
538 {
539 DBG("CRS.mask.w\n");
540 }
541 }
542 }
543
544 static void
nine_record_outputs(struct shader_translator * tx,BYTE Usage,BYTE UsageIndex,int mask,int output_index)545 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
546 int mask, int output_index)
547 {
548 tx->output_info[tx->num_outputs].output_semantic = Usage;
549 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
550 tx->output_info[tx->num_outputs].mask = mask;
551 tx->output_info[tx->num_outputs].output_index = output_index;
552 tx->num_outputs++;
553 }
554
555 static boolean
tx_lconstf(struct shader_translator * tx,struct ureg_src * src,INT index)556 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
557 {
558 INT i;
559
560 if (index < 0 || index >= tx->num_constf_allowed) {
561 tx->failure = TRUE;
562 return FALSE;
563 }
564 for (i = 0; i < tx->num_lconstf; ++i) {
565 if (tx->lconstf[i].idx == index) {
566 *src = tx->lconstf[i].reg;
567 return TRUE;
568 }
569 }
570 return FALSE;
571 }
572 static boolean
tx_lconsti(struct shader_translator * tx,struct ureg_src * src,INT index)573 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
574 {
575 int i;
576
577 if (index < 0 || index >= tx->num_consti_allowed) {
578 tx->failure = TRUE;
579 return FALSE;
580 }
581 for (i = 0; i < tx->num_lconsti; ++i) {
582 if (tx->lconsti[i].idx == index) {
583 *src = tx->lconsti[i].reg;
584 return TRUE;
585 }
586 }
587 return FALSE;
588 }
589 static boolean
tx_lconstb(struct shader_translator * tx,struct ureg_src * src,INT index)590 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
591 {
592 int i;
593
594 if (index < 0 || index >= tx->num_constb_allowed) {
595 tx->failure = TRUE;
596 return FALSE;
597 }
598 for (i = 0; i < tx->num_lconstb; ++i) {
599 if (tx->lconstb[i].idx == index) {
600 *src = tx->lconstb[i].reg;
601 return TRUE;
602 }
603 }
604 return FALSE;
605 }
606
607 static void
tx_set_lconstf(struct shader_translator * tx,INT index,float f[4])608 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
609 {
610 unsigned n;
611
612 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
613
614 for (n = 0; n < tx->num_lconstf; ++n)
615 if (tx->lconstf[n].idx == index)
616 break;
617 if (n == tx->num_lconstf) {
618 if ((n % 8) == 0) {
619 tx->lconstf = REALLOC(tx->lconstf,
620 (n + 0) * sizeof(tx->lconstf[0]),
621 (n + 8) * sizeof(tx->lconstf[0]));
622 assert(tx->lconstf);
623 }
624 tx->num_lconstf++;
625 }
626 tx->lconstf[n].idx = index;
627 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
628
629 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
630 }
631 static void
tx_set_lconsti(struct shader_translator * tx,INT index,int i[4])632 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
633 {
634 unsigned n;
635
636 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
637
638 for (n = 0; n < tx->num_lconsti; ++n)
639 if (tx->lconsti[n].idx == index)
640 break;
641 if (n == tx->num_lconsti) {
642 if ((n % 8) == 0) {
643 tx->lconsti = REALLOC(tx->lconsti,
644 (n + 0) * sizeof(tx->lconsti[0]),
645 (n + 8) * sizeof(tx->lconsti[0]));
646 assert(tx->lconsti);
647 }
648 tx->num_lconsti++;
649 }
650
651 tx->lconsti[n].idx = index;
652 tx->lconsti[n].reg = tx->native_integers ?
653 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
654 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
655 }
656 static void
tx_set_lconstb(struct shader_translator * tx,INT index,BOOL b)657 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
658 {
659 unsigned n;
660
661 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
662
663 for (n = 0; n < tx->num_lconstb; ++n)
664 if (tx->lconstb[n].idx == index)
665 break;
666 if (n == tx->num_lconstb) {
667 if ((n % 8) == 0) {
668 tx->lconstb = REALLOC(tx->lconstb,
669 (n + 0) * sizeof(tx->lconstb[0]),
670 (n + 8) * sizeof(tx->lconstb[0]));
671 assert(tx->lconstb);
672 }
673 tx->num_lconstb++;
674 }
675
676 tx->lconstb[n].idx = index;
677 tx->lconstb[n].reg = tx->native_integers ?
678 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
679 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
680 }
681
682 static inline struct ureg_dst
tx_scratch(struct shader_translator * tx)683 tx_scratch(struct shader_translator *tx)
684 {
685 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
686 tx->failure = TRUE;
687 return tx->regs.t[0];
688 }
689 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
690 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
691 return tx->regs.t[tx->num_scratch++];
692 }
693
694 static inline struct ureg_dst
tx_scratch_scalar(struct shader_translator * tx)695 tx_scratch_scalar(struct shader_translator *tx)
696 {
697 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
698 }
699
700 static inline struct ureg_src
tx_src_scalar(struct ureg_dst dst)701 tx_src_scalar(struct ureg_dst dst)
702 {
703 struct ureg_src src = ureg_src(dst);
704 int c = ffs(dst.WriteMask) - 1;
705 if (dst.WriteMask == (1 << c))
706 src = ureg_scalar(src, c);
707 return src;
708 }
709
710 static inline void
tx_temp_alloc(struct shader_translator * tx,INT idx)711 tx_temp_alloc(struct shader_translator *tx, INT idx)
712 {
713 assert(idx >= 0);
714 if (idx >= tx->num_temp) {
715 unsigned k = tx->num_temp;
716 unsigned n = idx + 1;
717 tx->regs.r = REALLOC(tx->regs.r,
718 k * sizeof(tx->regs.r[0]),
719 n * sizeof(tx->regs.r[0]));
720 for (; k < n; ++k)
721 tx->regs.r[k] = ureg_dst_undef();
722 tx->num_temp = n;
723 }
724 if (ureg_dst_is_undef(tx->regs.r[idx]))
725 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
726 }
727
728 static inline void
tx_addr_alloc(struct shader_translator * tx,INT idx)729 tx_addr_alloc(struct shader_translator *tx, INT idx)
730 {
731 assert(idx == 0);
732 if (ureg_dst_is_undef(tx->regs.address))
733 tx->regs.address = ureg_DECL_address(tx->ureg);
734 if (ureg_dst_is_undef(tx->regs.a0))
735 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
736 }
737
738 static inline void
tx_pred_alloc(struct shader_translator * tx,INT idx)739 tx_pred_alloc(struct shader_translator *tx, INT idx)
740 {
741 assert(idx == 0);
742 if (ureg_dst_is_undef(tx->regs.p))
743 tx->regs.p = ureg_DECL_predicate(tx->ureg);
744 }
745
746 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions
747 * the projection should be applied on the texture. It doesn't
748 * apply on texkill.
749 * The doc is very imprecise here (it says the projection is done
750 * before rasterization, thus in vs, which seems wrong since ps instructions
751 * are affected differently)
752 * For now we only apply to the ps TEX instruction and TEXBEM.
753 * Perhaps some other instructions would need it */
754 static inline void
apply_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,struct ureg_src src,INT idx)755 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
756 struct ureg_src src, INT idx)
757 {
758 struct ureg_dst tmp;
759 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
760
761 /* no projection */
762 if (dim == 1) {
763 ureg_MOV(tx->ureg, dst, src);
764 } else {
765 tmp = tx_scratch_scalar(tx);
766 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
767 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
768 }
769 }
770
771 static inline void
TEX_with_ps1x_projection(struct shader_translator * tx,struct ureg_dst dst,unsigned target,struct ureg_src src0,struct ureg_src src1,INT idx)772 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
773 unsigned target, struct ureg_src src0,
774 struct ureg_src src1, INT idx)
775 {
776 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
777 struct ureg_dst tmp;
778
779 /* dim == 1: no projection
780 * Looks like must be disabled when it makes no
781 * sense according the texture dimensions
782 */
783 if (dim == 1 || dim <= target) {
784 ureg_TEX(tx->ureg, dst, target, src0, src1);
785 } else if (dim == 4) {
786 ureg_TXP(tx->ureg, dst, target, src0, src1);
787 } else {
788 tmp = tx_scratch(tx);
789 apply_ps1x_projection(tx, tmp, src0, idx);
790 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
791 }
792 }
793
794 static inline void
tx_texcoord_alloc(struct shader_translator * tx,INT idx)795 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
796 {
797 assert(IS_PS);
798 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
799 if (ureg_src_is_undef(tx->regs.vT[idx]))
800 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
801 TGSI_INTERPOLATE_PERSPECTIVE);
802 }
803
804 static inline unsigned *
tx_bgnloop(struct shader_translator * tx)805 tx_bgnloop(struct shader_translator *tx)
806 {
807 tx->loop_depth++;
808 if (tx->loop_depth_max < tx->loop_depth)
809 tx->loop_depth_max = tx->loop_depth;
810 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
811 return &tx->loop_labels[tx->loop_depth - 1];
812 }
813
814 static inline unsigned *
tx_endloop(struct shader_translator * tx)815 tx_endloop(struct shader_translator *tx)
816 {
817 assert(tx->loop_depth);
818 tx->loop_depth--;
819 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
820 ureg_get_instruction_number(tx->ureg));
821 return &tx->loop_labels[tx->loop_depth];
822 }
823
824 static struct ureg_dst
tx_get_loopctr(struct shader_translator * tx,boolean loop_or_rep)825 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep)
826 {
827 const unsigned l = tx->loop_depth - 1;
828
829 if (!tx->loop_depth)
830 {
831 DBG("loop counter requested outside of loop\n");
832 return ureg_dst_undef();
833 }
834
835 if (ureg_dst_is_undef(tx->regs.rL[l])) {
836 /* loop or rep ctr creation */
837 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
838 tx->loop_or_rep[l] = loop_or_rep;
839 }
840 /* loop - rep - endloop - endrep not allowed */
841 assert(tx->loop_or_rep[l] == loop_or_rep);
842
843 return tx->regs.rL[l];
844 }
845
846 static struct ureg_src
tx_get_loopal(struct shader_translator * tx)847 tx_get_loopal(struct shader_translator *tx)
848 {
849 int loop_level = tx->loop_depth - 1;
850
851 while (loop_level >= 0) {
852 /* handle loop - rep - endrep - endloop case */
853 if (tx->loop_or_rep[loop_level])
854 /* the value is in the loop counter y component (nine implementation) */
855 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y);
856 loop_level--;
857 }
858
859 DBG("aL counter requested outside of loop\n");
860 return ureg_src_undef();
861 }
862
863 static inline unsigned *
tx_cond(struct shader_translator * tx)864 tx_cond(struct shader_translator *tx)
865 {
866 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH);
867 tx->cond_depth++;
868 return &tx->cond_labels[tx->cond_depth - 1];
869 }
870
871 static inline unsigned *
tx_elsecond(struct shader_translator * tx)872 tx_elsecond(struct shader_translator *tx)
873 {
874 assert(tx->cond_depth);
875 return &tx->cond_labels[tx->cond_depth - 1];
876 }
877
878 static inline void
tx_endcond(struct shader_translator * tx)879 tx_endcond(struct shader_translator *tx)
880 {
881 assert(tx->cond_depth);
882 tx->cond_depth--;
883 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
884 ureg_get_instruction_number(tx->ureg));
885 }
886
887 static inline struct ureg_dst
nine_ureg_dst_register(unsigned file,int index)888 nine_ureg_dst_register(unsigned file, int index)
889 {
890 return ureg_dst(ureg_src_register(file, index));
891 }
892
893 static inline struct ureg_src
nine_get_position_input(struct shader_translator * tx)894 nine_get_position_input(struct shader_translator *tx)
895 {
896 struct ureg_program *ureg = tx->ureg;
897
898 if (tx->wpos_is_sysval)
899 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
900 else
901 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
902 0, TGSI_INTERPOLATE_LINEAR);
903 }
904
905 static struct ureg_src
tx_src_param(struct shader_translator * tx,const struct sm1_src_param * param)906 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
907 {
908 struct ureg_program *ureg = tx->ureg;
909 struct ureg_src src;
910 struct ureg_dst tmp;
911
912 switch (param->file)
913 {
914 case D3DSPR_TEMP:
915 assert(!param->rel);
916 tx_temp_alloc(tx, param->idx);
917 src = ureg_src(tx->regs.r[param->idx]);
918 break;
919 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
920 case D3DSPR_ADDR:
921 assert(!param->rel);
922 if (IS_VS) {
923 assert(param->idx == 0);
924 /* the address register (vs only) must be
925 * assigned before use */
926 assert(!ureg_dst_is_undef(tx->regs.a0));
927 /* Round to lowest for vs1.1 (contrary to the doc), else
928 * round to nearest */
929 if (tx->version.major < 2 && tx->version.minor < 2)
930 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
931 else
932 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
933 src = ureg_src(tx->regs.address);
934 } else {
935 if (tx->version.major < 2 && tx->version.minor < 4) {
936 /* no subroutines, so should be defined */
937 src = ureg_src(tx->regs.tS[param->idx]);
938 } else {
939 tx_texcoord_alloc(tx, param->idx);
940 src = tx->regs.vT[param->idx];
941 }
942 }
943 break;
944 case D3DSPR_INPUT:
945 if (IS_VS) {
946 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
947 } else {
948 if (tx->version.major < 3) {
949 assert(!param->rel);
950 src = ureg_DECL_fs_input_cyl_centroid(
951 ureg, TGSI_SEMANTIC_COLOR, param->idx,
952 TGSI_INTERPOLATE_COLOR, 0,
953 tx->info->force_color_in_centroid ?
954 TGSI_INTERPOLATE_LOC_CENTROID : 0,
955 0, 1);
956 } else {
957 if(param->rel) {
958 /* Copy all inputs (non consecutive)
959 * to temp array (consecutive).
960 * This is not good for performance.
961 * A better way would be to have inputs
962 * consecutive (would need implement alternative
963 * way to match vs outputs and ps inputs).
964 * However even with the better way, the temp array
965 * copy would need to be used if some inputs
966 * are not GENERIC or if they have different
967 * interpolation flag. */
968 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
969 int i;
970 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
971 for (i = 0; i < 10; i++) {
972 if (!ureg_src_is_undef(tx->regs.v[i]))
973 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
974 else
975 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
976 }
977 }
978 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
979 } else {
980 assert(param->idx < ARRAY_SIZE(tx->regs.v));
981 src = tx->regs.v[param->idx];
982 }
983 }
984 }
985 break;
986 case D3DSPR_PREDICATE:
987 assert(!param->rel);
988 tx_pred_alloc(tx, param->idx);
989 src = ureg_src(tx->regs.p);
990 break;
991 case D3DSPR_SAMPLER:
992 assert(param->mod == NINED3DSPSM_NONE);
993 assert(param->swizzle == NINED3DSP_NOSWIZZLE);
994 assert(!param->rel);
995 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx);
996 break;
997 case D3DSPR_CONST:
998 assert(!param->rel || IS_VS);
999 if (param->rel)
1000 tx->indirect_const_access = TRUE;
1001 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1002 if (!param->rel)
1003 nine_info_mark_const_f_used(tx->info, param->idx);
1004 /* vswp constant handling: we use two buffers
1005 * to fit all the float constants. The special handling
1006 * doesn't need to be elsewhere, because all the instructions
1007 * accessing the constants directly are VS1, and swvp
1008 * is VS >= 2 */
1009 if (IS_VS && tx->info->swvp_on) {
1010 if (!param->rel) {
1011 if (param->idx < 4096) {
1012 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1013 src = ureg_src_dimension(src, 0);
1014 } else {
1015 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096);
1016 src = ureg_src_dimension(src, 1);
1017 }
1018 } else {
1019 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */
1020 src = ureg_src_dimension(src, 0);
1021 }
1022 } else
1023 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1024 }
1025 if (!IS_VS && tx->version.major < 2) {
1026 /* ps 1.X clamps constants */
1027 tmp = tx_scratch(tx);
1028 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1029 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1030 src = ureg_src(tmp);
1031 }
1032 break;
1033 case D3DSPR_CONST2:
1034 case D3DSPR_CONST3:
1035 case D3DSPR_CONST4:
1036 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1037 assert(!"CONST2/3/4");
1038 src = ureg_imm1f(ureg, 0.0f);
1039 break;
1040 case D3DSPR_CONSTINT:
1041 /* relative adressing only possible for float constants in vs */
1042 assert(!param->rel);
1043 if (!tx_lconsti(tx, &src, param->idx)) {
1044 nine_info_mark_const_i_used(tx->info, param->idx);
1045 if (IS_VS && tx->info->swvp_on) {
1046 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx);
1047 src = ureg_src_dimension(src, 2);
1048 } else
1049 src = ureg_src_register(TGSI_FILE_CONSTANT,
1050 tx->info->const_i_base + param->idx);
1051 }
1052 break;
1053 case D3DSPR_CONSTBOOL:
1054 assert(!param->rel);
1055 if (!tx_lconstb(tx, &src, param->idx)) {
1056 char r = param->idx / 4;
1057 char s = param->idx & 3;
1058 nine_info_mark_const_b_used(tx->info, param->idx);
1059 if (IS_VS && tx->info->swvp_on) {
1060 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
1061 src = ureg_src_dimension(src, 3);
1062 } else
1063 src = ureg_src_register(TGSI_FILE_CONSTANT,
1064 tx->info->const_b_base + r);
1065 src = ureg_swizzle(src, s, s, s, s);
1066 }
1067 break;
1068 case D3DSPR_LOOP:
1069 if (ureg_dst_is_undef(tx->regs.address))
1070 tx->regs.address = ureg_DECL_address(ureg);
1071 if (!tx->native_integers)
1072 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx));
1073 else
1074 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx));
1075 src = ureg_src(tx->regs.address);
1076 break;
1077 case D3DSPR_MISCTYPE:
1078 switch (param->idx) {
1079 case D3DSMO_POSITION:
1080 if (ureg_src_is_undef(tx->regs.vPos))
1081 tx->regs.vPos = nine_get_position_input(tx);
1082 if (tx->shift_wpos) {
1083 /* TODO: do this only once */
1084 struct ureg_dst wpos = tx_scratch(tx);
1085 ureg_ADD(ureg, wpos, tx->regs.vPos,
1086 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1087 src = ureg_src(wpos);
1088 } else {
1089 src = tx->regs.vPos;
1090 }
1091 break;
1092 case D3DSMO_FACE:
1093 if (ureg_src_is_undef(tx->regs.vFace)) {
1094 if (tx->face_is_sysval_integer) {
1095 tmp = tx_scratch(tx);
1096 tx->regs.vFace =
1097 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1098
1099 /* convert bool to float */
1100 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1101 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1102 tx->regs.vFace = ureg_src(tmp);
1103 } else {
1104 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1105 TGSI_SEMANTIC_FACE, 0,
1106 TGSI_INTERPOLATE_CONSTANT);
1107 }
1108 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1109 }
1110 src = tx->regs.vFace;
1111 break;
1112 default:
1113 assert(!"invalid src D3DSMO");
1114 break;
1115 }
1116 assert(!param->rel);
1117 break;
1118 case D3DSPR_TEMPFLOAT16:
1119 break;
1120 default:
1121 assert(!"invalid src D3DSPR");
1122 }
1123 if (param->rel)
1124 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1125
1126 switch (param->mod) {
1127 case NINED3DSPSM_DW:
1128 tmp = tx_scratch(tx);
1129 /* NOTE: app is not allowed to read w with this modifier */
1130 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src);
1131 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1132 src = ureg_src(tmp);
1133 break;
1134 case NINED3DSPSM_DZ:
1135 tmp = tx_scratch(tx);
1136 /* NOTE: app is not allowed to read z with this modifier */
1137 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src);
1138 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1139 src = ureg_src(tmp);
1140 break;
1141 default:
1142 break;
1143 }
1144
1145 if (param->swizzle != NINED3DSP_NOSWIZZLE)
1146 src = ureg_swizzle(src,
1147 (param->swizzle >> 0) & 0x3,
1148 (param->swizzle >> 2) & 0x3,
1149 (param->swizzle >> 4) & 0x3,
1150 (param->swizzle >> 6) & 0x3);
1151
1152 switch (param->mod) {
1153 case NINED3DSPSM_ABS:
1154 src = ureg_abs(src);
1155 break;
1156 case NINED3DSPSM_ABSNEG:
1157 src = ureg_negate(ureg_abs(src));
1158 break;
1159 case NINED3DSPSM_NEG:
1160 src = ureg_negate(src);
1161 break;
1162 case NINED3DSPSM_BIAS:
1163 tmp = tx_scratch(tx);
1164 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1165 src = ureg_src(tmp);
1166 break;
1167 case NINED3DSPSM_BIASNEG:
1168 tmp = tx_scratch(tx);
1169 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1170 src = ureg_src(tmp);
1171 break;
1172 case NINED3DSPSM_NOT:
1173 if (tx->native_integers) {
1174 tmp = tx_scratch(tx);
1175 ureg_NOT(ureg, tmp, src);
1176 src = ureg_src(tmp);
1177 break;
1178 }
1179 /* fall through */
1180 case NINED3DSPSM_COMP:
1181 tmp = tx_scratch(tx);
1182 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1183 src = ureg_src(tmp);
1184 break;
1185 case NINED3DSPSM_DZ:
1186 case NINED3DSPSM_DW:
1187 /* Already handled*/
1188 break;
1189 case NINED3DSPSM_SIGN:
1190 tmp = tx_scratch(tx);
1191 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1192 src = ureg_src(tmp);
1193 break;
1194 case NINED3DSPSM_SIGNNEG:
1195 tmp = tx_scratch(tx);
1196 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1197 src = ureg_src(tmp);
1198 break;
1199 case NINED3DSPSM_X2:
1200 tmp = tx_scratch(tx);
1201 ureg_ADD(ureg, tmp, src, src);
1202 src = ureg_src(tmp);
1203 break;
1204 case NINED3DSPSM_X2NEG:
1205 tmp = tx_scratch(tx);
1206 ureg_ADD(ureg, tmp, src, src);
1207 src = ureg_negate(ureg_src(tmp));
1208 break;
1209 default:
1210 assert(param->mod == NINED3DSPSM_NONE);
1211 break;
1212 }
1213
1214 return src;
1215 }
1216
1217 static struct ureg_dst
_tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1218 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1219 {
1220 struct ureg_dst dst;
1221
1222 switch (param->file)
1223 {
1224 case D3DSPR_TEMP:
1225 assert(!param->rel);
1226 tx_temp_alloc(tx, param->idx);
1227 dst = tx->regs.r[param->idx];
1228 break;
1229 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1230 case D3DSPR_ADDR:
1231 assert(!param->rel);
1232 if (tx->version.major < 2 && !IS_VS) {
1233 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1234 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1235 dst = tx->regs.tS[param->idx];
1236 } else
1237 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1238 tx_texcoord_alloc(tx, param->idx);
1239 dst = ureg_dst(tx->regs.vT[param->idx]);
1240 } else {
1241 tx_addr_alloc(tx, param->idx);
1242 dst = tx->regs.a0;
1243 }
1244 break;
1245 case D3DSPR_RASTOUT:
1246 assert(!param->rel);
1247 switch (param->idx) {
1248 case 0:
1249 if (ureg_dst_is_undef(tx->regs.oPos))
1250 tx->regs.oPos =
1251 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
1252 dst = tx->regs.oPos;
1253 break;
1254 case 1:
1255 if (ureg_dst_is_undef(tx->regs.oFog))
1256 tx->regs.oFog =
1257 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0));
1258 dst = tx->regs.oFog;
1259 break;
1260 case 2:
1261 if (ureg_dst_is_undef(tx->regs.oPts))
1262 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1263 dst = tx->regs.oPts;
1264 break;
1265 default:
1266 assert(0);
1267 break;
1268 }
1269 break;
1270 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1271 case D3DSPR_OUTPUT:
1272 if (tx->version.major < 3) {
1273 assert(!param->rel);
1274 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1275 } else {
1276 assert(!param->rel); /* TODO */
1277 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1278 dst = tx->regs.o[param->idx];
1279 }
1280 break;
1281 case D3DSPR_ATTROUT: /* VS */
1282 case D3DSPR_COLOROUT: /* PS */
1283 assert(param->idx >= 0 && param->idx < 4);
1284 assert(!param->rel);
1285 tx->info->rt_mask |= 1 << param->idx;
1286 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1287 /* ps < 3: oCol[0] will have fog blending afterward */
1288 if (!IS_VS && tx->version.major < 3 && param->idx == 0) {
1289 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1290 } else {
1291 tx->regs.oCol[param->idx] =
1292 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1293 }
1294 }
1295 dst = tx->regs.oCol[param->idx];
1296 if (IS_VS && tx->version.major < 3)
1297 dst = ureg_saturate(dst);
1298 break;
1299 case D3DSPR_DEPTHOUT:
1300 assert(!param->rel);
1301 if (ureg_dst_is_undef(tx->regs.oDepth))
1302 tx->regs.oDepth =
1303 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1304 TGSI_WRITEMASK_Z, 0, 1);
1305 dst = tx->regs.oDepth; /* XXX: must write .z component */
1306 break;
1307 case D3DSPR_PREDICATE:
1308 assert(!param->rel);
1309 tx_pred_alloc(tx, param->idx);
1310 dst = tx->regs.p;
1311 break;
1312 case D3DSPR_TEMPFLOAT16:
1313 DBG("unhandled D3DSPR: %u\n", param->file);
1314 break;
1315 default:
1316 assert(!"invalid dst D3DSPR");
1317 break;
1318 }
1319 if (param->rel)
1320 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1321
1322 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1323 dst = ureg_writemask(dst, param->mask);
1324 if (param->mod & NINED3DSPDM_SATURATE)
1325 dst = ureg_saturate(dst);
1326
1327 return dst;
1328 }
1329
1330 static struct ureg_dst
tx_dst_param(struct shader_translator * tx,const struct sm1_dst_param * param)1331 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1332 {
1333 if (param->shift) {
1334 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1335 return tx->regs.tdst;
1336 }
1337 return _tx_dst_param(tx, param);
1338 }
1339
1340 static void
tx_apply_dst0_modifiers(struct shader_translator * tx)1341 tx_apply_dst0_modifiers(struct shader_translator *tx)
1342 {
1343 struct ureg_dst rdst;
1344 float f;
1345
1346 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1347 return;
1348 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1349
1350 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1351
1352 if (tx->insn.dst[0].shift < 0)
1353 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1354 else
1355 f = 1 << tx->insn.dst[0].shift;
1356
1357 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1358 }
1359
1360 static struct ureg_src
tx_dst_param_as_src(struct shader_translator * tx,const struct sm1_dst_param * param)1361 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1362 {
1363 struct ureg_src src;
1364
1365 assert(!param->shift);
1366 assert(!(param->mod & NINED3DSPDM_SATURATE));
1367
1368 switch (param->file) {
1369 case D3DSPR_INPUT:
1370 if (IS_VS) {
1371 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1372 } else {
1373 assert(!param->rel);
1374 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1375 src = tx->regs.v[param->idx];
1376 }
1377 break;
1378 default:
1379 src = ureg_src(tx_dst_param(tx, param));
1380 break;
1381 }
1382 if (param->rel)
1383 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1384
1385 if (!param->mask)
1386 WARN("mask is 0, using identity swizzle\n");
1387
1388 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1389 char s[4];
1390 int n;
1391 int c;
1392 for (n = 0, c = 0; c < 4; ++c)
1393 if (param->mask & (1 << c))
1394 s[n++] = c;
1395 assert(n);
1396 for (c = n; c < 4; ++c)
1397 s[c] = s[n - 1];
1398 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1399 }
1400 return src;
1401 }
1402
1403 static HRESULT
NineTranslateInstruction_Mkxn(struct shader_translator * tx,const unsigned k,const unsigned n)1404 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1405 {
1406 struct ureg_program *ureg = tx->ureg;
1407 struct ureg_dst dst;
1408 struct ureg_src src[2];
1409 struct sm1_src_param *src_mat = &tx->insn.src[1];
1410 unsigned i;
1411
1412 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1413 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1414
1415 for (i = 0; i < n; i++)
1416 {
1417 const unsigned m = (1 << i);
1418
1419 src[1] = tx_src_param(tx, src_mat);
1420 src_mat->idx++;
1421
1422 if (!(dst.WriteMask & m))
1423 continue;
1424
1425 /* XXX: src == dst case ? */
1426
1427 switch (k) {
1428 case 3:
1429 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1430 break;
1431 case 4:
1432 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1433 break;
1434 default:
1435 DBG("invalid operation: M%ux%u\n", m, n);
1436 break;
1437 }
1438 }
1439
1440 return D3D_OK;
1441 }
1442
/* Expands to the pair "0, 0"; presumably the (min, max) version pair of an
 * instruction-table entry that a shader stage does not support - confirm
 * against the table that uses it. */
#define VNOTSUPPORTED 0, 0
/* Pack a shader version: major number in bits 8 and up, minor number in
 * the low bits. */
#define V(vmaj, vmin) (((vmaj) << 8) | (vmin))
1445
1446 static inline const char *
d3dsio_to_string(unsigned opcode)1447 d3dsio_to_string( unsigned opcode )
1448 {
1449 static const char *names[] = {
1450 "NOP",
1451 "MOV",
1452 "ADD",
1453 "SUB",
1454 "MAD",
1455 "MUL",
1456 "RCP",
1457 "RSQ",
1458 "DP3",
1459 "DP4",
1460 "MIN",
1461 "MAX",
1462 "SLT",
1463 "SGE",
1464 "EXP",
1465 "LOG",
1466 "LIT",
1467 "DST",
1468 "LRP",
1469 "FRC",
1470 "M4x4",
1471 "M4x3",
1472 "M3x4",
1473 "M3x3",
1474 "M3x2",
1475 "CALL",
1476 "CALLNZ",
1477 "LOOP",
1478 "RET",
1479 "ENDLOOP",
1480 "LABEL",
1481 "DCL",
1482 "POW",
1483 "CRS",
1484 "SGN",
1485 "ABS",
1486 "NRM",
1487 "SINCOS",
1488 "REP",
1489 "ENDREP",
1490 "IF",
1491 "IFC",
1492 "ELSE",
1493 "ENDIF",
1494 "BREAK",
1495 "BREAKC",
1496 "MOVA",
1497 "DEFB",
1498 "DEFI",
1499 NULL,
1500 NULL,
1501 NULL,
1502 NULL,
1503 NULL,
1504 NULL,
1505 NULL,
1506 NULL,
1507 NULL,
1508 NULL,
1509 NULL,
1510 NULL,
1511 NULL,
1512 NULL,
1513 NULL,
1514 "TEXCOORD",
1515 "TEXKILL",
1516 "TEX",
1517 "TEXBEM",
1518 "TEXBEML",
1519 "TEXREG2AR",
1520 "TEXREG2GB",
1521 "TEXM3x2PAD",
1522 "TEXM3x2TEX",
1523 "TEXM3x3PAD",
1524 "TEXM3x3TEX",
1525 NULL,
1526 "TEXM3x3SPEC",
1527 "TEXM3x3VSPEC",
1528 "EXPP",
1529 "LOGP",
1530 "CND",
1531 "DEF",
1532 "TEXREG2RGB",
1533 "TEXDP3TEX",
1534 "TEXM3x2DEPTH",
1535 "TEXDP3",
1536 "TEXM3x3",
1537 "TEXDEPTH",
1538 "CMP",
1539 "BEM",
1540 "DP2ADD",
1541 "DSX",
1542 "DSY",
1543 "TEXLDD",
1544 "SETP",
1545 "TEXLDL",
1546 "BREAKP"
1547 };
1548
1549 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1550
1551 switch (opcode) {
1552 case D3DSIO_PHASE: return "PHASE";
1553 case D3DSIO_COMMENT: return "COMMENT";
1554 case D3DSIO_END: return "END";
1555 default:
1556 return NULL;
1557 }
1558 }
1559
1560 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1561 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1562 (inst).vert_version.max | \
1563 (inst).frag_version.min | \
1564 (inst).frag_version.max)
1565
1566 #define SPECIAL(name) \
1567 NineTranslateInstruction_##name
1568
1569 #define DECL_SPECIAL(name) \
1570 static HRESULT \
1571 NineTranslateInstruction_##name( struct shader_translator *tx )
1572
1573 static HRESULT
1574 NineTranslateInstruction_Generic(struct shader_translator *);
1575
DECL_SPECIAL(NOP)1576 DECL_SPECIAL(NOP)
1577 {
1578 /* Nothing to do. NOP was used to avoid hangs
1579 * with very old d3d drivers. */
1580 return D3D_OK;
1581 }
1582
DECL_SPECIAL(SUB)1583 DECL_SPECIAL(SUB)
1584 {
1585 struct ureg_program *ureg = tx->ureg;
1586 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1587 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1588 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1589
1590 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1591 return D3D_OK;
1592 }
1593
DECL_SPECIAL(ABS)1594 DECL_SPECIAL(ABS)
1595 {
1596 struct ureg_program *ureg = tx->ureg;
1597 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1598 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1599
1600 ureg_MOV(ureg, dst, ureg_abs(src));
1601 return D3D_OK;
1602 }
1603
DECL_SPECIAL(M4x4)1604 DECL_SPECIAL(M4x4)
1605 {
1606 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1607 }
1608
DECL_SPECIAL(M4x3)1609 DECL_SPECIAL(M4x3)
1610 {
1611 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1612 }
1613
DECL_SPECIAL(M3x4)1614 DECL_SPECIAL(M3x4)
1615 {
1616 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1617 }
1618
DECL_SPECIAL(M3x3)1619 DECL_SPECIAL(M3x3)
1620 {
1621 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1622 }
1623
DECL_SPECIAL(M3x2)1624 DECL_SPECIAL(M3x2)
1625 {
1626 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1627 }
1628
DECL_SPECIAL(CMP)1629 DECL_SPECIAL(CMP)
1630 {
1631 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1632 tx_src_param(tx, &tx->insn.src[0]),
1633 tx_src_param(tx, &tx->insn.src[2]),
1634 tx_src_param(tx, &tx->insn.src[1]));
1635 return D3D_OK;
1636 }
1637
DECL_SPECIAL(CND)1638 DECL_SPECIAL(CND)
1639 {
1640 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1641 struct ureg_dst cgt;
1642 struct ureg_src cnd;
1643
1644 /* the coissue flag was a tip for compilers to advise to
1645 * execute two operations at the same time, in cases
1646 * the two executions had same dst with different channels.
1647 * It has no effect on current hw. However it seems CND
1648 * is affected. The handling of this very specific case
1649 * handled below mimick wine behaviour */
1650 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1651 ureg_MOV(tx->ureg,
1652 dst, tx_src_param(tx, &tx->insn.src[1]));
1653 return D3D_OK;
1654 }
1655
1656 cnd = tx_src_param(tx, &tx->insn.src[0]);
1657 cgt = tx_scratch(tx);
1658
1659 if (tx->version.major == 1 && tx->version.minor < 4)
1660 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1661
1662 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1663
1664 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1665 tx_src_param(tx, &tx->insn.src[1]),
1666 tx_src_param(tx, &tx->insn.src[2]));
1667 return D3D_OK;
1668 }
1669
DECL_SPECIAL(CALL)1670 DECL_SPECIAL(CALL)
1671 {
1672 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1673 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1674 return D3D_OK;
1675 }
1676
DECL_SPECIAL(CALLNZ)1677 DECL_SPECIAL(CALLNZ)
1678 {
1679 struct ureg_program *ureg = tx->ureg;
1680 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1681
1682 if (!tx->native_integers)
1683 ureg_IF(ureg, src, tx_cond(tx));
1684 else
1685 ureg_UIF(ureg, src, tx_cond(tx));
1686 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1687 tx_endcond(tx);
1688 ureg_ENDIF(ureg);
1689 return D3D_OK;
1690 }
1691
DECL_SPECIAL(LOOP)1692 DECL_SPECIAL(LOOP)
1693 {
1694 struct ureg_program *ureg = tx->ureg;
1695 unsigned *label;
1696 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1697 struct ureg_dst ctr;
1698 struct ureg_dst tmp;
1699 struct ureg_src ctrx;
1700
1701 label = tx_bgnloop(tx);
1702 ctr = tx_get_loopctr(tx, TRUE);
1703 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1704
1705 /* src: num_iterations - start_value of al - step for al - 0 */
1706 ureg_MOV(ureg, ctr, src);
1707 ureg_BGNLOOP(tx->ureg, label);
1708 tmp = tx_scratch_scalar(tx);
1709 /* Initially ctr.x contains the number of iterations.
1710 * ctr.y will contain the updated value of al.
1711 * We decrease ctr.x at the end of every iteration,
1712 * and stop when it reaches 0. */
1713
1714 if (!tx->native_integers) {
1715 /* case src and ctr contain floats */
1716 /* to avoid precision issue, we stop when ctr <= 0.5 */
1717 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1718 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1719 } else {
1720 /* case src and ctr contain integers */
1721 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1722 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1723 }
1724 ureg_BRK(ureg);
1725 tx_endcond(tx);
1726 ureg_ENDIF(ureg);
1727 return D3D_OK;
1728 }
1729
DECL_SPECIAL(RET)1730 DECL_SPECIAL(RET)
1731 {
1732 ureg_RET(tx->ureg);
1733 return D3D_OK;
1734 }
1735
DECL_SPECIAL(ENDLOOP)1736 DECL_SPECIAL(ENDLOOP)
1737 {
1738 struct ureg_program *ureg = tx->ureg;
1739 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE);
1740 struct ureg_dst dst_ctrx, dst_al;
1741 struct ureg_src src_ctr, al_counter;
1742
1743 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1744 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1);
1745 src_ctr = ureg_src(ctr);
1746 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z);
1747
1748 /* ctr.x -= 1
1749 * ctr.y (aL) += step */
1750 if (!tx->native_integers) {
1751 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1752 ureg_ADD(ureg, dst_al, src_ctr, al_counter);
1753 } else {
1754 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1755 ureg_UADD(ureg, dst_al, src_ctr, al_counter);
1756 }
1757 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1758 return D3D_OK;
1759 }
1760
DECL_SPECIAL(LABEL)1761 DECL_SPECIAL(LABEL)
1762 {
1763 unsigned k = tx->num_inst_labels;
1764 unsigned n = tx->insn.src[0].idx;
1765 assert(n < 2048);
1766 if (n >= k)
1767 tx->inst_labels = REALLOC(tx->inst_labels,
1768 k * sizeof(tx->inst_labels[0]),
1769 n * sizeof(tx->inst_labels[0]));
1770
1771 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1772 return D3D_OK;
1773 }
1774
DECL_SPECIAL(SINCOS)1775 DECL_SPECIAL(SINCOS)
1776 {
1777 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1778 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1779
1780 assert(!(dst.WriteMask & 0xc));
1781
1782 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */
1783 ureg_SCS(tx->ureg, dst, src);
1784 return D3D_OK;
1785 }
1786
DECL_SPECIAL(SGN)1787 DECL_SPECIAL(SGN)
1788 {
1789 ureg_SSG(tx->ureg,
1790 tx_dst_param(tx, &tx->insn.dst[0]),
1791 tx_src_param(tx, &tx->insn.src[0]));
1792 return D3D_OK;
1793 }
1794
DECL_SPECIAL(REP)1795 DECL_SPECIAL(REP)
1796 {
1797 struct ureg_program *ureg = tx->ureg;
1798 unsigned *label;
1799 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1800 struct ureg_dst ctr;
1801 struct ureg_dst tmp;
1802 struct ureg_src ctrx;
1803
1804 label = tx_bgnloop(tx);
1805 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0);
1806 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1807
1808 /* NOTE: rep must be constant, so we don't have to save the count */
1809 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1810
1811 /* rep: num_iterations - 0 - 0 - 0 */
1812 ureg_MOV(ureg, ctr, rep);
1813 ureg_BGNLOOP(ureg, label);
1814 tmp = tx_scratch_scalar(tx);
1815 /* Initially ctr.x contains the number of iterations.
1816 * We decrease ctr.x at the end of every iteration,
1817 * and stop when it reaches 0. */
1818
1819 if (!tx->native_integers) {
1820 /* case src and ctr contain floats */
1821 /* to avoid precision issue, we stop when ctr <= 0.5 */
1822 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1823 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1824 } else {
1825 /* case src and ctr contain integers */
1826 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1827 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1828 }
1829 ureg_BRK(ureg);
1830 tx_endcond(tx);
1831 ureg_ENDIF(ureg);
1832
1833 return D3D_OK;
1834 }
1835
DECL_SPECIAL(ENDREP)1836 DECL_SPECIAL(ENDREP)
1837 {
1838 struct ureg_program *ureg = tx->ureg;
1839 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE);
1840 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1841 struct ureg_src src_ctr = ureg_src(ctr);
1842
1843 /* ctr.x -= 1 */
1844 if (!tx->native_integers)
1845 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1846 else
1847 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1848
1849 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1850 return D3D_OK;
1851 }
1852
DECL_SPECIAL(ENDIF)1853 DECL_SPECIAL(ENDIF)
1854 {
1855 tx_endcond(tx);
1856 ureg_ENDIF(tx->ureg);
1857 return D3D_OK;
1858 }
1859
DECL_SPECIAL(IF)1860 DECL_SPECIAL(IF)
1861 {
1862 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1863
1864 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
1865 ureg_UIF(tx->ureg, src, tx_cond(tx));
1866 else
1867 ureg_IF(tx->ureg, src, tx_cond(tx));
1868
1869 return D3D_OK;
1870 }
1871
1872 static inline unsigned
sm1_insn_flags_to_tgsi_setop(BYTE flags)1873 sm1_insn_flags_to_tgsi_setop(BYTE flags)
1874 {
1875 switch (flags) {
1876 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
1877 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
1878 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
1879 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
1880 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
1881 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
1882 default:
1883 assert(!"invalid comparison flags");
1884 return TGSI_OPCODE_SGT;
1885 }
1886 }
1887
DECL_SPECIAL(IFC)1888 DECL_SPECIAL(IFC)
1889 {
1890 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1891 struct ureg_src src[2];
1892 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1893 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1894 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1895 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1896 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1897 return D3D_OK;
1898 }
1899
DECL_SPECIAL(ELSE)1900 DECL_SPECIAL(ELSE)
1901 {
1902 ureg_ELSE(tx->ureg, tx_elsecond(tx));
1903 return D3D_OK;
1904 }
1905
DECL_SPECIAL(BREAKC)1906 DECL_SPECIAL(BREAKC)
1907 {
1908 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
1909 struct ureg_src src[2];
1910 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
1911 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1912 src[1] = tx_src_param(tx, &tx->insn.src[1]);
1913 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2);
1914 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
1915 ureg_BRK(tx->ureg);
1916 tx_endcond(tx);
1917 ureg_ENDIF(tx->ureg);
1918 return D3D_OK;
1919 }
1920
1921 static const char *sm1_declusage_names[] =
1922 {
1923 [D3DDECLUSAGE_POSITION] = "POSITION",
1924 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
1925 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
1926 [D3DDECLUSAGE_NORMAL] = "NORMAL",
1927 [D3DDECLUSAGE_PSIZE] = "PSIZE",
1928 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
1929 [D3DDECLUSAGE_TANGENT] = "TANGENT",
1930 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
1931 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
1932 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
1933 [D3DDECLUSAGE_COLOR] = "COLOR",
1934 [D3DDECLUSAGE_FOG] = "FOG",
1935 [D3DDECLUSAGE_DEPTH] = "DEPTH",
1936 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
1937 };
1938
1939 static inline unsigned
sm1_to_nine_declusage(struct sm1_semantic * dcl)1940 sm1_to_nine_declusage(struct sm1_semantic *dcl)
1941 {
1942 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
1943 }
1944
1945 static void
sm1_declusage_to_tgsi(struct tgsi_declaration_semantic * sem,boolean tc,struct sm1_semantic * dcl)1946 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
1947 boolean tc,
1948 struct sm1_semantic *dcl)
1949 {
1950 BYTE index = dcl->usage_idx;
1951
1952 /* For everything that is not matching to a TGSI_SEMANTIC_****,
1953 * we match to a TGSI_SEMANTIC_GENERIC with index.
1954 *
1955 * The index can be anything UINT16 and usage_idx is BYTE,
1956 * so we can fit everything. It doesn't matter if indices
1957 * are close together or low.
1958 *
1959 *
1960 * POSITION >= 1: 10 * index + 6
1961 * COLOR >= 2: 10 * (index-1) + 7
1962 * TEXCOORD[0..15]: index
1963 * BLENDWEIGHT: 10 * index + 18
1964 * BLENDINDICES: 10 * index + 19
1965 * NORMAL: 10 * index + 20
1966 * TANGENT: 10 * index + 21
1967 * BINORMAL: 10 * index + 22
1968 * TESSFACTOR: 10 * index + 23
1969 */
1970
1971 switch (dcl->usage) {
1972 case D3DDECLUSAGE_POSITION:
1973 case D3DDECLUSAGE_POSITIONT:
1974 case D3DDECLUSAGE_DEPTH:
1975 if (index == 0) {
1976 sem->Name = TGSI_SEMANTIC_POSITION;
1977 sem->Index = 0;
1978 } else {
1979 sem->Name = TGSI_SEMANTIC_GENERIC;
1980 sem->Index = 10 * index + 6;
1981 }
1982 break;
1983 case D3DDECLUSAGE_COLOR:
1984 if (index < 2) {
1985 sem->Name = TGSI_SEMANTIC_COLOR;
1986 sem->Index = index;
1987 } else {
1988 sem->Name = TGSI_SEMANTIC_GENERIC;
1989 sem->Index = 10 * (index-1) + 7;
1990 }
1991 break;
1992 case D3DDECLUSAGE_FOG:
1993 assert(index == 0);
1994 sem->Name = TGSI_SEMANTIC_FOG;
1995 sem->Index = 0;
1996 break;
1997 case D3DDECLUSAGE_PSIZE:
1998 assert(index == 0);
1999 sem->Name = TGSI_SEMANTIC_PSIZE;
2000 sem->Index = 0;
2001 break;
2002 case D3DDECLUSAGE_TEXCOORD:
2003 assert(index < 16);
2004 if (index < 8 && tc)
2005 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2006 else
2007 sem->Name = TGSI_SEMANTIC_GENERIC;
2008 sem->Index = index;
2009 break;
2010 case D3DDECLUSAGE_BLENDWEIGHT:
2011 sem->Name = TGSI_SEMANTIC_GENERIC;
2012 sem->Index = 10 * index + 18;
2013 break;
2014 case D3DDECLUSAGE_BLENDINDICES:
2015 sem->Name = TGSI_SEMANTIC_GENERIC;
2016 sem->Index = 10 * index + 19;
2017 break;
2018 case D3DDECLUSAGE_NORMAL:
2019 sem->Name = TGSI_SEMANTIC_GENERIC;
2020 sem->Index = 10 * index + 20;
2021 break;
2022 case D3DDECLUSAGE_TANGENT:
2023 sem->Name = TGSI_SEMANTIC_GENERIC;
2024 sem->Index = 10 * index + 21;
2025 break;
2026 case D3DDECLUSAGE_BINORMAL:
2027 sem->Name = TGSI_SEMANTIC_GENERIC;
2028 sem->Index = 10 * index + 22;
2029 break;
2030 case D3DDECLUSAGE_TESSFACTOR:
2031 sem->Name = TGSI_SEMANTIC_GENERIC;
2032 sem->Index = 10 * index + 23;
2033 break;
2034 case D3DDECLUSAGE_SAMPLE:
2035 sem->Name = TGSI_SEMANTIC_COUNT;
2036 sem->Index = 0;
2037 break;
2038 default:
2039 unreachable("Invalid DECLUSAGE.");
2040 break;
2041 }
2042 }
2043
/* D3DSTT_* sampler texture types, shifted down by D3DSP_TEXTURETYPE_SHIFT. */
#define NINED3DSTT_1D     (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_2D     (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
#define NINED3DSTT_CUBE   (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2048 static inline unsigned
d3dstt_to_tgsi_tex(BYTE sampler_type)2049 d3dstt_to_tgsi_tex(BYTE sampler_type)
2050 {
2051 switch (sampler_type) {
2052 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2053 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2054 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2055 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2056 default:
2057 assert(0);
2058 return TGSI_TEXTURE_UNKNOWN;
2059 }
2060 }
2061 static inline unsigned
d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)2062 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2063 {
2064 switch (sampler_type) {
2065 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2066 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2067 case NINED3DSTT_VOLUME:
2068 case NINED3DSTT_CUBE:
2069 default:
2070 assert(0);
2071 return TGSI_TEXTURE_UNKNOWN;
2072 }
2073 }
2074 static inline unsigned
ps1x_sampler_type(const struct nine_shader_info * info,unsigned stage)2075 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2076 {
2077 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2078 case 1: return TGSI_TEXTURE_1D;
2079 case 0: return TGSI_TEXTURE_2D;
2080 case 3: return TGSI_TEXTURE_3D;
2081 default:
2082 return TGSI_TEXTURE_CUBE;
2083 }
2084 }
2085
2086 static const char *
sm1_sampler_type_name(BYTE sampler_type)2087 sm1_sampler_type_name(BYTE sampler_type)
2088 {
2089 switch (sampler_type) {
2090 case NINED3DSTT_1D: return "1D";
2091 case NINED3DSTT_2D: return "2D";
2092 case NINED3DSTT_VOLUME: return "VOLUME";
2093 case NINED3DSTT_CUBE: return "CUBE";
2094 default:
2095 return "(D3DSTT_?)";
2096 }
2097 }
2098
2099 static inline unsigned
nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic * sem)2100 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2101 {
2102 switch (sem->Name) {
2103 case TGSI_SEMANTIC_POSITION:
2104 case TGSI_SEMANTIC_NORMAL:
2105 return TGSI_INTERPOLATE_LINEAR;
2106 case TGSI_SEMANTIC_BCOLOR:
2107 case TGSI_SEMANTIC_COLOR:
2108 return TGSI_INTERPOLATE_COLOR;
2109 case TGSI_SEMANTIC_FOG:
2110 case TGSI_SEMANTIC_GENERIC:
2111 case TGSI_SEMANTIC_TEXCOORD:
2112 case TGSI_SEMANTIC_CLIPDIST:
2113 case TGSI_SEMANTIC_CLIPVERTEX:
2114 return TGSI_INTERPOLATE_PERSPECTIVE;
2115 case TGSI_SEMANTIC_EDGEFLAG:
2116 case TGSI_SEMANTIC_FACE:
2117 case TGSI_SEMANTIC_INSTANCEID:
2118 case TGSI_SEMANTIC_PCOORD:
2119 case TGSI_SEMANTIC_PRIMID:
2120 case TGSI_SEMANTIC_PSIZE:
2121 case TGSI_SEMANTIC_VERTEXID:
2122 return TGSI_INTERPOLATE_CONSTANT;
2123 default:
2124 assert(0);
2125 return TGSI_INTERPOLATE_CONSTANT;
2126 }
2127 }
2128
DECL_SPECIAL(DCL)2129 DECL_SPECIAL(DCL)
2130 {
2131 struct ureg_program *ureg = tx->ureg;
2132 boolean is_input;
2133 boolean is_sampler;
2134 struct tgsi_declaration_semantic tgsi;
2135 struct sm1_semantic sem;
2136 sm1_read_semantic(tx, &sem);
2137
2138 is_input = sem.reg.file == D3DSPR_INPUT;
2139 is_sampler =
2140 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2141
2142 DUMP("DCL ");
2143 sm1_dump_dst_param(&sem.reg);
2144 if (is_sampler)
2145 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2146 else
2147 if (tx->version.major >= 3)
2148 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2149 else
2150 if (sem.usage | sem.usage_idx)
2151 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2152 else
2153 DUMP("\n");
2154
2155 if (is_sampler) {
2156 const unsigned m = 1 << sem.reg.idx;
2157 ureg_DECL_sampler(ureg, sem.reg.idx);
2158 tx->info->sampler_mask |= m;
2159 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2160 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2161 d3dstt_to_tgsi_tex(sem.sampler_type);
2162 return D3D_OK;
2163 }
2164
2165 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2166 if (IS_VS) {
2167 if (is_input) {
2168 /* linkage outside of shader with vertex declaration */
2169 ureg_DECL_vs_input(ureg, sem.reg.idx);
2170 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2171 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2172 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2173 /* NOTE: preserving order in case of indirect access */
2174 } else
2175 if (tx->version.major >= 3) {
2176 /* SM2 output semantic determined by file */
2177 assert(sem.reg.mask != 0);
2178 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2179 tx->info->position_t = TRUE;
2180 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2181 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing");
2182 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2183 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2184 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2185 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2186 tx->regs.oPos_out = tx->regs.o[sem.reg.idx];
2187 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2188 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2189 }
2190
2191 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2192 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2193 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2194 }
2195 }
2196 } else {
2197 if (is_input && tx->version.major >= 3) {
2198 unsigned interp_location = 0;
2199 /* SM3 only, SM2 input semantic determined by file */
2200 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2201 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing");
2202 /* PositionT and tessfactor forbidden */
2203 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2204 return D3DERR_INVALIDCALL;
2205
2206 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2207 /* Position0 is forbidden (likely because vPos already does that) */
2208 if (sem.usage == D3DDECLUSAGE_POSITION)
2209 return D3DERR_INVALIDCALL;
2210 /* Following code is for depth */
2211 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2212 return D3D_OK;
2213 }
2214
2215 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2216 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2217 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2218
2219 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid(
2220 ureg, tgsi.Name, tgsi.Index,
2221 nine_tgsi_to_interp_mode(&tgsi),
2222 0, /* cylwrap */
2223 interp_location, 0, 1);
2224 } else
2225 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2226 /* FragColor or FragDepth */
2227 assert(sem.reg.mask != 0);
2228 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2229 0, 1);
2230 }
2231 }
2232 return D3D_OK;
2233 }
2234
DECL_SPECIAL(DEF)2235 DECL_SPECIAL(DEF)
2236 {
2237 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2238 return D3D_OK;
2239 }
2240
DECL_SPECIAL(DEFB)2241 DECL_SPECIAL(DEFB)
2242 {
2243 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2244 return D3D_OK;
2245 }
2246
DECL_SPECIAL(DEFI)2247 DECL_SPECIAL(DEFI)
2248 {
2249 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2250 return D3D_OK;
2251 }
2252
DECL_SPECIAL(POW)2253 DECL_SPECIAL(POW)
2254 {
2255 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2256 struct ureg_src src[2] = {
2257 tx_src_param(tx, &tx->insn.src[0]),
2258 tx_src_param(tx, &tx->insn.src[1])
2259 };
2260 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2261 return D3D_OK;
2262 }
2263
DECL_SPECIAL(RSQ)2264 DECL_SPECIAL(RSQ)
2265 {
2266 struct ureg_program *ureg = tx->ureg;
2267 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2268 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2269 struct ureg_dst tmp = tx_scratch(tx);
2270 ureg_RSQ(ureg, tmp, ureg_abs(src));
2271 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2272 return D3D_OK;
2273 }
2274
DECL_SPECIAL(LOG)2275 DECL_SPECIAL(LOG)
2276 {
2277 struct ureg_program *ureg = tx->ureg;
2278 struct ureg_dst tmp = tx_scratch_scalar(tx);
2279 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2280 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2281 ureg_LG2(ureg, tmp, ureg_abs(src));
2282 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2283 return D3D_OK;
2284 }
2285
DECL_SPECIAL(LIT)2286 DECL_SPECIAL(LIT)
2287 {
2288 struct ureg_program *ureg = tx->ureg;
2289 struct ureg_dst tmp = tx_scratch(tx);
2290 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2291 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2292 ureg_LIT(ureg, tmp, src);
2293 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9
2294 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign
2295 * it 0^0 if src.w=0, which value is driver dependent. */
2296 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2297 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2298 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2299 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2300 return D3D_OK;
2301 }
2302
DECL_SPECIAL(NRM)2303 DECL_SPECIAL(NRM)
2304 {
2305 struct ureg_program *ureg = tx->ureg;
2306 struct ureg_dst tmp = tx_scratch_scalar(tx);
2307 struct ureg_src nrm = tx_src_scalar(tmp);
2308 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2309 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2310 ureg_DP3(ureg, tmp, src, src);
2311 ureg_RSQ(ureg, tmp, nrm);
2312 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2313 ureg_MUL(ureg, dst, src, nrm);
2314 return D3D_OK;
2315 }
2316
DECL_SPECIAL(DP2ADD)2317 DECL_SPECIAL(DP2ADD)
2318 {
2319 struct ureg_dst tmp = tx_scratch_scalar(tx);
2320 struct ureg_src dp2 = tx_src_scalar(tmp);
2321 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2322 struct ureg_src src[3];
2323 int i;
2324 for (i = 0; i < 3; ++i)
2325 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2326 assert_replicate_swizzle(&src[2]);
2327
2328 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2329 ureg_ADD(tx->ureg, dst, src[2], dp2);
2330
2331 return D3D_OK;
2332 }
2333
DECL_SPECIAL(TEXCOORD)2334 DECL_SPECIAL(TEXCOORD)
2335 {
2336 struct ureg_program *ureg = tx->ureg;
2337 const unsigned s = tx->insn.dst[0].idx;
2338 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2339
2340 tx_texcoord_alloc(tx, s);
2341 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2342 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2343
2344 return D3D_OK;
2345 }
2346
DECL_SPECIAL(TEXCOORD_ps14)2347 DECL_SPECIAL(TEXCOORD_ps14)
2348 {
2349 struct ureg_program *ureg = tx->ureg;
2350 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2351 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2352
2353 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2354
2355 ureg_MOV(ureg, dst, src);
2356
2357 return D3D_OK;
2358 }
2359
DECL_SPECIAL(TEXKILL)2360 DECL_SPECIAL(TEXKILL)
2361 {
2362 struct ureg_src reg;
2363
2364 if (tx->version.major > 1 || tx->version.minor > 3) {
2365 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2366 } else {
2367 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2368 reg = tx->regs.vT[tx->insn.dst[0].idx];
2369 }
2370 if (tx->version.major < 2)
2371 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2372 ureg_KILL_IF(tx->ureg, reg);
2373
2374 return D3D_OK;
2375 }
2376
DECL_SPECIAL(TEXBEM)2377 DECL_SPECIAL(TEXBEM)
2378 {
2379 struct ureg_program *ureg = tx->ureg;
2380 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2381 struct ureg_dst tmp, tmp2, texcoord;
2382 struct ureg_src sample, m00, m01, m10, m11;
2383 struct ureg_src bumpenvlscale, bumpenvloffset;
2384 const int m = tx->insn.dst[0].idx;
2385 const int n = tx->insn.src[0].idx;
2386
2387 assert(tx->version.major == 1);
2388
2389 sample = ureg_DECL_sampler(ureg, m);
2390 tx->info->sampler_mask |= 1 << m;
2391
2392 tx_texcoord_alloc(tx, m);
2393
2394 tmp = tx_scratch(tx);
2395 tmp2 = tx_scratch(tx);
2396 texcoord = tx_scratch(tx);
2397 /*
2398 * Bump-env-matrix:
2399 * 00 is X
2400 * 01 is Y
2401 * 10 is Z
2402 * 11 is W
2403 */
2404 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2);
2405 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2406 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2407 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2408 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2409
2410 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2411 if (m % 2 == 0) {
2412 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X);
2413 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y);
2414 } else {
2415 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z);
2416 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W);
2417 }
2418
2419 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2420
2421 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2422 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2423 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2424 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2425 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2426 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2427 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2428
2429 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2430 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2431 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord));
2432 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2433 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2434 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y),
2435 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2436
2437 /* Now the texture coordinates are in tmp.xy */
2438
2439 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2440 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2441 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2442 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2443 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2444 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z),
2445 bumpenvlscale, bumpenvloffset);
2446 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2447 }
2448
2449 tx->info->bumpenvmat_needed = 1;
2450
2451 return D3D_OK;
2452 }
2453
DECL_SPECIAL(TEXREG2AR)2454 DECL_SPECIAL(TEXREG2AR)
2455 {
2456 struct ureg_program *ureg = tx->ureg;
2457 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2458 struct ureg_src sample;
2459 const int m = tx->insn.dst[0].idx;
2460 const int n = tx->insn.src[0].idx;
2461 assert(m >= 0 && m > n);
2462
2463 sample = ureg_DECL_sampler(ureg, m);
2464 tx->info->sampler_mask |= 1 << m;
2465 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample);
2466
2467 return D3D_OK;
2468 }
2469
DECL_SPECIAL(TEXREG2GB)2470 DECL_SPECIAL(TEXREG2GB)
2471 {
2472 struct ureg_program *ureg = tx->ureg;
2473 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2474 struct ureg_src sample;
2475 const int m = tx->insn.dst[0].idx;
2476 const int n = tx->insn.src[0].idx;
2477 assert(m >= 0 && m > n);
2478
2479 sample = ureg_DECL_sampler(ureg, m);
2480 tx->info->sampler_mask |= 1 << m;
2481 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2482
2483 return D3D_OK;
2484 }
2485
DECL_SPECIAL(TEXM3x2PAD)2486 DECL_SPECIAL(TEXM3x2PAD)
2487 {
2488 return D3D_OK; /* this is just padding */
2489 }
2490
DECL_SPECIAL(TEXM3x2TEX)2491 DECL_SPECIAL(TEXM3x2TEX)
2492 {
2493 struct ureg_program *ureg = tx->ureg;
2494 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2495 struct ureg_src sample;
2496 const int m = tx->insn.dst[0].idx - 1;
2497 const int n = tx->insn.src[0].idx;
2498 assert(m >= 0 && m > n);
2499
2500 tx_texcoord_alloc(tx, m);
2501 tx_texcoord_alloc(tx, m+1);
2502
2503 /* performs the matrix multiplication */
2504 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2505 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2506
2507 sample = ureg_DECL_sampler(ureg, m + 1);
2508 tx->info->sampler_mask |= 1 << (m + 1);
2509 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2510
2511 return D3D_OK;
2512 }
2513
DECL_SPECIAL(TEXM3x3PAD)2514 DECL_SPECIAL(TEXM3x3PAD)
2515 {
2516 return D3D_OK; /* this is just padding */
2517 }
2518
DECL_SPECIAL(TEXM3x3SPEC)2519 DECL_SPECIAL(TEXM3x3SPEC)
2520 {
2521 struct ureg_program *ureg = tx->ureg;
2522 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2523 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2524 struct ureg_src sample;
2525 struct ureg_dst tmp;
2526 const int m = tx->insn.dst[0].idx - 2;
2527 const int n = tx->insn.src[0].idx;
2528 assert(m >= 0 && m > n);
2529
2530 tx_texcoord_alloc(tx, m);
2531 tx_texcoord_alloc(tx, m+1);
2532 tx_texcoord_alloc(tx, m+2);
2533
2534 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2535 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2536 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2537
2538 sample = ureg_DECL_sampler(ureg, m + 2);
2539 tx->info->sampler_mask |= 1 << (m + 2);
2540 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2541
2542 /* At this step, dst = N = (u', w', z').
2543 * We want dst to be the texture sampled at (u'', w'', z''), with
2544 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2545 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2546 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2547 /* at this step tmp.x = 1/N.N */
2548 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2549 /* at this step tmp.y = N.E */
2550 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2551 /* at this step tmp.x = N.E/N.N */
2552 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2553 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2554 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2555 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2556 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2557
2558 return D3D_OK;
2559 }
2560
DECL_SPECIAL(TEXREG2RGB)2561 DECL_SPECIAL(TEXREG2RGB)
2562 {
2563 struct ureg_program *ureg = tx->ureg;
2564 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2565 struct ureg_src sample;
2566 const int m = tx->insn.dst[0].idx;
2567 const int n = tx->insn.src[0].idx;
2568 assert(m >= 0 && m > n);
2569
2570 sample = ureg_DECL_sampler(ureg, m);
2571 tx->info->sampler_mask |= 1 << m;
2572 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample);
2573
2574 return D3D_OK;
2575 }
2576
DECL_SPECIAL(TEXDP3TEX)2577 DECL_SPECIAL(TEXDP3TEX)
2578 {
2579 struct ureg_program *ureg = tx->ureg;
2580 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2581 struct ureg_dst tmp;
2582 struct ureg_src sample;
2583 const int m = tx->insn.dst[0].idx;
2584 const int n = tx->insn.src[0].idx;
2585 assert(m >= 0 && m > n);
2586
2587 tx_texcoord_alloc(tx, m);
2588
2589 tmp = tx_scratch(tx);
2590 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2591 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2592
2593 sample = ureg_DECL_sampler(ureg, m);
2594 tx->info->sampler_mask |= 1 << m;
2595 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2596
2597 return D3D_OK;
2598 }
2599
DECL_SPECIAL(TEXM3x2DEPTH)2600 DECL_SPECIAL(TEXM3x2DEPTH)
2601 {
2602 struct ureg_program *ureg = tx->ureg;
2603 struct ureg_dst tmp;
2604 const int m = tx->insn.dst[0].idx - 1;
2605 const int n = tx->insn.src[0].idx;
2606 assert(m >= 0 && m > n);
2607
2608 tx_texcoord_alloc(tx, m);
2609 tx_texcoord_alloc(tx, m+1);
2610
2611 tmp = tx_scratch(tx);
2612
2613 /* performs the matrix multiplication */
2614 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2615 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2616
2617 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2618 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2619 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2620 /* res = 'w' == 0 ? 1.0 : z/w */
2621 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2622 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2623 /* replace the depth for depth testing with the result */
2624 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2625 TGSI_WRITEMASK_Z, 0, 1);
2626 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2627 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2628 return D3D_OK;
2629 }
2630
DECL_SPECIAL(TEXDP3)2631 DECL_SPECIAL(TEXDP3)
2632 {
2633 struct ureg_program *ureg = tx->ureg;
2634 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2635 const int m = tx->insn.dst[0].idx;
2636 const int n = tx->insn.src[0].idx;
2637 assert(m >= 0 && m > n);
2638
2639 tx_texcoord_alloc(tx, m);
2640
2641 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2642
2643 return D3D_OK;
2644 }
2645
DECL_SPECIAL(TEXM3x3)2646 DECL_SPECIAL(TEXM3x3)
2647 {
2648 struct ureg_program *ureg = tx->ureg;
2649 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2650 struct ureg_src sample;
2651 struct ureg_dst E, tmp;
2652 const int m = tx->insn.dst[0].idx - 2;
2653 const int n = tx->insn.src[0].idx;
2654 assert(m >= 0 && m > n);
2655
2656 tx_texcoord_alloc(tx, m);
2657 tx_texcoord_alloc(tx, m+1);
2658 tx_texcoord_alloc(tx, m+2);
2659
2660 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n]));
2661 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n]));
2662 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n]));
2663
2664 switch (tx->insn.opcode) {
2665 case D3DSIO_TEXM3x3:
2666 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2667 break;
2668 case D3DSIO_TEXM3x3TEX:
2669 sample = ureg_DECL_sampler(ureg, m + 2);
2670 tx->info->sampler_mask |= 1 << (m + 2);
2671 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2672 break;
2673 case D3DSIO_TEXM3x3VSPEC:
2674 sample = ureg_DECL_sampler(ureg, m + 2);
2675 tx->info->sampler_mask |= 1 << (m + 2);
2676 E = tx_scratch(tx);
2677 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2678 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2679 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2680 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2681 /* At this step, dst = N = (u', w', z').
2682 * We want dst to be the texture sampled at (u'', w'', z''), with
2683 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2684 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2685 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2686 /* at this step tmp.x = 1/N.N */
2687 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2688 /* at this step tmp.y = N.E */
2689 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2690 /* at this step tmp.x = N.E/N.N */
2691 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2692 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2693 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2694 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2695 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2696 break;
2697 default:
2698 return D3DERR_INVALIDCALL;
2699 }
2700 return D3D_OK;
2701 }
2702
DECL_SPECIAL(TEXDEPTH)2703 DECL_SPECIAL(TEXDEPTH)
2704 {
2705 struct ureg_program *ureg = tx->ureg;
2706 struct ureg_dst r5;
2707 struct ureg_src r5r, r5g;
2708
2709 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2710
2711 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2712 * r5 won't be used afterward, thus we can use r5.ba */
2713 r5 = tx->regs.r[5];
2714 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2715 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2716
2717 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2718 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2719 /* r5.r = r/g */
2720 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2721 r5r, ureg_imm1f(ureg, 1.0f));
2722 /* replace the depth for depth testing with the result */
2723 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2724 TGSI_WRITEMASK_Z, 0, 1);
2725 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2726
2727 return D3D_OK;
2728 }
2729
DECL_SPECIAL(BEM)2730 DECL_SPECIAL(BEM)
2731 {
2732 struct ureg_program *ureg = tx->ureg;
2733 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2734 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2735 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2736 struct ureg_src m00, m01, m10, m11;
2737 const int m = tx->insn.dst[0].idx;
2738 struct ureg_dst tmp;
2739 /*
2740 * Bump-env-matrix:
2741 * 00 is X
2742 * 01 is Y
2743 * 10 is Z
2744 * 11 is W
2745 */
2746 nine_info_mark_const_f_used(tx->info, 8 + m);
2747 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X);
2748 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y);
2749 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z);
2750 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W);
2751 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2752 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2753 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2754 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2755 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2756 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2757
2758 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2759 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2760 NINE_APPLY_SWIZZLE(src1, X), src0);
2761 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2762 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2763 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2764 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2765
2766 tx->info->bumpenvmat_needed = 1;
2767
2768 return D3D_OK;
2769 }
2770
DECL_SPECIAL(TEXLD)2771 DECL_SPECIAL(TEXLD)
2772 {
2773 struct ureg_program *ureg = tx->ureg;
2774 unsigned target;
2775 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2776 struct ureg_src src[2] = {
2777 tx_src_param(tx, &tx->insn.src[0]),
2778 tx_src_param(tx, &tx->insn.src[1])
2779 };
2780 assert(tx->insn.src[1].idx >= 0 &&
2781 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2782 target = tx->sampler_targets[tx->insn.src[1].idx];
2783
2784 switch (tx->insn.flags) {
2785 case 0:
2786 ureg_TEX(ureg, dst, target, src[0], src[1]);
2787 break;
2788 case NINED3DSI_TEXLD_PROJECT:
2789 ureg_TXP(ureg, dst, target, src[0], src[1]);
2790 break;
2791 case NINED3DSI_TEXLD_BIAS:
2792 ureg_TXB(ureg, dst, target, src[0], src[1]);
2793 break;
2794 default:
2795 assert(0);
2796 return D3DERR_INVALIDCALL;
2797 }
2798 return D3D_OK;
2799 }
2800
DECL_SPECIAL(TEXLD_14)2801 DECL_SPECIAL(TEXLD_14)
2802 {
2803 struct ureg_program *ureg = tx->ureg;
2804 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2805 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2806 const unsigned s = tx->insn.dst[0].idx;
2807 const unsigned t = ps1x_sampler_type(tx->info, s);
2808
2809 tx->info->sampler_mask |= 1 << s;
2810 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
2811
2812 return D3D_OK;
2813 }
2814
DECL_SPECIAL(TEX)2815 DECL_SPECIAL(TEX)
2816 {
2817 struct ureg_program *ureg = tx->ureg;
2818 const unsigned s = tx->insn.dst[0].idx;
2819 const unsigned t = ps1x_sampler_type(tx->info, s);
2820 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2821 struct ureg_src src[2];
2822
2823 tx_texcoord_alloc(tx, s);
2824
2825 src[0] = tx->regs.vT[s];
2826 src[1] = ureg_DECL_sampler(ureg, s);
2827 tx->info->sampler_mask |= 1 << s;
2828
2829 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
2830
2831 return D3D_OK;
2832 }
2833
DECL_SPECIAL(TEXLDD)2834 DECL_SPECIAL(TEXLDD)
2835 {
2836 unsigned target;
2837 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2838 struct ureg_src src[4] = {
2839 tx_src_param(tx, &tx->insn.src[0]),
2840 tx_src_param(tx, &tx->insn.src[1]),
2841 tx_src_param(tx, &tx->insn.src[2]),
2842 tx_src_param(tx, &tx->insn.src[3])
2843 };
2844 assert(tx->insn.src[1].idx >= 0 &&
2845 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2846 target = tx->sampler_targets[tx->insn.src[1].idx];
2847
2848 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
2849 return D3D_OK;
2850 }
2851
DECL_SPECIAL(TEXLDL)2852 DECL_SPECIAL(TEXLDL)
2853 {
2854 unsigned target;
2855 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2856 struct ureg_src src[2] = {
2857 tx_src_param(tx, &tx->insn.src[0]),
2858 tx_src_param(tx, &tx->insn.src[1])
2859 };
2860 assert(tx->insn.src[1].idx >= 0 &&
2861 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
2862 target = tx->sampler_targets[tx->insn.src[1].idx];
2863
2864 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
2865 return D3D_OK;
2866 }
2867
DECL_SPECIAL(SETP)2868 DECL_SPECIAL(SETP)
2869 {
2870 STUB(D3DERR_INVALIDCALL);
2871 }
2872
DECL_SPECIAL(BREAKP)2873 DECL_SPECIAL(BREAKP)
2874 {
2875 STUB(D3DERR_INVALIDCALL);
2876 }
2877
DECL_SPECIAL(PHASE)2878 DECL_SPECIAL(PHASE)
2879 {
2880 return D3D_OK; /* we don't care about phase */
2881 }
2882
DECL_SPECIAL(COMMENT)2883 DECL_SPECIAL(COMMENT)
2884 {
2885 return D3D_OK; /* nothing to do */
2886 }
2887
2888
/* Builds one inst_table initializer: the D3DSIO_ and TGSI_OPCODE_ prefixes
 * are pasted onto the first two arguments, the next four arguments form two
 * brace-enclosed pairs, and the last three are passed through unchanged.
 * NOTE(review): judging by the table entries, the pairs look like
 * [min, max] vertex shader and pixel shader version ranges, followed by the
 * destination count, source count and an optional handler - confirm against
 * struct sm1_op_info. */
#define _OPI(d3d_op, tgsi_op, vs_lo, vs_hi, ps_lo, ps_hi, n_dst, n_src, handler) \
    { D3DSIO_##d3d_op, TGSI_OPCODE_##tgsi_op, { vs_lo, vs_hi }, { ps_lo, ps_hi }, n_dst, n_src, handler }
2891
2892 struct sm1_op_info inst_table[] =
2893 {
2894 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
2895 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
2896 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
2897 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
2898 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
2899 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
2900 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */
2901 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
2902 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
2903 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
2904 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
2905 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
2906 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
2907 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
2908 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
2909 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
2910 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
2911 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
2912 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
2913 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
2914
2915 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
2916 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
2917 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
2918 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
2919 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
2920
2921 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
2922 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
2923 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
2924 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
2925 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
2926 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
2927
2928 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
2929
2930 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
2931 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
2932 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
2933 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
2934 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
2935
2936 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
2937 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
2938
2939 /* More flow control */
2940 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
2941 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
2942 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
2943 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
2944 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
2945 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
2946 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
2947 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
2948 /* we don't write to the address register, but a normal register (copied
2949 * when needed to the address register), thus we don't use ARR */
2950 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2951
2952 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
2953 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
2954
2955 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
2956 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
2957 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
2958 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
2959 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
2960 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
2961 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2962 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
2963 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
2964 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
2965 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
2966 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
2967 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
2968 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2969 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
2970 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2971
2972 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
2973 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
2974 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
2975 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
2976
2977 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
2978
2979 /* More tex stuff */
2980 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
2981 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
2982 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
2983 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
2984 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
2985 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
2986
2987 /* Misc */
2988 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
2989 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
2990 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
2991 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2992 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
2993 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
2994 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
2995 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
2996 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
2997 };
2998
/* Descriptor for D3DSIO_PHASE. Kept outside inst_table: sm1_parse_instruction
 * picks it by comparing the opcode directly when the opcode is too large to
 * index tx->op_info_map. */
struct sm1_op_info inst_phase =
    _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));

/* Descriptor for D3DSIO_COMMENT, selected the same way as inst_phase. */
struct sm1_op_info inst_comment =
    _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3004
3005 static void
create_op_info_map(struct shader_translator * tx)3006 create_op_info_map(struct shader_translator *tx)
3007 {
3008 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3009 unsigned i;
3010
3011 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3012 tx->op_info_map[i] = -1;
3013
3014 if (tx->processor == PIPE_SHADER_VERTEX) {
3015 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3016 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3017 if (inst_table[i].vert_version.min <= version &&
3018 inst_table[i].vert_version.max >= version)
3019 tx->op_info_map[inst_table[i].sio] = i;
3020 }
3021 } else {
3022 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3023 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3024 if (inst_table[i].frag_version.min <= version &&
3025 inst_table[i].frag_version.max >= version)
3026 tx->op_info_map[inst_table[i].sio] = i;
3027 }
3028 }
3029 }
3030
3031 static inline HRESULT
NineTranslateInstruction_Generic(struct shader_translator * tx)3032 NineTranslateInstruction_Generic(struct shader_translator *tx)
3033 {
3034 struct ureg_dst dst[1];
3035 struct ureg_src src[4];
3036 unsigned i;
3037
3038 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3039 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3040 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3041 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3042
3043 ureg_insn(tx->ureg, tx->insn.info->opcode,
3044 dst, tx->insn.ndst,
3045 src, tx->insn.nsrc);
3046 return D3D_OK;
3047 }
3048
3049 static inline DWORD
TOKEN_PEEK(struct shader_translator * tx)3050 TOKEN_PEEK(struct shader_translator *tx)
3051 {
3052 return *(tx->parse);
3053 }
3054
3055 static inline DWORD
TOKEN_NEXT(struct shader_translator * tx)3056 TOKEN_NEXT(struct shader_translator *tx)
3057 {
3058 return *(tx->parse)++;
3059 }
3060
3061 static inline void
TOKEN_JUMP(struct shader_translator * tx)3062 TOKEN_JUMP(struct shader_translator *tx)
3063 {
3064 if (tx->parse_next && tx->parse != tx->parse_next) {
3065 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3066 tx->parse = tx->parse_next;
3067 }
3068 }
3069
3070 static inline boolean
sm1_parse_eof(struct shader_translator * tx)3071 sm1_parse_eof(struct shader_translator *tx)
3072 {
3073 return TOKEN_PEEK(tx) == NINED3DSP_END;
3074 }
3075
3076 static void
sm1_read_version(struct shader_translator * tx)3077 sm1_read_version(struct shader_translator *tx)
3078 {
3079 const DWORD tok = TOKEN_NEXT(tx);
3080
3081 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3082 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3083
3084 switch (tok >> 16) {
3085 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3086 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3087 default:
3088 DBG("Invalid shader type: %x\n", tok);
3089 tx->processor = ~0;
3090 break;
3091 }
3092 }
3093
3094 /* This is just to check if we parsed the instruction properly. */
3095 static void
sm1_parse_get_skip(struct shader_translator * tx)3096 sm1_parse_get_skip(struct shader_translator *tx)
3097 {
3098 const DWORD tok = TOKEN_PEEK(tx);
3099
3100 if (tx->version.major >= 2) {
3101 tx->parse_next = tx->parse + 1 /* this */ +
3102 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3103 } else {
3104 tx->parse_next = NULL; /* TODO: determine from param count */
3105 }
3106 }
3107
3108 static void
sm1_print_comment(const char * comment,UINT size)3109 sm1_print_comment(const char *comment, UINT size)
3110 {
3111 if (!size)
3112 return;
3113 /* TODO */
3114 }
3115
3116 static void
sm1_parse_comments(struct shader_translator * tx,BOOL print)3117 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3118 {
3119 DWORD tok = TOKEN_PEEK(tx);
3120
3121 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3122 {
3123 const char *comment = "";
3124 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3125 tx->parse += size + 1;
3126
3127 if (print)
3128 sm1_print_comment(comment, size);
3129
3130 tok = TOKEN_PEEK(tx);
3131 }
3132 }
3133
3134 static void
sm1_parse_get_param(struct shader_translator * tx,DWORD * reg,DWORD * rel)3135 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3136 {
3137 *reg = TOKEN_NEXT(tx);
3138
3139 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3140 {
3141 if (tx->version.major < 2)
3142 *rel = (1 << 31) |
3143 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3144 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3145 D3DSP_NOSWIZZLE;
3146 else
3147 *rel = TOKEN_NEXT(tx);
3148 }
3149 }
3150
3151 static void
sm1_parse_dst_param(struct sm1_dst_param * dst,DWORD tok)3152 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3153 {
3154 int8_t shift;
3155 dst->file =
3156 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3157 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3158 dst->type = TGSI_RETURN_TYPE_FLOAT;
3159 dst->idx = tok & D3DSP_REGNUM_MASK;
3160 dst->rel = NULL;
3161 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3162 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
3163 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3164 dst->shift = (shift & 0x7) - (shift & 0x8);
3165 }
3166
3167 static void
sm1_parse_src_param(struct sm1_src_param * src,DWORD tok)3168 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3169 {
3170 src->file =
3171 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3172 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3173 src->type = TGSI_RETURN_TYPE_FLOAT;
3174 src->idx = tok & D3DSP_REGNUM_MASK;
3175 src->rel = NULL;
3176 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3177 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3178
3179 switch (src->file) {
3180 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3181 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3182 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3183 default:
3184 break;
3185 }
3186 }
3187
3188 static void
sm1_parse_immediate(struct shader_translator * tx,struct sm1_src_param * imm)3189 sm1_parse_immediate(struct shader_translator *tx,
3190 struct sm1_src_param *imm)
3191 {
3192 imm->file = NINED3DSPR_IMMEDIATE;
3193 imm->idx = INT_MIN;
3194 imm->rel = NULL;
3195 imm->swizzle = NINED3DSP_NOSWIZZLE;
3196 imm->mod = 0;
3197 switch (tx->insn.opcode) {
3198 case D3DSIO_DEF:
3199 imm->type = NINED3DSPTYPE_FLOAT4;
3200 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3201 tx->parse += 4;
3202 break;
3203 case D3DSIO_DEFI:
3204 imm->type = NINED3DSPTYPE_INT4;
3205 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3206 tx->parse += 4;
3207 break;
3208 case D3DSIO_DEFB:
3209 imm->type = NINED3DSPTYPE_BOOL;
3210 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3211 tx->parse += 1;
3212 break;
3213 default:
3214 assert(0);
3215 break;
3216 }
3217 }
3218
3219 static void
sm1_read_dst_param(struct shader_translator * tx,struct sm1_dst_param * dst,struct sm1_src_param * rel)3220 sm1_read_dst_param(struct shader_translator *tx,
3221 struct sm1_dst_param *dst,
3222 struct sm1_src_param *rel)
3223 {
3224 DWORD tok_dst, tok_rel = 0;
3225
3226 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3227 sm1_parse_dst_param(dst, tok_dst);
3228 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3229 sm1_parse_src_param(rel, tok_rel);
3230 dst->rel = rel;
3231 }
3232 }
3233
3234 static void
sm1_read_src_param(struct shader_translator * tx,struct sm1_src_param * src,struct sm1_src_param * rel)3235 sm1_read_src_param(struct shader_translator *tx,
3236 struct sm1_src_param *src,
3237 struct sm1_src_param *rel)
3238 {
3239 DWORD tok_src, tok_rel = 0;
3240
3241 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3242 sm1_parse_src_param(src, tok_src);
3243 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3244 assert(rel);
3245 sm1_parse_src_param(rel, tok_rel);
3246 src->rel = rel;
3247 }
3248 }
3249
3250 static void
sm1_read_semantic(struct shader_translator * tx,struct sm1_semantic * sem)3251 sm1_read_semantic(struct shader_translator *tx,
3252 struct sm1_semantic *sem)
3253 {
3254 const DWORD tok_usg = TOKEN_NEXT(tx);
3255 const DWORD tok_dst = TOKEN_NEXT(tx);
3256
3257 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3258 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3259 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3260
3261 sm1_parse_dst_param(&sem->reg, tok_dst);
3262 }
3263
3264 static void
sm1_parse_instruction(struct shader_translator * tx)3265 sm1_parse_instruction(struct shader_translator *tx)
3266 {
3267 struct sm1_instruction *insn = &tx->insn;
3268 HRESULT hr;
3269 DWORD tok;
3270 struct sm1_op_info *info = NULL;
3271 unsigned i;
3272
3273 sm1_parse_comments(tx, TRUE);
3274 sm1_parse_get_skip(tx);
3275
3276 tok = TOKEN_NEXT(tx);
3277
3278 insn->opcode = tok & D3DSI_OPCODE_MASK;
3279 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3280 insn->coissue = !!(tok & D3DSI_COISSUE);
3281 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3282
3283 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3284 int k = tx->op_info_map[insn->opcode];
3285 if (k >= 0) {
3286 assert(k < ARRAY_SIZE(inst_table));
3287 info = &inst_table[k];
3288 }
3289 } else {
3290 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3291 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3292 }
3293 if (!info) {
3294 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3295 TOKEN_JUMP(tx);
3296 return;
3297 }
3298 insn->info = info;
3299 insn->ndst = info->ndst;
3300 insn->nsrc = info->nsrc;
3301
3302 assert(!insn->predicated && "TODO: predicated instructions");
3303
3304 /* check version */
3305 {
3306 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3307 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3308 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3309 if (ver < min || ver > max) {
3310 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3311 min, ver, max);
3312 return;
3313 }
3314 }
3315
3316 for (i = 0; i < insn->ndst; ++i)
3317 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3318 if (insn->predicated)
3319 sm1_read_src_param(tx, &insn->pred, NULL);
3320 for (i = 0; i < insn->nsrc; ++i)
3321 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3322
3323 /* parse here so we can dump them before processing */
3324 if (insn->opcode == D3DSIO_DEF ||
3325 insn->opcode == D3DSIO_DEFI ||
3326 insn->opcode == D3DSIO_DEFB)
3327 sm1_parse_immediate(tx, &tx->insn.src[0]);
3328
3329 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3330 sm1_instruction_check(insn);
3331
3332 if (info->handler)
3333 hr = info->handler(tx);
3334 else
3335 hr = NineTranslateInstruction_Generic(tx);
3336 tx_apply_dst0_modifiers(tx);
3337
3338 if (hr != D3D_OK)
3339 tx->failure = TRUE;
3340 tx->num_scratch = 0; /* reset */
3341
3342 TOKEN_JUMP(tx);
3343 }
3344
3345 static void
tx_ctor(struct shader_translator * tx,struct nine_shader_info * info)3346 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info)
3347 {
3348 unsigned i;
3349
3350 tx->info = info;
3351
3352 tx->byte_code = info->byte_code;
3353 tx->parse = info->byte_code;
3354
3355 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3356 info->input_map[i] = NINE_DECLUSAGE_NONE;
3357 info->num_inputs = 0;
3358
3359 info->position_t = FALSE;
3360 info->point_size = FALSE;
3361
3362 tx->info->const_float_slots = 0;
3363 tx->info->const_int_slots = 0;
3364 tx->info->const_bool_slots = 0;
3365
3366 info->sampler_mask = 0x0;
3367 info->rt_mask = 0x0;
3368
3369 info->lconstf.data = NULL;
3370 info->lconstf.ranges = NULL;
3371
3372 info->bumpenvmat_needed = 0;
3373
3374 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3375 tx->regs.rL[i] = ureg_dst_undef();
3376 }
3377 tx->regs.address = ureg_dst_undef();
3378 tx->regs.a0 = ureg_dst_undef();
3379 tx->regs.p = ureg_dst_undef();
3380 tx->regs.oDepth = ureg_dst_undef();
3381 tx->regs.vPos = ureg_src_undef();
3382 tx->regs.vFace = ureg_src_undef();
3383 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3384 tx->regs.o[i] = ureg_dst_undef();
3385 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3386 tx->regs.oCol[i] = ureg_dst_undef();
3387 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3388 tx->regs.vC[i] = ureg_src_undef();
3389 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3390 tx->regs.vT[i] = ureg_src_undef();
3391
3392 sm1_read_version(tx);
3393
3394 info->version = (tx->version.major << 4) | tx->version.minor;
3395
3396 tx->num_outputs = 0;
3397
3398 create_op_info_map(tx);
3399 }
3400
3401 static void
tx_dtor(struct shader_translator * tx)3402 tx_dtor(struct shader_translator *tx)
3403 {
3404 if (tx->num_inst_labels)
3405 FREE(tx->inst_labels);
3406 FREE(tx->lconstf);
3407 FREE(tx->regs.r);
3408 FREE(tx);
3409 }
3410
3411 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3412 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3413 static void
shader_add_vs_viewport_transform(struct shader_translator * tx)3414 shader_add_vs_viewport_transform(struct shader_translator *tx)
3415 {
3416 struct ureg_program *ureg = tx->ureg;
3417 struct ureg_src c0 = NINE_CONSTANT_SRC(0);
3418 struct ureg_src c1 = NINE_CONSTANT_SRC(1);
3419 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3420
3421 c0 = ureg_src_dimension(c0, 4);
3422 c1 = ureg_src_dimension(c1, 4);
3423 /* TODO: find out when we need to apply the viewport transformation or not.
3424 * Likely will be XYZ vs XYZRHW in vdecl_out
3425 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3426 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3427 */
3428 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3429 }
3430
3431 static void
shader_add_ps_fog_stage(struct shader_translator * tx,struct ureg_src src_col)3432 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col)
3433 {
3434 struct ureg_program *ureg = tx->ureg;
3435 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
3436 struct ureg_src fog_end, fog_coeff, fog_density;
3437 struct ureg_src fog_vs, depth, fog_color;
3438 struct ureg_dst fog_factor;
3439
3440 if (!tx->info->fog_enable) {
3441 ureg_MOV(ureg, oCol0, src_col);
3442 return;
3443 }
3444
3445 if (tx->info->fog_mode != D3DFOG_NONE) {
3446 depth = nine_get_position_input(tx);
3447 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z);
3448 }
3449
3450 nine_info_mark_const_f_used(tx->info, 33);
3451 fog_color = NINE_CONSTANT_SRC(32);
3452 fog_factor = tx_scratch_scalar(tx);
3453
3454 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3455 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3456 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y);
3457 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth));
3458 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3459 } else if (tx->info->fog_mode == D3DFOG_EXP) {
3460 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3461 ureg_MUL(ureg, fog_factor, depth, fog_density);
3462 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3463 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3464 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3465 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X);
3466 ureg_MUL(ureg, fog_factor, depth, fog_density);
3467 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3468 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3469 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3470 } else {
3471 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0,
3472 TGSI_INTERPOLATE_PERSPECTIVE),
3473 TGSI_SWIZZLE_X);
3474 ureg_MOV(ureg, fog_factor, fog_vs);
3475 }
3476
3477 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ),
3478 tx_src_scalar(fog_factor), src_col, fog_color);
3479 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col);
3480 }
3481
/* Query PIPE_CAP_<n> from the local variable `screen`. */
#define GET_CAP(n) screen->get_param( \
      screen, PIPE_CAP_##n)
/* Query PIPE_SHADER_CAP_<n> for the stage info->type from the local variable
 * `screen`; requires `info` to be in scope as well. */
#define GET_SHADER_CAP(n) screen->get_shader_param( \
      screen, info->type, PIPE_SHADER_CAP_##n)
3486
3487 HRESULT
nine_translate_shader(struct NineDevice9 * device,struct nine_shader_info * info,struct pipe_context * pipe)3488 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
3489 {
3490 struct shader_translator *tx;
3491 HRESULT hr = D3D_OK;
3492 const unsigned processor = info->type;
3493 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
3494
3495 user_assert(processor != ~0, D3DERR_INVALIDCALL);
3496
3497 tx = CALLOC_STRUCT(shader_translator);
3498 if (!tx)
3499 return E_OUTOFMEMORY;
3500 tx_ctor(tx, info);
3501
3502 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
3503 hr = D3DERR_INVALIDCALL;
3504 DBG("Unsupported shader version: %u.%u !\n",
3505 tx->version.major, tx->version.minor);
3506 goto out;
3507 }
3508 if (tx->processor != processor) {
3509 hr = D3DERR_INVALIDCALL;
3510 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
3511 goto out;
3512 }
3513 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
3514 tx->version.major, tx->version.minor);
3515
3516 tx->ureg = ureg_create(processor);
3517 if (!tx->ureg) {
3518 hr = E_OUTOFMEMORY;
3519 goto out;
3520 }
3521
3522 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3523 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3524 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS);
3525 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3526 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3527 tx->texcoord_sn = tx->want_texcoord ?
3528 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3529 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL);
3530 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL);
3531
3532 if (IS_VS) {
3533 tx->num_constf_allowed = NINE_MAX_CONST_F;
3534 } else if (tx->version.major < 2) {/* IS_PS v1 */
3535 tx->num_constf_allowed = 8;
3536 } else if (tx->version.major == 2) {/* IS_PS v2 */
3537 tx->num_constf_allowed = 32;
3538 } else {/* IS_PS v3 */
3539 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3540 }
3541
3542 if (tx->version.major < 2) {
3543 tx->num_consti_allowed = 0;
3544 tx->num_constb_allowed = 0;
3545 } else {
3546 tx->num_consti_allowed = NINE_MAX_CONST_I;
3547 tx->num_constb_allowed = NINE_MAX_CONST_B;
3548 }
3549
3550 if (IS_VS && tx->version.major >= 2 && info->swvp_on) {
3551 tx->num_constf_allowed = 8192;
3552 tx->num_consti_allowed = 2048;
3553 tx->num_constb_allowed = 2048;
3554 }
3555
3556 /* VS must always write position. Declare it here to make it the 1st output.
3557 * (Some drivers like nv50 are buggy and rely on that.)
3558 */
3559 if (IS_VS) {
3560 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3561 } else {
3562 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3563 if (!tx->shift_wpos)
3564 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3565 }
3566
3567 while (!sm1_parse_eof(tx) && !tx->failure)
3568 sm1_parse_instruction(tx);
3569 tx->parse++; /* for byte_size */
3570
3571 if (tx->failure) {
3572 /* For VS shaders, we print the warning later,
3573 * we first try with swvp. */
3574 if (IS_PS)
3575 ERR("Encountered buggy shader\n");
3576 ureg_destroy(tx->ureg);
3577 hr = D3DERR_INVALIDCALL;
3578 goto out;
3579 }
3580
3581 if (IS_PS && tx->version.major < 3) {
3582 if (tx->version.major < 2) {
3583 assert(tx->num_temp); /* there must be color output */
3584 info->rt_mask |= 0x1;
3585 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0]));
3586 } else {
3587 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0]));
3588 }
3589 }
3590
3591 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3592 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0);
3593 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3594 }
3595
3596 if (info->position_t)
3597 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
3598
3599 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3600 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3601 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3602 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3603 info->point_size = TRUE;
3604 }
3605
3606 if (info->process_vertices)
3607 shader_add_vs_viewport_transform(tx);
3608
3609 ureg_END(tx->ureg);
3610
3611 /* record local constants */
3612 if (tx->num_lconstf && tx->indirect_const_access) {
3613 struct nine_range *ranges;
3614 float *data;
3615 int *indices;
3616 unsigned i, k, n;
3617
3618 hr = E_OUTOFMEMORY;
3619
3620 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
3621 if (!data)
3622 goto out;
3623 info->lconstf.data = data;
3624
3625 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
3626 if (!indices)
3627 goto out;
3628
3629 /* lazy sort, num_lconstf should be small */
3630 for (n = 0; n < tx->num_lconstf; ++n) {
3631 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
3632 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
3633 k = i;
3634 }
3635 indices[n] = tx->lconstf[k].idx;
3636 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
3637 tx->lconstf[k].idx = INT_MAX;
3638 }
3639
3640 /* count ranges */
3641 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
3642 if (indices[i] != indices[i - 1] + 1)
3643 ++n;
3644 ranges = MALLOC(n * sizeof(ranges[0]));
3645 if (!ranges) {
3646 FREE(indices);
3647 goto out;
3648 }
3649 info->lconstf.ranges = ranges;
3650
3651 k = 0;
3652 ranges[k].bgn = indices[0];
3653 for (i = 1; i < tx->num_lconstf; ++i) {
3654 if (indices[i] != indices[i - 1] + 1) {
3655 ranges[k].next = &ranges[k + 1];
3656 ranges[k].end = indices[i - 1] + 1;
3657 ++k;
3658 ranges[k].bgn = indices[i];
3659 }
3660 }
3661 ranges[k].end = indices[i - 1] + 1;
3662 ranges[k].next = NULL;
3663 assert(n == (k + 1));
3664
3665 FREE(indices);
3666 hr = D3D_OK;
3667 }
3668
3669 /* r500 */
3670 if (info->const_float_slots > device->max_vs_const_f &&
3671 (info->const_int_slots || info->const_bool_slots) &&
3672 (!IS_VS || !info->swvp_on))
3673 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
3674
3675
3676 if (tx->indirect_const_access) /* vs only */
3677 info->const_float_slots = device->max_vs_const_f;
3678
3679 if (!IS_VS || !info->swvp_on) {
3680 unsigned s, slot_max;
3681 unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f;
3682
3683 slot_max = info->const_bool_slots > 0 ?
3684 max_const_f + NINE_MAX_CONST_I
3685 + DIV_ROUND_UP(info->const_bool_slots, 4) :
3686 info->const_int_slots > 0 ?
3687 max_const_f + info->const_int_slots :
3688 info->const_float_slots;
3689
3690 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */
3691
3692 for (s = 0; s < slot_max; s++)
3693 ureg_DECL_constant(tx->ureg, s);
3694 } else {
3695 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
3696 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
3697 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
3698 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
3699 }
3700
3701 if (info->process_vertices)
3702 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
3703
3704 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) {
3705 unsigned count;
3706 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count);
3707 tgsi_dump(toks, 0);
3708 ureg_free_tokens(toks);
3709 }
3710
3711 if (info->process_vertices) {
3712 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
3713 tx->output_info,
3714 tx->num_outputs,
3715 &(info->so));
3716 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
3717 } else
3718 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe);
3719 if (!info->cso) {
3720 hr = D3DERR_DRIVERINTERNALERROR;
3721 FREE(info->lconstf.data);
3722 FREE(info->lconstf.ranges);
3723 goto out;
3724 }
3725
3726 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
3727 out:
3728 tx_dtor(tx);
3729 return hr;
3730 }
3731