1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright 2013 Christoph Bumiller
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23
24 #include "nine_shader.h"
25
26 #include "device9.h"
27 #include "nine_debug.h"
28 #include "nine_state.h"
29 #include "vertexdeclaration9.h"
30
31 #include "util/bitscan.h"
32 #include "util/macros.h"
33 #include "util/u_memory.h"
34 #include "util/u_inlines.h"
35 #include "pipe/p_shader_tokens.h"
36 #include "tgsi/tgsi_ureg.h"
37 #include "tgsi/tgsi_dump.h"
38 #include "nir/tgsi_to_nir.h"
39
40 #define DBG_CHANNEL DBG_SHADER
41
42 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args)
43
44
45 struct shader_translator;
46
47 typedef HRESULT (*translate_instruction_func)(struct shader_translator *);
48
49 static inline const char *d3dsio_to_string(unsigned opcode);
50
51
52 #define NINED3D_SM1_VS 0xfffe
53 #define NINED3D_SM1_PS 0xffff
54
55 #define NINE_MAX_COND_DEPTH 64
56 #define NINE_MAX_LOOP_DEPTH 64
57
58 #define NINED3DSP_END 0x0000ffff
59
60 #define NINED3DSPTYPE_FLOAT4 0
61 #define NINED3DSPTYPE_INT4 1
62 #define NINED3DSPTYPE_BOOL 2
63
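/* Extra pseudo register file (one past the last real D3D file) used
 * internally for the immediates defined by DEF/DEFI/DEFB, so they can be
 * carried in sm1_src_param and dumped like any other source parameter. */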
64 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1)
65
66 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL
67 #define NINED3DSP_WRITEMASK_SHIFT 16
68
69 #define NINED3DSHADER_INST_PREDICATED (1 << 28)
70
71 #define NINED3DSHADER_REL_OP_GT 1
72 #define NINED3DSHADER_REL_OP_EQ 2
73 #define NINED3DSHADER_REL_OP_GE 3
74 #define NINED3DSHADER_REL_OP_LT 4
75 #define NINED3DSHADER_REL_OP_NE 5
76 #define NINED3DSHADER_REL_OP_LE 6
77
78 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16
79 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT)
80
81 #define NINED3DSI_TEXLD_PROJECT 0x1
82 #define NINED3DSI_TEXLD_BIAS 0x2
83
84 #define NINED3DSP_WRITEMASK_0 0x1
85 #define NINED3DSP_WRITEMASK_1 0x2
86 #define NINED3DSP_WRITEMASK_2 0x4
87 #define NINED3DSP_WRITEMASK_3 0x8
88 #define NINED3DSP_WRITEMASK_ALL 0xf
89
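/* SM1 swizzles use two bits per destination channel to select the source
 * channel feeding it, so the identity swizzle .xyzw encodes as 0xe4
 * (see sm1_dump_swizzle for the decoding). */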
90 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6))
91
92 #define NINE_SWIZZLE4(x,y,z,w) \
93 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w
94
95 #define NINE_APPLY_SWIZZLE(src, s) \
96 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s))
97
98 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT)
99 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT)
100 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT)
101
102 /*
103 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4
104 * BIAS <= PS 1.4 (x-0.5)
105 * BIASNEG <= PS 1.4 (-(x-0.5))
106 * SIGN <= PS 1.4 (2(x-0.5))
107 * SIGNNEG <= PS 1.4 (-2(x-0.5))
108 * COMP <= PS 1.4 (1-x)
109 * X2 = PS 1.4 (2x)
110 * X2NEG = PS 1.4 (-2x)
111 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11
112 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11
113 * ABS >= SM 3.0 (abs(x))
114 * ABSNEG >= SM 3.0 (-abs(x))
115 * NOT >= SM 2.0 predication only
116 */
117 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT)
118 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT)
119 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT)
120 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT)
121 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT)
122 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT)
123 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT)
124 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT)
125 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT)
126 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT)
127 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT)
128 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT)
129 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT)
130 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT)
131
132 static const char *sm1_mod_str[] =
133 {
134 [NINED3DSPSM_NONE] = "",
135 [NINED3DSPSM_NEG] = "-",
136 [NINED3DSPSM_BIAS] = "bias",
137 [NINED3DSPSM_BIASNEG] = "biasneg",
138 [NINED3DSPSM_SIGN] = "sign",
139 [NINED3DSPSM_SIGNNEG] = "signneg",
140 [NINED3DSPSM_COMP] = "comp",
141 [NINED3DSPSM_X2] = "x2",
142 [NINED3DSPSM_X2NEG] = "x2neg",
143 [NINED3DSPSM_DZ] = "dz",
144 [NINED3DSPSM_DW] = "dw",
145 [NINED3DSPSM_ABS] = "abs",
146 [NINED3DSPSM_ABSNEG] = "-abs",
147 [NINED3DSPSM_NOT] = "not"
148 };
149
150 static void
151 sm1_dump_writemask(BYTE mask)
152 {
153 if (mask & 1) DUMP("x"); else DUMP("_");
154 if (mask & 2) DUMP("y"); else DUMP("_");
155 if (mask & 4) DUMP("z"); else DUMP("_");
156 if (mask & 8) DUMP("w"); else DUMP("_");
157 }
158
159 static void
160 sm1_dump_swizzle(BYTE s)
161 {
162 char c[4] = { 'x', 'y', 'z', 'w' };
163 DUMP("%c%c%c%c",
164 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]);
165 }
166
167 static const char sm1_file_char[] =
168 {
169 [D3DSPR_TEMP] = 'r',
170 [D3DSPR_INPUT] = 'v',
171 [D3DSPR_CONST] = 'c',
172 [D3DSPR_ADDR] = 'A',
173 [D3DSPR_RASTOUT] = 'R',
174 [D3DSPR_ATTROUT] = 'D',
175 [D3DSPR_OUTPUT] = 'o',
176 [D3DSPR_CONSTINT] = 'I',
177 [D3DSPR_COLOROUT] = 'C',
178 [D3DSPR_DEPTHOUT] = 'D',
179 [D3DSPR_SAMPLER] = 's',
180 [D3DSPR_CONST2] = 'c',
181 [D3DSPR_CONST3] = 'c',
182 [D3DSPR_CONST4] = 'c',
183 [D3DSPR_CONSTBOOL] = 'B',
184 [D3DSPR_LOOP] = 'L',
185 [D3DSPR_TEMPFLOAT16] = 'h',
186 [D3DSPR_MISCTYPE] = 'M',
187 [D3DSPR_LABEL] = 'X',
188 [D3DSPR_PREDICATE] = 'p'
189 };
190
191 static void
192 sm1_dump_reg(BYTE file, INT index)
193 {
194 switch (file) {
195 case D3DSPR_LOOP:
196 DUMP("aL");
197 break;
198 case D3DSPR_COLOROUT:
199 DUMP("oC%i", index);
200 break;
201 case D3DSPR_DEPTHOUT:
202 DUMP("oDepth");
203 break;
204 case D3DSPR_RASTOUT:
205 DUMP("oRast%i", index);
206 break;
207 case D3DSPR_CONSTINT:
208 DUMP("iconst[%i]", index);
209 break;
210 case D3DSPR_CONSTBOOL:
211 DUMP("bconst[%i]", index);
212 break;
213 default:
214 DUMP("%c%i", sm1_file_char[file], index);
215 break;
216 }
217 }
218
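/* Decoded form of an SM1 source parameter token: register file and index,
 * optional relative-addressing parameter, swizzle, source modifier, and,
 * for NINED3DSPR_IMMEDIATE, the literal value itself. */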
219 struct sm1_src_param
220 {
221 INT idx;
222 struct sm1_src_param *rel;
223 BYTE file;
224 BYTE swizzle;
225 BYTE mod;
226 BYTE type;
227 union {
228 DWORD d[4];
229 float f[4];
230 int i[4];
231 BOOL b;
232 } imm;
233 };
234 static void
235 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *);
236
237 struct sm1_dst_param
238 {
239 INT idx;
240 struct sm1_src_param *rel;
241 BYTE file;
242 BYTE mask;
243 BYTE mod;
244 int8_t shift; /* sint4 */
245 BYTE type;
246 };
247
248 static inline void
249 assert_replicate_swizzle(const struct ureg_src *reg)
250 {
251 assert(reg->SwizzleY == reg->SwizzleX &&
252 reg->SwizzleZ == reg->SwizzleX &&
253 reg->SwizzleW == reg->SwizzleX);
254 }
255
256 static void
257 sm1_dump_immediate(const struct sm1_src_param *param)
258 {
259 switch (param->type) {
260 case NINED3DSPTYPE_FLOAT4:
261 DUMP("{ %f %f %f %f }",
262 param->imm.f[0], param->imm.f[1],
263 param->imm.f[2], param->imm.f[3]);
264 break;
265 case NINED3DSPTYPE_INT4:
266 DUMP("{ %i %i %i %i }",
267 param->imm.i[0], param->imm.i[1],
268 param->imm.i[2], param->imm.i[3]);
269 break;
270 case NINED3DSPTYPE_BOOL:
271 DUMP("%s", param->imm.b ? "TRUE" : "FALSE");
272 break;
273 default:
274 assert(0);
275 break;
276 }
277 }
278
279 static void
280 sm1_dump_src_param(const struct sm1_src_param *param)
281 {
282 if (param->file == NINED3DSPR_IMMEDIATE) {
283 assert(!param->mod &&
284 !param->rel &&
285 param->swizzle == NINED3DSP_NOSWIZZLE);
286 sm1_dump_immediate(param);
287 return;
288 }
289
290 if (param->mod)
291 DUMP("%s(", sm1_mod_str[param->mod]);
292 if (param->rel) {
293 DUMP("%c[", sm1_file_char[param->file]);
294 sm1_dump_src_param(param->rel);
295 DUMP("+%i]", param->idx);
296 } else {
297 sm1_dump_reg(param->file, param->idx);
298 }
299 if (param->mod)
300 DUMP(")");
301 if (param->swizzle != NINED3DSP_NOSWIZZLE) {
302 DUMP(".");
303 sm1_dump_swizzle(param->swizzle);
304 }
305 }
306
307 static void
308 sm1_dump_dst_param(const struct sm1_dst_param *param)
309 {
310 if (param->mod & NINED3DSPDM_SATURATE)
311 DUMP("sat ");
312 if (param->mod & NINED3DSPDM_PARTIALP)
313 DUMP("pp ");
314 if (param->mod & NINED3DSPDM_CENTROID)
315 DUMP("centroid ");
316 if (param->shift < 0)
317 DUMP("/%u ", 1 << -param->shift);
318 if (param->shift > 0)
319 DUMP("*%u ", 1 << param->shift);
320
321 if (param->rel) {
322 DUMP("%c[", sm1_file_char[param->file]);
323 sm1_dump_src_param(param->rel);
324 DUMP("+%i]", param->idx);
325 } else {
326 sm1_dump_reg(param->file, param->idx);
327 }
328 if (param->mask != NINED3DSP_WRITEMASK_ALL) {
329 DUMP(".");
330 sm1_dump_writemask(param->mask);
331 }
332 }
333
334 struct sm1_semantic
335 {
336 struct sm1_dst_param reg;
337 BYTE sampler_type;
338 D3DDECLUSAGE usage;
339 BYTE usage_idx;
340 };
341
342 struct sm1_op_info
343 {
344 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter
345 * should be ignored completely */
346 unsigned sio;
347 unsigned opcode; /* TGSI_OPCODE_x */
348
349 /* versions are still set even if a handler is set */
350 struct {
351 unsigned min;
352 unsigned max;
353 } vert_version, frag_version;
354
355 /* number of regs parsed outside of special handler */
356 unsigned ndst;
357 unsigned nsrc;
358
359 /* some instructions don't map perfectly, so use a special handler */
360 translate_instruction_func handler;
361 };
362
363 struct sm1_instruction
364 {
365 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
366 BYTE flags;
367 BOOL coissue;
368 BOOL predicated;
369 BYTE ndst;
370 BYTE nsrc;
371 struct sm1_src_param src[4];
372 struct sm1_src_param src_rel[4];
373 struct sm1_src_param pred;
374 struct sm1_src_param dst_rel[1];
375 struct sm1_dst_param dst[1];
376
377 const struct sm1_op_info *info;
378 };
379
380 static void
381 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent)
382 {
383 unsigned i;
384
385 /* no info stored for these: */
386 if (insn->opcode == D3DSIO_DCL)
387 return;
388 for (i = 0; i < indent; ++i)
389 DUMP(" ");
390
391 if (insn->predicated) {
392 DUMP("@");
393 sm1_dump_src_param(&insn->pred);
394 DUMP(" ");
395 }
396 DUMP("%s", d3dsio_to_string(insn->opcode));
397 if (insn->flags) {
398 switch (insn->opcode) {
399 case D3DSIO_TEX:
400 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b");
401 break;
402 default:
403 DUMP("_%x", insn->flags);
404 break;
405 }
406 }
407 if (insn->coissue)
408 DUMP("_co");
409 DUMP(" ");
410
411 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) {
412 sm1_dump_dst_param(&insn->dst[i]);
413 DUMP(" ");
414 }
415
416 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) {
417 sm1_dump_src_param(&insn->src[i]);
418 DUMP(" ");
419 }
420 if (insn->opcode == D3DSIO_DEF ||
421 insn->opcode == D3DSIO_DEFI ||
422 insn->opcode == D3DSIO_DEFB)
423 sm1_dump_immediate(&insn->src[0]);
424
425 DUMP("\n");
426 }
427
428 struct sm1_local_const
429 {
430 INT idx;
431 struct ureg_src reg;
432 float f[4]; /* for indirect addressing of float constants */
433 };
434
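/* Translation context for one shader: the parsing cursor into the D3D9
 * byte code, the ureg program being built, version/capability flags, and
 * all the TGSI registers allocated so far (temps, inputs/outputs, loop
 * and condition state, local constants, ...). */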
435 struct shader_translator
436 {
437 const DWORD *byte_code;
438 const DWORD *parse;
439 const DWORD *parse_next;
440
441 struct ureg_program *ureg;
442
443 /* shader version */
444 struct {
445 BYTE major;
446 BYTE minor;
447 } version;
448 unsigned processor; /* PIPE_SHADER_VERTEX/FRAGMENT */
449 unsigned num_constf_allowed;
450 unsigned num_consti_allowed;
451 unsigned num_constb_allowed;
452
453 bool native_integers;
454 bool inline_subroutines;
455 bool want_texcoord;
456 bool shift_wpos;
457 bool wpos_is_sysval;
458 bool face_is_sysval_integer;
459 bool mul_zero_wins;
460 bool always_output_pointsize;
461 bool no_vs_window_space;
462 unsigned texcoord_sn;
463
464 struct sm1_instruction insn; /* current instruction */
465
466 struct {
467 struct ureg_dst *r;
468 struct ureg_dst oPos;
469 struct ureg_dst oPos_out; /* the real output when doing streamout or clipplane emulation */
470 struct ureg_dst oFog;
471 struct ureg_dst oPts;
472 struct ureg_dst oCol[4];
473 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS];
474 struct ureg_dst oDepth;
475 struct ureg_src v[PIPE_MAX_SHADER_INPUTS];
476 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */
477 struct ureg_src vPos;
478 struct ureg_src vFace;
479 struct ureg_src s;
480 struct ureg_dst p;
481 struct ureg_dst address;
482 struct ureg_dst a0;
483 struct ureg_dst predicate;
484 struct ureg_dst predicate_tmp;
485 struct ureg_dst predicate_dst;
486 struct ureg_dst tS[8]; /* texture stage registers */
487 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */
488 struct ureg_dst t[8]; /* scratch TEMPs */
489 struct ureg_src vC[2]; /* PS color in */
490 struct ureg_src vT[8]; /* PS texcoord in */
491 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop/rep ctr */
492 struct ureg_dst aL[NINE_MAX_LOOP_DEPTH]; /* aL emulation */
493 } regs;
494 unsigned num_temp; /* ARRAY_SIZE(regs.r) */
495 unsigned num_scratch;
496 unsigned loop_depth;
497 unsigned loop_depth_max;
498 unsigned cond_depth;
499 unsigned loop_labels[NINE_MAX_LOOP_DEPTH];
500 unsigned cond_labels[NINE_MAX_COND_DEPTH];
501 bool loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */
502 bool predicated_activated;
503
504 unsigned *inst_labels; /* LABEL op */
505 unsigned num_inst_labels;
506
507 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */
508
509 struct sm1_local_const *lconstf;
510 unsigned num_lconstf;
511 struct sm1_local_const *lconsti;
512 unsigned num_lconsti;
513 struct sm1_local_const *lconstb;
514 unsigned num_lconstb;
515
516 bool slots_used[NINE_MAX_CONST_ALL_VS];
517 unsigned *slot_map;
518 unsigned num_slots;
519
520 bool indirect_const_access;
521 bool failure;
522
523 struct nine_vs_output_info output_info[16];
524 int num_outputs;
525
526 struct nine_shader_info *info;
527
528 int16_t op_info_map[D3DSIO_BREAKP + 1];
529 };
530
531 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX)
532 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT)
533
534 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;}
535
536 static void
537 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *);
538
539 static void
540 sm1_instruction_check(const struct sm1_instruction *insn)
541 {
542 if (insn->opcode == D3DSIO_CRS)
543 {
544 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3)
545 {
546 DBG("CRS.mask.w\n");
547 }
548 }
549 }
550
551 static void
552 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex,
553 int mask, int output_index)
554 {
555 tx->output_info[tx->num_outputs].output_semantic = Usage;
556 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex;
557 tx->output_info[tx->num_outputs].mask = mask;
558 tx->output_info[tx->num_outputs].output_index = output_index;
559 tx->num_outputs++;
560 }
561
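/* Map a d3d float constant index to a slot of the constant buffer.
 * slot_map (when present) remaps indices to pack the constants actually
 * used by the shader; with swvp the constants at index >= 4096 live in a
 * second constant buffer (dimension 1). The slots used are recorded for
 * the constant upload code. */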
562 static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx)
563 {
564 struct ureg_src src;
565
566 if (tx->slot_map)
567 idx = tx->slot_map[idx];
568 /* swvp constant handling: we use two buffers
569 * to fit all the float constants. The special handling
570 * isn't needed elsewhere, because all the instructions
571 * accessing the constants directly are VS1, and swvp
572 * is VS >= 2 */
573 if (tx->info->swvp_on && idx >= 4096) {
574 /* TODO: swvp rel is broken if many constants are used */
575 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096);
576 src = ureg_src_dimension(src, 1);
577 } else {
578 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
579 src = ureg_src_dimension(src, 0);
580 }
581
582 if (!tx->info->swvp_on)
583 tx->slots_used[idx] = true;
584 if (tx->info->const_float_slots < (idx + 1))
585 tx->info->const_float_slots = idx + 1;
586 if (tx->num_slots < (idx + 1))
587 tx->num_slots = idx + 1;
588
589 return src;
590 }
591
592 static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx)
593 {
594 struct ureg_src src;
595
596 if (tx->info->swvp_on) {
597 src = ureg_src_register(TGSI_FILE_CONSTANT, idx);
598 src = ureg_src_dimension(src, 2);
599 } else {
600 unsigned slot_idx = tx->info->const_i_base + idx;
601 if (tx->slot_map)
602 slot_idx = tx->slot_map[slot_idx];
603 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
604 src = ureg_src_dimension(src, 0);
605 tx->slots_used[slot_idx] = true;
606 tx->info->int_slots_used[idx] = true;
607 if (tx->num_slots < (slot_idx + 1))
608 tx->num_slots = slot_idx + 1;
609 }
610
611 if (tx->info->const_int_slots < (idx + 1))
612 tx->info->const_int_slots = idx + 1;
613
614 return src;
615 }
616
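/* Boolean constants are packed four per vec4 slot: b[idx] lives in
 * component (idx & 3) of slot (idx / 4) and is broadcast to all channels
 * with a replicate swizzle. */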
617 static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx)
618 {
619 struct ureg_src src;
620
621 char r = idx / 4;
622 char s = idx & 3;
623
624 if (tx->info->swvp_on) {
625 src = ureg_src_register(TGSI_FILE_CONSTANT, r);
626 src = ureg_src_dimension(src, 3);
627 } else {
628 unsigned slot_idx = tx->info->const_b_base + r;
629 if (tx->slot_map)
630 slot_idx = tx->slot_map[slot_idx];
631 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
632 src = ureg_src_dimension(src, 0);
633 tx->slots_used[slot_idx] = true;
634 tx->info->bool_slots_used[idx] = true;
635 if (tx->num_slots < (slot_idx + 1))
636 tx->num_slots = slot_idx + 1;
637 }
638 src = ureg_swizzle(src, s, s, s, s);
639
640 if (tx->info->const_bool_slots < (idx + 1))
641 tx->info->const_bool_slots = idx + 1;
642
643 return src;
644 }
645
646 static struct ureg_src nine_special_constant_src(struct shader_translator *tx, int idx)
647 {
648 struct ureg_src src;
649
650 unsigned slot_idx = idx + (IS_PS ? NINE_MAX_CONST_PS_SPE_OFFSET :
651 (tx->info->swvp_on ? NINE_MAX_CONST_SWVP_SPE_OFFSET : NINE_MAX_CONST_VS_SPE_OFFSET));
652
653 if (!tx->info->swvp_on && tx->slot_map)
654 slot_idx = tx->slot_map[slot_idx];
655 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx);
656 src = ureg_src_dimension(src, 0);
657
658 if (!tx->info->swvp_on)
659 tx->slots_used[slot_idx] = true;
660 if (tx->num_slots < (slot_idx + 1))
661 tx->num_slots = slot_idx + 1;
662
663 return src;
664 }
665
666 static bool
667 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index)
668 {
669 INT i;
670
671 if (index < 0 || index >= tx->num_constf_allowed) {
672 tx->failure = true;
673 return false;
674 }
675 for (i = 0; i < tx->num_lconstf; ++i) {
676 if (tx->lconstf[i].idx == index) {
677 *src = tx->lconstf[i].reg;
678 return true;
679 }
680 }
681 return false;
682 }
683 static bool
684 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index)
685 {
686 int i;
687
688 if (index < 0 || index >= tx->num_consti_allowed) {
689 tx->failure = true;
690 return false;
691 }
692 for (i = 0; i < tx->num_lconsti; ++i) {
693 if (tx->lconsti[i].idx == index) {
694 *src = tx->lconsti[i].reg;
695 return true;
696 }
697 }
698 return false;
699 }
700 static bool
701 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index)
702 {
703 int i;
704
705 if (index < 0 || index >= tx->num_constb_allowed) {
706 tx->failure = true;
707 return false;
708 }
709 for (i = 0; i < tx->num_lconstb; ++i) {
710 if (tx->lconstb[i].idx == index) {
711 *src = tx->lconstb[i].reg;
712 return true;
713 }
714 }
715 return false;
716 }
717
718 static void
719 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4])
720 {
721 unsigned n;
722
723 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed)
724
725 for (n = 0; n < tx->num_lconstf; ++n)
726 if (tx->lconstf[n].idx == index)
727 break;
728 if (n == tx->num_lconstf) {
729 if ((n % 8) == 0) {
730 tx->lconstf = REALLOC(tx->lconstf,
731 (n + 0) * sizeof(tx->lconstf[0]),
732 (n + 8) * sizeof(tx->lconstf[0]));
733 assert(tx->lconstf);
734 }
735 tx->num_lconstf++;
736 }
737 tx->lconstf[n].idx = index;
738 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]);
739
740 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f));
741 }
742 static void
743 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4])
744 {
745 unsigned n;
746
747 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed)
748
749 for (n = 0; n < tx->num_lconsti; ++n)
750 if (tx->lconsti[n].idx == index)
751 break;
752 if (n == tx->num_lconsti) {
753 if ((n % 8) == 0) {
754 tx->lconsti = REALLOC(tx->lconsti,
755 (n + 0) * sizeof(tx->lconsti[0]),
756 (n + 8) * sizeof(tx->lconsti[0]));
757 assert(tx->lconsti);
758 }
759 tx->num_lconsti++;
760 }
761
762 tx->lconsti[n].idx = index;
763 tx->lconsti[n].reg = tx->native_integers ?
764 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) :
765 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]);
766 }
767 static void
768 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b)
769 {
770 unsigned n;
771
772 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed)
773
774 for (n = 0; n < tx->num_lconstb; ++n)
775 if (tx->lconstb[n].idx == index)
776 break;
777 if (n == tx->num_lconstb) {
778 if ((n % 8) == 0) {
779 tx->lconstb = REALLOC(tx->lconstb,
780 (n + 0) * sizeof(tx->lconstb[0]),
781 (n + 8) * sizeof(tx->lconstb[0]));
782 assert(tx->lconstb);
783 }
784 tx->num_lconstb++;
785 }
786
787 tx->lconstb[n].idx = index;
788 tx->lconstb[n].reg = tx->native_integers ?
789 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) :
790 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f);
791 }
792
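/* Scratch TEMPs: a small pool of local temporaries used for modifier
 * expansion and other short-lived values. num_scratch is expected to be
 * reset by the caller between instructions so the pool gets reused. */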
793 static inline struct ureg_dst
794 tx_scratch(struct shader_translator *tx)
795 {
796 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) {
797 tx->failure = true;
798 return tx->regs.t[0];
799 }
800 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch]))
801 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg);
802 return tx->regs.t[tx->num_scratch++];
803 }
804
805 static inline struct ureg_dst
806 tx_scratch_scalar(struct shader_translator *tx)
807 {
808 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
809 }
810
811 static inline struct ureg_src
812 tx_src_scalar(struct ureg_dst dst)
813 {
814 struct ureg_src src = ureg_src(dst);
815 int c = ffs(dst.WriteMask) - 1;
816 if (dst.WriteMask == (1 << c))
817 src = ureg_scalar(src, c);
818 return src;
819 }
820
821 static inline void
822 tx_temp_alloc(struct shader_translator *tx, INT idx)
823 {
824 assert(idx >= 0);
825 if (idx >= tx->num_temp) {
826 unsigned k = tx->num_temp;
827 unsigned n = idx + 1;
828 tx->regs.r = REALLOC(tx->regs.r,
829 k * sizeof(tx->regs.r[0]),
830 n * sizeof(tx->regs.r[0]));
831 for (; k < n; ++k)
832 tx->regs.r[k] = ureg_dst_undef();
833 tx->num_temp = n;
834 }
835 if (ureg_dst_is_undef(tx->regs.r[idx]))
836 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg);
837 }
838
839 static inline void
840 tx_addr_alloc(struct shader_translator *tx, INT idx)
841 {
842 assert(idx == 0);
843 if (ureg_dst_is_undef(tx->regs.address))
844 tx->regs.address = ureg_DECL_address(tx->ureg);
845 if (ureg_dst_is_undef(tx->regs.a0))
846 tx->regs.a0 = ureg_DECL_temporary(tx->ureg);
847 }
848
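/* Fetch4 support: when fetch4 is enabled for this sampler, emit a TG4
 * gather instead of a regular sample; the Z,X,Y,W swizzle reorders the
 * gathered components into the order D3D9 Fetch4 users expect. */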
849 static inline bool
850 TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst,
851 unsigned target, struct ureg_src src0,
852 struct ureg_src src1, INT idx)
853 {
854 struct ureg_dst tmp;
855 struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1};
856
857 if (!(tx->info->fetch4 & (1 << idx)))
858 return false;
859
860 /* TODO: needs more tests, but this feature is not much used at all */
861
862 tmp = tx_scratch(tx);
863 ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT,
864 NULL, 0, src_tg4, 3);
865 ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W)));
866 return true;
867 }
868
869 /* NOTE: It's not very clear which ps1.1-ps1.3 instructions should have
870 * the projection applied to the texture coordinates. It doesn't
871 * apply to texkill.
872 * The doc is very imprecise here (it says the projection is done
873 * before rasterization, thus in the vs, which seems wrong since ps instructions
874 * are affected differently).
875 * For now we only apply it to the ps TEX instruction and TEXBEM.
876 * Perhaps some other instructions would need it */
877 static inline void
878 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
879 struct ureg_src src, INT idx)
880 {
881 struct ureg_dst tmp;
882 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
883
884 /* no projection */
885 if (dim == 1) {
886 ureg_MOV(tx->ureg, dst, src);
887 } else {
888 tmp = tx_scratch_scalar(tx);
889 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1));
890 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src);
891 }
892 }
893
894 static inline void
895 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst,
896 unsigned target, struct ureg_src src0,
897 struct ureg_src src1, INT idx)
898 {
899 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3);
900 struct ureg_dst tmp;
901 bool shadow = !!(tx->info->sampler_mask_shadow & (1 << idx));
902
903 /* dim == 1: no projection
904 * Looks like it must be disabled when it makes no
905 * sense according to the texture dimensions
906 */
907 if (dim == 1 || (dim <= target && !shadow)) {
908 ureg_TEX(tx->ureg, dst, target, src0, src1);
909 } else if (dim == 4) {
910 ureg_TXP(tx->ureg, dst, target, src0, src1);
911 } else {
912 tmp = tx_scratch(tx);
913 apply_ps1x_projection(tx, tmp, src0, idx);
914 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1);
915 }
916 }
917
918 static inline void
919 tx_texcoord_alloc(struct shader_translator *tx, INT idx)
920 {
921 assert(IS_PS);
922 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT));
923 if (ureg_src_is_undef(tx->regs.vT[idx]))
924 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx,
925 TGSI_INTERPOLATE_PERSPECTIVE);
926 }
927
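/* Loop/conditional bookkeeping: tx_bgnloop/tx_endloop and tx_cond/tx_endcond
 * hand out label slots whose targets are fixed up with ureg_fixup_label once
 * the matching end instruction is reached. */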
928 static inline unsigned *
929 tx_bgnloop(struct shader_translator *tx)
930 {
931 tx->loop_depth++;
932 if (tx->loop_depth_max < tx->loop_depth)
933 tx->loop_depth_max = tx->loop_depth;
934 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH);
935 return &tx->loop_labels[tx->loop_depth - 1];
936 }
937
938 static inline unsigned *
939 tx_endloop(struct shader_translator *tx)
940 {
941 assert(tx->loop_depth);
942 tx->loop_depth--;
943 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth],
944 ureg_get_instruction_number(tx->ureg));
945 return &tx->loop_labels[tx->loop_depth];
946 }
947
948 static struct ureg_dst
949 tx_get_loopctr(struct shader_translator *tx, bool loop_or_rep)
950 {
951 const unsigned l = tx->loop_depth - 1;
952
953 if (!tx->loop_depth)
954 {
955 DBG("loop counter requested outside of loop\n");
956 return ureg_dst_undef();
957 }
958
959 if (ureg_dst_is_undef(tx->regs.rL[l])) {
960 /* loop or rep ctr creation */
961 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg);
962 if (loop_or_rep)
963 tx->regs.aL[l] = ureg_DECL_local_temporary(tx->ureg);
964 tx->loop_or_rep[l] = loop_or_rep;
965 }
966 /* loop - rep - endloop - endrep not allowed */
967 assert(tx->loop_or_rep[l] == loop_or_rep);
968
969 return tx->regs.rL[l];
970 }
971
972 static struct ureg_dst
973 tx_get_loopal(struct shader_translator *tx)
974 {
975 int loop_level = tx->loop_depth - 1;
976
977 while (loop_level >= 0) {
978 /* handle loop - rep - endrep - endloop case */
979 if (tx->loop_or_rep[loop_level])
980 /* the aL value is in the Y component (nine implementation) */
981 return tx->regs.aL[loop_level];
982 loop_level--;
983 }
984
985 DBG("aL counter requested outside of loop\n");
986 return ureg_dst_undef();
987 }
988
989 static inline unsigned *
990 tx_cond(struct shader_translator *tx)
991 {
992 assert(tx->cond_depth < NINE_MAX_COND_DEPTH);
993 tx->cond_depth++;
994 return &tx->cond_labels[tx->cond_depth - 1];
995 }
996
997 static inline unsigned *
998 tx_elsecond(struct shader_translator *tx)
999 {
1000 assert(tx->cond_depth);
1001 return &tx->cond_labels[tx->cond_depth - 1];
1002 }
1003
1004 static inline void
1005 tx_endcond(struct shader_translator *tx)
1006 {
1007 assert(tx->cond_depth);
1008 tx->cond_depth--;
1009 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth],
1010 ureg_get_instruction_number(tx->ureg));
1011 }
1012
1013 static inline struct ureg_dst
1014 nine_ureg_dst_register(unsigned file, int index)
1015 {
1016 return ureg_dst(ureg_src_register(file, index));
1017 }
1018
1019 static inline struct ureg_src
1020 nine_get_position_input(struct shader_translator *tx)
1021 {
1022 struct ureg_program *ureg = tx->ureg;
1023
1024 if (tx->wpos_is_sysval)
1025 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1026 else
1027 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION,
1028 0, TGSI_INTERPOLATE_LINEAR);
1029 }
1030
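/* Translate a parsed SM1 source parameter into a ureg_src: resolve the
 * register file, apply relative addressing if any, then apply the swizzle
 * and the source modifier (some modifiers need a scratch TEMP). */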
1031 static struct ureg_src
1032 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param)
1033 {
1034 struct ureg_program *ureg = tx->ureg;
1035 struct ureg_src src;
1036 struct ureg_dst tmp;
1037
1038 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) ||
1039 (param->file == D3DSPR_INPUT && tx->version.major == 3));
1040
1041 switch (param->file)
1042 {
1043 case D3DSPR_TEMP:
1044 tx_temp_alloc(tx, param->idx);
1045 src = ureg_src(tx->regs.r[param->idx]);
1046 break;
1047 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1048 case D3DSPR_ADDR:
1049 if (IS_VS) {
1050 assert(param->idx == 0);
1051 /* the address register (vs only) must be
1052 * assigned before use */
1053 assert(!ureg_dst_is_undef(tx->regs.a0));
1054 /* Round to lowest for vs1.1 (contrary to the doc), else
1055 * round to nearest */
1056 if (tx->version.major < 2 && tx->version.minor < 2)
1057 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1058 else
1059 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0));
1060 src = ureg_src(tx->regs.address);
1061 } else {
1062 if (tx->version.major < 2 && tx->version.minor < 4) {
1063 /* no subroutines, so should be defined */
1064 src = ureg_src(tx->regs.tS[param->idx]);
1065 } else {
1066 tx_texcoord_alloc(tx, param->idx);
1067 src = tx->regs.vT[param->idx];
1068 }
1069 }
1070 break;
1071 case D3DSPR_INPUT:
1072 if (IS_VS) {
1073 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1074 } else {
1075 if (tx->version.major < 3) {
1076 src = ureg_DECL_fs_input_centroid(
1077 ureg, TGSI_SEMANTIC_COLOR, param->idx,
1078 tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE,
1079 tx->info->force_color_in_centroid ?
1080 TGSI_INTERPOLATE_LOC_CENTROID : 0,
1081 0, 1);
1082 } else {
1083 if(param->rel) {
1084 /* Copy all inputs (non consecutive)
1085 * to temp array (consecutive).
1086 * This is not good for performance.
1087 * A better way would be to have inputs
1088 * consecutive (would need to implement an alternative
1089 * way to match vs outputs and ps inputs).
1090 * However even with the better way, the temp array
1091 * copy would need to be used if some inputs
1092 * are not GENERIC or if they have different
1093 * interpolation flag. */
1094 if (ureg_src_is_undef(tx->regs.v_consecutive)) {
1095 int i;
1096 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0));
1097 for (i = 0; i < 10; i++) {
1098 if (!ureg_src_is_undef(tx->regs.v[i]))
1099 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]);
1100 else
1101 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
1102 }
1103 }
1104 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx);
1105 } else {
1106 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1107 src = tx->regs.v[param->idx];
1108 }
1109 }
1110 }
1111 if (param->rel)
1112 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1113 break;
1114 case D3DSPR_PREDICATE:
1115 if (ureg_dst_is_undef(tx->regs.predicate)) {
1116 /* Forbidden to use the predicate register before being set */
1117 tx->failure = true;
1118 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1119 }
1120 src = ureg_src(tx->regs.predicate);
1121 break;
1122 case D3DSPR_SAMPLER:
1123 assert(param->mod == NINED3DSPSM_NONE);
1124 /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */
1125 src = ureg_DECL_sampler(ureg, param->idx);
1126 break;
1127 case D3DSPR_CONST:
1128 if (param->rel || !tx_lconstf(tx, &src, param->idx)) {
1129 src = nine_float_constant_src(tx, param->idx);
1130 if (param->rel) {
1131 tx->indirect_const_access = true;
1132 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1133 }
1134 }
1135 if (!IS_VS && tx->version.major < 2) {
1136 /* ps 1.X clamps constants */
1137 tmp = tx_scratch(tx);
1138 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f));
1139 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
1140 src = ureg_src(tmp);
1141 }
1142 break;
1143 case D3DSPR_CONST2:
1144 case D3DSPR_CONST3:
1145 case D3DSPR_CONST4:
1146 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n");
1147 assert(!"CONST2/3/4");
1148 src = ureg_imm1f(ureg, 0.0f);
1149 break;
1150 case D3DSPR_CONSTINT:
1151 /* relative addressing is only possible for float constants in vs */
1152 if (!tx_lconsti(tx, &src, param->idx))
1153 src = nine_integer_constant_src(tx, param->idx);
1154 break;
1155 case D3DSPR_CONSTBOOL:
1156 if (!tx_lconstb(tx, &src, param->idx))
1157 src = nine_boolean_constant_src(tx, param->idx);
1158 break;
1159 case D3DSPR_LOOP:
1160 if (ureg_dst_is_undef(tx->regs.address))
1161 tx->regs.address = ureg_DECL_address(ureg);
1162 if (!tx->native_integers)
1163 ureg_ARR(ureg, tx->regs.address,
1164 ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1165 else
1166 ureg_UARL(ureg, tx->regs.address,
1167 ureg_scalar(ureg_src(tx_get_loopal(tx)), TGSI_SWIZZLE_Y));
1168 src = ureg_src(tx->regs.address);
1169 break;
1170 case D3DSPR_MISCTYPE:
1171 switch (param->idx) {
1172 case D3DSMO_POSITION:
1173 if (ureg_src_is_undef(tx->regs.vPos))
1174 tx->regs.vPos = nine_get_position_input(tx);
1175 if (tx->shift_wpos) {
1176 /* TODO: do this only once */
1177 struct ureg_dst wpos = tx_scratch(tx);
1178 ureg_ADD(ureg, wpos, tx->regs.vPos,
1179 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f));
1180 src = ureg_src(wpos);
1181 } else {
1182 src = tx->regs.vPos;
1183 }
1184 break;
1185 case D3DSMO_FACE:
1186 if (ureg_src_is_undef(tx->regs.vFace)) {
1187 if (tx->face_is_sysval_integer) {
1188 tmp = ureg_DECL_temporary(ureg);
1189 tx->regs.vFace =
1190 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0);
1191
1192 /* convert bool to float */
1193 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X),
1194 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1));
1195 tx->regs.vFace = ureg_src(tmp);
1196 } else {
1197 tx->regs.vFace = ureg_DECL_fs_input(ureg,
1198 TGSI_SEMANTIC_FACE, 0,
1199 TGSI_INTERPOLATE_CONSTANT);
1200 }
1201 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X);
1202 }
1203 src = tx->regs.vFace;
1204 break;
1205 default:
1206 assert(!"invalid src D3DSMO");
1207 break;
1208 }
1209 break;
1210 case D3DSPR_TEMPFLOAT16:
1211 break;
1212 default:
1213 assert(!"invalid src D3DSPR");
1214 }
1215
1216 switch (param->mod) {
1217 case NINED3DSPSM_DW:
1218 tmp = tx_scratch(tx);
1219 /* NOTE: app is not allowed to read w with this modifier */
1220 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W));
1221 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W)));
1222 src = ureg_src(tmp);
1223 break;
1224 case NINED3DSPSM_DZ:
1225 tmp = tx_scratch(tx);
1226 /* NOTE: app is not allowed to read z with this modifier */
1227 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z));
1228 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z)));
1229 src = ureg_src(tmp);
1230 break;
1231 default:
1232 break;
1233 }
1234
1235 if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER)
1236 src = ureg_swizzle(src,
1237 (param->swizzle >> 0) & 0x3,
1238 (param->swizzle >> 2) & 0x3,
1239 (param->swizzle >> 4) & 0x3,
1240 (param->swizzle >> 6) & 0x3);
1241
1242 switch (param->mod) {
1243 case NINED3DSPSM_ABS:
1244 src = ureg_abs(src);
1245 break;
1246 case NINED3DSPSM_ABSNEG:
1247 src = ureg_negate(ureg_abs(src));
1248 break;
1249 case NINED3DSPSM_NEG:
1250 src = ureg_negate(src);
1251 break;
1252 case NINED3DSPSM_BIAS:
1253 tmp = tx_scratch(tx);
1254 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f));
1255 src = ureg_src(tmp);
1256 break;
1257 case NINED3DSPSM_BIASNEG:
1258 tmp = tx_scratch(tx);
1259 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src));
1260 src = ureg_src(tmp);
1261 break;
1262 case NINED3DSPSM_NOT:
1263 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) {
1264 tmp = tx_scratch(tx);
1265 ureg_NOT(ureg, tmp, src);
1266 src = ureg_src(tmp);
1267 break;
1268 } else { /* predicate */
1269 tmp = tx_scratch(tx);
1270 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1271 src = ureg_src(tmp);
1272 }
1273 FALLTHROUGH;
1274 case NINED3DSPSM_COMP:
1275 tmp = tx_scratch(tx);
1276 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src));
1277 src = ureg_src(tmp);
1278 break;
1279 case NINED3DSPSM_DZ:
1280 case NINED3DSPSM_DW:
1281 /* Already handled */
1282 break;
1283 case NINED3DSPSM_SIGN:
1284 tmp = tx_scratch(tx);
1285 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1286 src = ureg_src(tmp);
1287 break;
1288 case NINED3DSPSM_SIGNNEG:
1289 tmp = tx_scratch(tx);
1290 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f));
1291 src = ureg_src(tmp);
1292 break;
1293 case NINED3DSPSM_X2:
1294 tmp = tx_scratch(tx);
1295 ureg_ADD(ureg, tmp, src, src);
1296 src = ureg_src(tmp);
1297 break;
1298 case NINED3DSPSM_X2NEG:
1299 tmp = tx_scratch(tx);
1300 ureg_ADD(ureg, tmp, src, src);
1301 src = ureg_negate(ureg_src(tmp));
1302 break;
1303 default:
1304 assert(param->mod == NINED3DSPSM_NONE);
1305 break;
1306 }
1307
1308 return src;
1309 }
1310
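/* Translate a parsed SM1 destination parameter into a ureg_dst, allocating
 * the underlying register on first use and applying the write mask and
 * saturate modifier. When predication is active, the write is redirected to
 * predicate_tmp so the result can be merged according to the predicate. */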
1311 static struct ureg_dst
1312 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1313 {
1314 struct ureg_dst dst;
1315
1316 switch (param->file)
1317 {
1318 case D3DSPR_TEMP:
1319 assert(!param->rel);
1320 tx_temp_alloc(tx, param->idx);
1321 dst = tx->regs.r[param->idx];
1322 break;
1323 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */
1324 case D3DSPR_ADDR:
1325 assert(!param->rel);
1326 if (tx->version.major < 2 && !IS_VS) {
1327 if (ureg_dst_is_undef(tx->regs.tS[param->idx]))
1328 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg);
1329 dst = tx->regs.tS[param->idx];
1330 } else
1331 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */
1332 tx_texcoord_alloc(tx, param->idx);
1333 dst = ureg_dst(tx->regs.vT[param->idx]);
1334 } else {
1335 tx_addr_alloc(tx, param->idx);
1336 dst = tx->regs.a0;
1337 }
1338 break;
1339 case D3DSPR_RASTOUT:
1340 assert(!param->rel);
1341 switch (param->idx) {
1342 case 0:
1343 if (ureg_dst_is_undef(tx->regs.oPos)) {
1344 if (tx->info->clip_plane_emulation > 0) {
1345 tx->regs.oPos = ureg_DECL_temporary(tx->ureg);
1346 } else {
1347 tx->regs.oPos = tx->regs.oPos_out;
1348 }
1349 }
1350 dst = tx->regs.oPos;
1351 break;
1352 case 1:
1353 if (ureg_dst_is_undef(tx->regs.oFog))
1354 tx->regs.oFog =
1355 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16));
1356 dst = tx->regs.oFog;
1357 break;
1358 case 2:
1359 if (ureg_dst_is_undef(tx->regs.oPts))
1360 tx->regs.oPts = ureg_DECL_temporary(tx->ureg);
1361 dst = tx->regs.oPts;
1362 break;
1363 default:
1364 assert(0);
1365 break;
1366 }
1367 break;
1368 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */
1369 case D3DSPR_OUTPUT:
1370 if (tx->version.major < 3) {
1371 assert(!param->rel);
1372 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx);
1373 } else {
1374 assert(!param->rel); /* TODO */
1375 assert(param->idx < ARRAY_SIZE(tx->regs.o));
1376 dst = tx->regs.o[param->idx];
1377 }
1378 break;
1379 case D3DSPR_ATTROUT: /* VS */
1380 case D3DSPR_COLOROUT: /* PS */
1381 assert(param->idx >= 0 && param->idx < 4);
1382 assert(!param->rel);
1383 tx->info->rt_mask |= 1 << param->idx;
1384 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) {
1385 /* ps < 3: oCol[0] will have fog blending afterward
1386 * ps: oCol[0] might have alphatest afterward */
1387 if (!IS_VS && param->idx == 0) {
1388 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg);
1389 } else {
1390 tx->regs.oCol[param->idx] =
1391 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx);
1392 }
1393 }
1394 dst = tx->regs.oCol[param->idx];
1395 if (IS_VS && tx->version.major < 3)
1396 dst = ureg_saturate(dst);
1397 break;
1398 case D3DSPR_DEPTHOUT:
1399 assert(!param->rel);
1400 if (ureg_dst_is_undef(tx->regs.oDepth))
1401 tx->regs.oDepth =
1402 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0,
1403 TGSI_WRITEMASK_Z, 0, 1);
1404 dst = tx->regs.oDepth; /* XXX: must write .z component */
1405 break;
1406 case D3DSPR_PREDICATE:
1407 if (ureg_dst_is_undef(tx->regs.predicate))
1408 tx->regs.predicate = ureg_DECL_temporary(tx->ureg);
1409 dst = tx->regs.predicate;
1410 break;
1411 case D3DSPR_TEMPFLOAT16:
1412 DBG("unhandled D3DSPR: %u\n", param->file);
1413 break;
1414 default:
1415 assert(!"invalid dst D3DSPR");
1416 break;
1417 }
1418 if (param->rel)
1419 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel));
1420
1421 if (param->mask != NINED3DSP_WRITEMASK_ALL)
1422 dst = ureg_writemask(dst, param->mask);
1423 if (param->mod & NINED3DSPDM_SATURATE)
1424 dst = ureg_saturate(dst);
1425
1426 if (tx->predicated_activated) {
1427 tx->regs.predicate_dst = dst;
1428 dst = tx->regs.predicate_tmp;
1429 }
1430
1431 return dst;
1432 }
1433
1434 static struct ureg_dst
1435 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param)
1436 {
1437 if (param->shift) {
1438 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask);
1439 return tx->regs.tdst;
1440 }
1441 return _tx_dst_param(tx, param);
1442 }
1443
1444 static void
1445 tx_apply_dst0_modifiers(struct shader_translator *tx)
1446 {
1447 struct ureg_dst rdst;
1448 float f;
1449
1450 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL)
1451 return;
1452 rdst = _tx_dst_param(tx, &tx->insn.dst[0]);
1453
1454 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */
1455
1456 if (tx->insn.dst[0].shift < 0)
1457 f = 1.0f / (1 << -tx->insn.dst[0].shift);
1458 else
1459 f = 1 << tx->insn.dst[0].shift;
1460
1461 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f));
1462 }
1463
1464 static struct ureg_src
1465 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param)
1466 {
1467 struct ureg_src src;
1468
1469 assert(!param->shift);
1470 assert(!(param->mod & NINED3DSPDM_SATURATE));
1471
1472 switch (param->file) {
1473 case D3DSPR_INPUT:
1474 if (IS_VS) {
1475 src = ureg_src_register(TGSI_FILE_INPUT, param->idx);
1476 } else {
1477 assert(!param->rel);
1478 assert(param->idx < ARRAY_SIZE(tx->regs.v));
1479 src = tx->regs.v[param->idx];
1480 }
1481 break;
1482 default:
1483 src = ureg_src(tx_dst_param(tx, param));
1484 break;
1485 }
1486 if (param->rel)
1487 src = ureg_src_indirect(src, tx_src_param(tx, param->rel));
1488
1489 if (!param->mask)
1490 WARN("mask is 0, using identity swizzle\n");
1491
1492 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) {
1493 char s[4];
1494 int n;
1495 int c;
1496 for (n = 0, c = 0; c < 4; ++c)
1497 if (param->mask & (1 << c))
1498 s[n++] = c;
1499 assert(n);
1500 for (c = n; c < 4; ++c)
1501 s[c] = s[n - 1];
1502 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]);
1503 }
1504 return src;
1505 }
1506
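/* Common helper for the m[k]x[n] matrix ops: emits n DP3 (k == 3) or DP4
 * (k == 4) instructions, one per written destination component, stepping
 * through n consecutive matrix rows starting at src[1]. */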
1507 static HRESULT
1508 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n)
1509 {
1510 struct ureg_program *ureg = tx->ureg;
1511 struct ureg_dst dst;
1512 struct ureg_src src[2];
1513 struct sm1_src_param *src_mat = &tx->insn.src[1];
1514 unsigned i;
1515
1516 dst = tx_dst_param(tx, &tx->insn.dst[0]);
1517 src[0] = tx_src_param(tx, &tx->insn.src[0]);
1518
1519 for (i = 0; i < n; i++)
1520 {
1521 const unsigned m = (1 << i);
1522
1523 src[1] = tx_src_param(tx, src_mat);
1524 src_mat->idx++;
1525
1526 if (!(dst.WriteMask & m))
1527 continue;
1528
1529 /* XXX: src == dst case ? */
1530
1531 switch (k) {
1532 case 3:
1533 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]);
1534 break;
1535 case 4:
1536 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]);
1537 break;
1538 default:
1539 DBG("invalid operation: M%ux%u\n", k, n);
1540 break;
1541 }
1542 }
1543
1544 return D3D_OK;
1545 }
1546
1547 #define VNOTSUPPORTED 0, 0
1548 #define V(maj, min) (((maj) << 8) | (min))
1549
1550 static inline const char *
1551 d3dsio_to_string( unsigned opcode )
1552 {
1553 static const char *names[] = {
1554 "NOP",
1555 "MOV",
1556 "ADD",
1557 "SUB",
1558 "MAD",
1559 "MUL",
1560 "RCP",
1561 "RSQ",
1562 "DP3",
1563 "DP4",
1564 "MIN",
1565 "MAX",
1566 "SLT",
1567 "SGE",
1568 "EXP",
1569 "LOG",
1570 "LIT",
1571 "DST",
1572 "LRP",
1573 "FRC",
1574 "M4x4",
1575 "M4x3",
1576 "M3x4",
1577 "M3x3",
1578 "M3x2",
1579 "CALL",
1580 "CALLNZ",
1581 "LOOP",
1582 "RET",
1583 "ENDLOOP",
1584 "LABEL",
1585 "DCL",
1586 "POW",
1587 "CRS",
1588 "SGN",
1589 "ABS",
1590 "NRM",
1591 "SINCOS",
1592 "REP",
1593 "ENDREP",
1594 "IF",
1595 "IFC",
1596 "ELSE",
1597 "ENDIF",
1598 "BREAK",
1599 "BREAKC",
1600 "MOVA",
1601 "DEFB",
1602 "DEFI",
1603 NULL,
1604 NULL,
1605 NULL,
1606 NULL,
1607 NULL,
1608 NULL,
1609 NULL,
1610 NULL,
1611 NULL,
1612 NULL,
1613 NULL,
1614 NULL,
1615 NULL,
1616 NULL,
1617 NULL,
1618 "TEXCOORD",
1619 "TEXKILL",
1620 "TEX",
1621 "TEXBEM",
1622 "TEXBEML",
1623 "TEXREG2AR",
1624 "TEXREG2GB",
1625 "TEXM3x2PAD",
1626 "TEXM3x2TEX",
1627 "TEXM3x3PAD",
1628 "TEXM3x3TEX",
1629 NULL,
1630 "TEXM3x3SPEC",
1631 "TEXM3x3VSPEC",
1632 "EXPP",
1633 "LOGP",
1634 "CND",
1635 "DEF",
1636 "TEXREG2RGB",
1637 "TEXDP3TEX",
1638 "TEXM3x2DEPTH",
1639 "TEXDP3",
1640 "TEXM3x3",
1641 "TEXDEPTH",
1642 "CMP",
1643 "BEM",
1644 "DP2ADD",
1645 "DSX",
1646 "DSY",
1647 "TEXLDD",
1648 "SETP",
1649 "TEXLDL",
1650 "BREAKP"
1651 };
1652
1653 if (opcode < ARRAY_SIZE(names)) return names[opcode];
1654
1655 switch (opcode) {
1656 case D3DSIO_PHASE: return "PHASE";
1657 case D3DSIO_COMMENT: return "COMMENT";
1658 case D3DSIO_END: return "END";
1659 default:
1660 return NULL;
1661 }
1662 }
1663
1664 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL }
1665 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \
1666 (inst).vert_version.max | \
1667 (inst).frag_version.min | \
1668 (inst).frag_version.max)
1669
1670 #define SPECIAL(name) \
1671 NineTranslateInstruction_##name
1672
1673 #define DECL_SPECIAL(name) \
1674 static HRESULT \
1675 NineTranslateInstruction_##name( struct shader_translator *tx )
1676
1677 static HRESULT
1678 NineTranslateInstruction_Generic(struct shader_translator *);
1679
1680 DECL_SPECIAL(NOP)
1681 {
1682 /* Nothing to do. NOP was used to avoid hangs
1683 * with very old d3d drivers. */
1684 return D3D_OK;
1685 }
1686
1687 DECL_SPECIAL(SUB)
1688 {
1689 struct ureg_program *ureg = tx->ureg;
1690 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1691 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1692 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1693
1694 ureg_ADD(ureg, dst, src0, ureg_negate(src1));
1695 return D3D_OK;
1696 }
1697
1698 DECL_SPECIAL(ABS)
1699 {
1700 struct ureg_program *ureg = tx->ureg;
1701 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1702 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1703
1704 ureg_MOV(ureg, dst, ureg_abs(src));
1705 return D3D_OK;
1706 }
1707
1708 DECL_SPECIAL(XPD)
1709 {
1710 struct ureg_program *ureg = tx->ureg;
1711 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1712 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
1713 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
1714
1715 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1716 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
1717 TGSI_SWIZZLE_X, 0),
1718 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1719 TGSI_SWIZZLE_Y, 0));
1720 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ),
1721 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X,
1722 TGSI_SWIZZLE_Y, 0),
1723 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y,
1724 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)),
1725 ureg_src(dst));
1726 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1727 ureg_imm1f(ureg, 1));
1728 return D3D_OK;
1729 }
1730
1731 DECL_SPECIAL(M4x4)
1732 {
1733 return NineTranslateInstruction_Mkxn(tx, 4, 4);
1734 }
1735
1736 DECL_SPECIAL(M4x3)
1737 {
1738 return NineTranslateInstruction_Mkxn(tx, 4, 3);
1739 }
1740
1741 DECL_SPECIAL(M3x4)
1742 {
1743 return NineTranslateInstruction_Mkxn(tx, 3, 4);
1744 }
1745
1746 DECL_SPECIAL(M3x3)
1747 {
1748 return NineTranslateInstruction_Mkxn(tx, 3, 3);
1749 }
1750
1751 DECL_SPECIAL(M3x2)
1752 {
1753 return NineTranslateInstruction_Mkxn(tx, 3, 2);
1754 }
1755
1756 DECL_SPECIAL(CMP)
1757 {
1758 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]),
1759 tx_src_param(tx, &tx->insn.src[0]),
1760 tx_src_param(tx, &tx->insn.src[2]),
1761 tx_src_param(tx, &tx->insn.src[1]));
1762 return D3D_OK;
1763 }
1764
1765 DECL_SPECIAL(CND)
1766 {
1767 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1768 struct ureg_dst cgt;
1769 struct ureg_src cnd;
1770
1771 /* the coissue flag was a hint for compilers, advising them to
1772 * execute two operations at the same time, in cases where
1773 * the two operations had the same dst with different channels.
1774 * It has no effect on current hw. However it seems CND
1775 * is affected. The handling of this very specific case
1776 * below mimics wine's behaviour */
1777 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) {
1778 ureg_MOV(tx->ureg,
1779 dst, tx_src_param(tx, &tx->insn.src[1]));
1780 return D3D_OK;
1781 }
1782
1783 cnd = tx_src_param(tx, &tx->insn.src[0]);
1784 cgt = tx_scratch(tx);
1785
1786 if (tx->version.major == 1 && tx->version.minor < 4)
1787 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
1788
1789 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f));
1790
1791 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)),
1792 tx_src_param(tx, &tx->insn.src[1]),
1793 tx_src_param(tx, &tx->insn.src[2]));
1794 return D3D_OK;
1795 }
1796
1797 DECL_SPECIAL(CALL)
1798 {
1799 assert(tx->insn.src[0].idx < tx->num_inst_labels);
1800 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1801 return D3D_OK;
1802 }
1803
1804 DECL_SPECIAL(CALLNZ)
1805 {
1806 struct ureg_program *ureg = tx->ureg;
1807 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1808
1809 if (!tx->native_integers)
1810 ureg_IF(ureg, src, tx_cond(tx));
1811 else
1812 ureg_UIF(ureg, src, tx_cond(tx));
1813 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]);
1814 tx_endcond(tx);
1815 ureg_ENDIF(ureg);
1816 return D3D_OK;
1817 }
1818
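/* LOOP: ctr.x holds the remaining iteration count, while the aL register
 * keeps the current aL value in .y and the step in .z (copied from the i#
 * source); ENDLOOP decrements the counter and advances aL, and the loop
 * breaks once the counter reaches 0. */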
1819 DECL_SPECIAL(LOOP)
1820 {
1821 struct ureg_program *ureg = tx->ureg;
1822 unsigned *label;
1823 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]);
1824 struct ureg_dst ctr;
1825 struct ureg_dst aL;
1826 struct ureg_dst tmp;
1827 struct ureg_src ctrx;
1828
1829 label = tx_bgnloop(tx);
1830 ctr = tx_get_loopctr(tx, true);
1831 aL = tx_get_loopal(tx);
1832 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1833
1834 /* src: num_iterations */
1835 ureg_MOV(ureg, ureg_writemask(ctr, NINED3DSP_WRITEMASK_0),
1836 ureg_scalar(src, TGSI_SWIZZLE_X));
1837 /* al: unused - start_value of al - step for al - unused */
1838 ureg_MOV(ureg, aL, src);
1839 ureg_BGNLOOP(tx->ureg, label);
1840 tmp = tx_scratch_scalar(tx);
1841 /* Initially ctr.x contains the number of iterations.
1842 * ctr.y will contain the updated value of al.
1843 * We decrease ctr.x at the end of every iteration,
1844 * and stop when it reaches 0. */
1845
1846 if (!tx->native_integers) {
1847 /* case src and ctr contain floats */
1848 /* to avoid precision issues, we stop when ctr <= 0.5 */
1849 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1850 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1851 } else {
1852 /* case src and ctr contain integers */
1853 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1854 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1855 }
1856 ureg_BRK(ureg);
1857 tx_endcond(tx);
1858 ureg_ENDIF(ureg);
1859 return D3D_OK;
1860 }
1861
1862 DECL_SPECIAL(RET)
1863 {
1864 /* RET as a last instruction could be safely ignored.
1865 * Remove it to prevent crashes/warnings in case underlying
1866 * driver doesn't implement arbitrary returns.
1867 */
1868 if (*(tx->parse_next) != NINED3DSP_END) {
1869 ureg_RET(tx->ureg);
1870 }
1871 return D3D_OK;
1872 }
1873
1874 DECL_SPECIAL(ENDLOOP)
1875 {
1876 struct ureg_program *ureg = tx->ureg;
1877 struct ureg_dst ctr = tx_get_loopctr(tx, true);
1878 struct ureg_dst al = tx_get_loopal(tx);
1879 struct ureg_dst dst_ctrx, dst_al;
1880 struct ureg_src src_ctr, al_counter;
1881
1882 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1883 dst_al = ureg_writemask(al, NINED3DSP_WRITEMASK_1);
1884 src_ctr = ureg_src(ctr);
1885 al_counter = ureg_scalar(ureg_src(al), TGSI_SWIZZLE_Z);
1886
1887 /* ctr.x -= 1
1888 * al.y (aL) += step */
1889 if (!tx->native_integers) {
1890 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1891 ureg_ADD(ureg, dst_al, ureg_src(al), al_counter);
1892 } else {
1893 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1894 ureg_UADD(ureg, dst_al, ureg_src(al), al_counter);
1895 }
1896 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1897 return D3D_OK;
1898 }
1899
1900 DECL_SPECIAL(LABEL)
1901 {
1902 unsigned k = tx->num_inst_labels;
1903 unsigned n = tx->insn.src[0].idx;
1904 assert(n < 2048);
1905 if (n >= k)
1906 tx->inst_labels = REALLOC(tx->inst_labels,
1907 k * sizeof(tx->inst_labels[0]),
1908 (n + 1) * sizeof(tx->inst_labels[0])); /* need room for index n */
1909
1910 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg);
1911 return D3D_OK;
1912 }
1913
1914 DECL_SPECIAL(SINCOS)
1915 {
1916 struct ureg_program *ureg = tx->ureg;
1917 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
1918 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
1919 struct ureg_dst tmp = tx_scratch_scalar(tx);
1920
1921 assert(!(dst.WriteMask & 0xc));
1922
1923 /* Copying to a temporary register avoids src/dst aliasing.
1924 * src is supposed to have replicated swizzle. */
1925 ureg_MOV(ureg, tmp, src);
1926
1927 /* z undefined, w untouched */
1928 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1929 tx_src_scalar(tmp));
1930 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1931 tx_src_scalar(tmp));
1932 return D3D_OK;
1933 }
1934
1935 DECL_SPECIAL(SGN)
1936 {
1937 ureg_SSG(tx->ureg,
1938 tx_dst_param(tx, &tx->insn.dst[0]),
1939 tx_src_param(tx, &tx->insn.src[0]));
1940 return D3D_OK;
1941 }
1942
1943 DECL_SPECIAL(REP)
1944 {
1945 struct ureg_program *ureg = tx->ureg;
1946 unsigned *label;
1947 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]);
1948 struct ureg_dst ctr;
1949 struct ureg_dst tmp;
1950 struct ureg_src ctrx;
1951
1952 label = tx_bgnloop(tx);
1953 ctr = ureg_writemask(tx_get_loopctr(tx, false), NINED3DSP_WRITEMASK_0);
1954 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X);
1955
1956 /* NOTE: rep must be constant, so we don't have to save the count */
1957 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE);
1958
1959 /* rep: num_iterations - 0 - 0 - 0 */
1960 ureg_MOV(ureg, ctr, rep);
1961 ureg_BGNLOOP(ureg, label);
1962 tmp = tx_scratch_scalar(tx);
1963 /* Initially ctr.x contains the number of iterations.
1964 * We decrease ctr.x at the end of every iteration,
1965 * and stop when it reaches 0. */
1966
1967 if (!tx->native_integers) {
1968 /* case where src and ctr contain integers */
1969 /* to avoid precision issues, we stop when ctr <= 0.5 */
1970 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx);
1971 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1972 } else {
1973 /* case src and ctr contain integers */
1974 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx);
1975 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx));
1976 }
1977 ureg_BRK(ureg);
1978 tx_endcond(tx);
1979 ureg_ENDIF(ureg);
1980
1981 return D3D_OK;
1982 }
1983
1984 DECL_SPECIAL(ENDREP)
1985 {
1986 struct ureg_program *ureg = tx->ureg;
1987 struct ureg_dst ctr = tx_get_loopctr(tx, false);
1988 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0);
1989 struct ureg_src src_ctr = ureg_src(ctr);
1990
1991 /* ctr.x -= 1 */
1992 if (!tx->native_integers)
1993 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f));
1994 else
1995 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1));
1996
1997 ureg_ENDLOOP(tx->ureg, tx_endloop(tx));
1998 return D3D_OK;
1999 }
2000
2001 DECL_SPECIAL(ENDIF)
2002 {
2003 tx_endcond(tx);
2004 ureg_ENDIF(tx->ureg);
2005 return D3D_OK;
2006 }
2007
2008 DECL_SPECIAL(IF)
2009 {
2010 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2011
2012 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL)
2013 ureg_UIF(tx->ureg, src, tx_cond(tx));
2014 else
2015 ureg_IF(tx->ureg, src, tx_cond(tx));
2016
2017 return D3D_OK;
2018 }
2019
2020 static inline unsigned
2021 sm1_insn_flags_to_tgsi_setop(BYTE flags)
2022 {
2023 switch (flags) {
2024 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT;
2025 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ;
2026 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE;
2027 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT;
2028 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE;
2029 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
2030 default:
2031 assert(!"invalid comparison flags");
2032 return TGSI_OPCODE_SGT;
2033 }
2034 }
2035
2036 DECL_SPECIAL(IFC)
2037 {
2038 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2039 struct ureg_src src[2];
2040 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2041 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2042 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2043 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2044 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2045 return D3D_OK;
2046 }
2047
2048 DECL_SPECIAL(ELSE)
2049 {
2050 ureg_ELSE(tx->ureg, tx_elsecond(tx));
2051 return D3D_OK;
2052 }
2053
2054 DECL_SPECIAL(BREAKC)
2055 {
2056 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
2057 struct ureg_src src[2];
2058 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X);
2059 src[0] = tx_src_param(tx, &tx->insn.src[0]);
2060 src[1] = tx_src_param(tx, &tx->insn.src[1]);
2061 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
2062 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx));
2063 ureg_BRK(tx->ureg);
2064 tx_endcond(tx);
2065 ureg_ENDIF(tx->ureg);
2066 return D3D_OK;
2067 }
2068
2069 static const char *sm1_declusage_names[] =
2070 {
2071 [D3DDECLUSAGE_POSITION] = "POSITION",
2072 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT",
2073 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES",
2074 [D3DDECLUSAGE_NORMAL] = "NORMAL",
2075 [D3DDECLUSAGE_PSIZE] = "PSIZE",
2076 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD",
2077 [D3DDECLUSAGE_TANGENT] = "TANGENT",
2078 [D3DDECLUSAGE_BINORMAL] = "BINORMAL",
2079 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR",
2080 [D3DDECLUSAGE_POSITIONT] = "POSITIONT",
2081 [D3DDECLUSAGE_COLOR] = "COLOR",
2082 [D3DDECLUSAGE_FOG] = "FOG",
2083 [D3DDECLUSAGE_DEPTH] = "DEPTH",
2084 [D3DDECLUSAGE_SAMPLE] = "SAMPLE"
2085 };
2086
2087 static inline unsigned
2088 sm1_to_nine_declusage(struct sm1_semantic *dcl)
2089 {
2090 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx);
2091 }
2092
2093 static void
2094 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem,
2095 bool tc,
2096 struct sm1_semantic *dcl)
2097 {
2098 BYTE index = dcl->usage_idx;
2099
2100 /* For everything that does not map to a specific TGSI_SEMANTIC_****,
2101 * we map to TGSI_SEMANTIC_GENERIC with the index computed below.
2102 *
2103 * The index can be any UINT16 and usage_idx is a BYTE,
2104 * so everything fits. It doesn't matter whether indices
2105 * are close together or low.
2106 *
2107 *
2108 * POSITION >= 1: 10 * index + 7
2109 * COLOR >= 2: 10 * (index-1) + 8
2110 * FOG: 16
2111 * TEXCOORD[0..15]: index
2112 * BLENDWEIGHT: 10 * index + 19
2113 * BLENDINDICES: 10 * index + 20
2114 * NORMAL: 10 * index + 21
2115 * TANGENT: 10 * index + 22
2116 * BINORMAL: 10 * index + 23
2117 * TESSFACTOR: 10 * index + 24
2118 */
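/* A few worked examples of the mapping above (derived from the switch below):
 * TEXCOORD3 -> TEXCOORD[3] (or GENERIC[3] without want_texcoord),
 * COLOR2 -> GENERIC[18], NORMAL0 -> GENERIC[21], BLENDWEIGHT0 -> GENERIC[19]. */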
2119
2120 switch (dcl->usage) {
2121 case D3DDECLUSAGE_POSITION:
2122 case D3DDECLUSAGE_POSITIONT:
2123 case D3DDECLUSAGE_DEPTH:
2124 if (index == 0) {
2125 sem->Name = TGSI_SEMANTIC_POSITION;
2126 sem->Index = 0;
2127 } else {
2128 sem->Name = TGSI_SEMANTIC_GENERIC;
2129 sem->Index = 10 * index + 7;
2130 }
2131 break;
2132 case D3DDECLUSAGE_COLOR:
2133 if (index < 2) {
2134 sem->Name = TGSI_SEMANTIC_COLOR;
2135 sem->Index = index;
2136 } else {
2137 sem->Name = TGSI_SEMANTIC_GENERIC;
2138 sem->Index = 10 * (index-1) + 8;
2139 }
2140 break;
2141 case D3DDECLUSAGE_FOG:
2142 assert(index == 0);
2143 sem->Name = TGSI_SEMANTIC_GENERIC;
2144 sem->Index = 16;
2145 break;
2146 case D3DDECLUSAGE_PSIZE:
2147 assert(index == 0);
2148 sem->Name = TGSI_SEMANTIC_PSIZE;
2149 sem->Index = 0;
2150 break;
2151 case D3DDECLUSAGE_TEXCOORD:
2152 assert(index < 16);
2153 if (index < 8 && tc)
2154 sem->Name = TGSI_SEMANTIC_TEXCOORD;
2155 else
2156 sem->Name = TGSI_SEMANTIC_GENERIC;
2157 sem->Index = index;
2158 break;
2159 case D3DDECLUSAGE_BLENDWEIGHT:
2160 sem->Name = TGSI_SEMANTIC_GENERIC;
2161 sem->Index = 10 * index + 19;
2162 break;
2163 case D3DDECLUSAGE_BLENDINDICES:
2164 sem->Name = TGSI_SEMANTIC_GENERIC;
2165 sem->Index = 10 * index + 20;
2166 break;
2167 case D3DDECLUSAGE_NORMAL:
2168 sem->Name = TGSI_SEMANTIC_GENERIC;
2169 sem->Index = 10 * index + 21;
2170 break;
2171 case D3DDECLUSAGE_TANGENT:
2172 sem->Name = TGSI_SEMANTIC_GENERIC;
2173 sem->Index = 10 * index + 22;
2174 break;
2175 case D3DDECLUSAGE_BINORMAL:
2176 sem->Name = TGSI_SEMANTIC_GENERIC;
2177 sem->Index = 10 * index + 23;
2178 break;
2179 case D3DDECLUSAGE_TESSFACTOR:
2180 sem->Name = TGSI_SEMANTIC_GENERIC;
2181 sem->Index = 10 * index + 24;
2182 break;
2183 case D3DDECLUSAGE_SAMPLE:
2184 sem->Name = TGSI_SEMANTIC_COUNT;
2185 sem->Index = 0;
2186 break;
2187 default:
2188 unreachable("Invalid DECLUSAGE.");
2189 break;
2190 }
2191 }
2192
2193 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT)
2194 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT)
2195 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT)
2196 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT)
2197 static inline unsigned
2198 d3dstt_to_tgsi_tex(BYTE sampler_type)
2199 {
2200 switch (sampler_type) {
2201 case NINED3DSTT_1D: return TGSI_TEXTURE_1D;
2202 case NINED3DSTT_2D: return TGSI_TEXTURE_2D;
2203 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D;
2204 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE;
2205 default:
2206 assert(0);
2207 return TGSI_TEXTURE_UNKNOWN;
2208 }
2209 }
2210 static inline unsigned
2211 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type)
2212 {
2213 switch (sampler_type) {
2214 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D;
2215 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D;
2216 case NINED3DSTT_VOLUME:
2217 case NINED3DSTT_CUBE:
2218 default:
2219 assert(0);
2220 return TGSI_TEXTURE_UNKNOWN;
2221 }
2222 }
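/* For ps1.x the sampler target is not declared in the shader; it is packed
 * two bits per stage in info->sampler_ps1xtypes (presumably filled from the
 * currently bound textures). Per the switch below: 1 -> 1D, 0 -> 2D,
 * 3 -> 3D, anything else -> CUBE, with sampler_mask_shadow selecting the
 * shadow variant where one exists. */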
2223 static inline unsigned
2224 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage)
2225 {
2226 bool shadow = !!(info->sampler_mask_shadow & (1 << stage));
2227 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) {
2228 case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D;
2229 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D;
2230 case 3: return TGSI_TEXTURE_3D;
2231 default:
2232 return TGSI_TEXTURE_CUBE;
2233 }
2234 }
2235
2236 static const char *
2237 sm1_sampler_type_name(BYTE sampler_type)
2238 {
2239 switch (sampler_type) {
2240 case NINED3DSTT_1D: return "1D";
2241 case NINED3DSTT_2D: return "2D";
2242 case NINED3DSTT_VOLUME: return "VOLUME";
2243 case NINED3DSTT_CUBE: return "CUBE";
2244 default:
2245 return "(D3DSTT_?)";
2246 }
2247 }
2248
2249 static inline unsigned
2250 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem)
2251 {
2252 switch (sem->Name) {
2253 case TGSI_SEMANTIC_POSITION:
2254 case TGSI_SEMANTIC_NORMAL:
2255 return TGSI_INTERPOLATE_LINEAR;
2256 case TGSI_SEMANTIC_BCOLOR:
2257 case TGSI_SEMANTIC_COLOR:
2258 return TGSI_INTERPOLATE_COLOR;
2259 case TGSI_SEMANTIC_FOG:
2260 case TGSI_SEMANTIC_GENERIC:
2261 case TGSI_SEMANTIC_TEXCOORD:
2262 case TGSI_SEMANTIC_CLIPDIST:
2263 case TGSI_SEMANTIC_CLIPVERTEX:
2264 return TGSI_INTERPOLATE_PERSPECTIVE;
2265 case TGSI_SEMANTIC_EDGEFLAG:
2266 case TGSI_SEMANTIC_FACE:
2267 case TGSI_SEMANTIC_INSTANCEID:
2268 case TGSI_SEMANTIC_PCOORD:
2269 case TGSI_SEMANTIC_PRIMID:
2270 case TGSI_SEMANTIC_PSIZE:
2271 case TGSI_SEMANTIC_VERTEXID:
2272 return TGSI_INTERPOLATE_CONSTANT;
2273 default:
2274 assert(0);
2275 return TGSI_INTERPOLATE_CONSTANT;
2276 }
2277 }
2278
2279 DECL_SPECIAL(DCL)
2280 {
2281 struct ureg_program *ureg = tx->ureg;
2282 bool is_input;
2283 bool is_sampler;
2284 struct tgsi_declaration_semantic tgsi;
2285 struct sm1_semantic sem;
2286 sm1_read_semantic(tx, &sem);
2287
2288 is_input = sem.reg.file == D3DSPR_INPUT;
2289 is_sampler =
2290 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER;
2291
2292 DUMP("DCL ");
2293 sm1_dump_dst_param(&sem.reg);
2294 if (is_sampler)
2295 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type));
2296 else
2297 if (tx->version.major >= 3)
2298 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx);
2299 else
2300 if (sem.usage | sem.usage_idx)
2301 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx);
2302 else
2303 DUMP("\n");
2304
2305 if (is_sampler) {
2306 const unsigned m = 1 << sem.reg.idx;
2307 ureg_DECL_sampler(ureg, sem.reg.idx);
2308 tx->info->sampler_mask |= m;
2309 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ?
2310 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) :
2311 d3dstt_to_tgsi_tex(sem.sampler_type);
2312 return D3D_OK;
2313 }
2314
2315 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem);
2316 if (IS_VS) {
2317 if (is_input) {
2318 /* linkage outside of shader with vertex declaration */
2319 ureg_DECL_vs_input(ureg, sem.reg.idx);
2320 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map));
2321 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem);
2322 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1);
2323 /* NOTE: preserving order in case of indirect access */
2324 } else
2325 if (tx->version.major >= 3) {
2326 /* SM2 output semantic determined by file */
2327 assert(sem.reg.mask != 0);
2328 if (sem.usage == D3DDECLUSAGE_POSITIONT)
2329 tx->info->position_t = true;
2330 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o));
2331 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't yet support packing");
2332 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked(
2333 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1);
2334 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx);
2335 if ((tx->info->process_vertices || tx->info->clip_plane_emulation > 0) &&
2336 sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) {
2337 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; /* TODO: probably not good to declare it twice */
2338 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2339 tx->regs.oPos = tx->regs.o[sem.reg.idx];
2340 }
2341
2342 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) {
2343 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg);
2344 tx->regs.oPts = tx->regs.o[sem.reg.idx];
2345 }
2346 }
2347 } else {
2348 if (is_input && tx->version.major >= 3) {
2349 unsigned interp_flag;
2350 unsigned interp_location = 0;
2351 /* SM3 only, SM2 input semantic determined by file */
2352 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v));
2353 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't yet support packing");
2354 /* PositionT and tessfactor forbidden */
2355 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR)
2356 return D3DERR_INVALIDCALL;
2357
2358 if (tgsi.Name == TGSI_SEMANTIC_POSITION) {
2359 /* Position0 is forbidden (likely because vPos already does that) */
2360 if (sem.usage == D3DDECLUSAGE_POSITION)
2361 return D3DERR_INVALIDCALL;
2362 /* Following code is for depth */
2363 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx);
2364 return D3D_OK;
2365 }
2366
2367 if (sem.reg.mod & NINED3DSPDM_CENTROID ||
2368 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid))
2369 interp_location = TGSI_INTERPOLATE_LOC_CENTROID;
2370 interp_flag = nine_tgsi_to_interp_mode(&tgsi);
2371 /* We replace TGSI_INTERPOLATE_COLOR because some drivers don't support it,
2372 * and those that do support it perform the same replacement we do */
2373 if (interp_flag == TGSI_INTERPOLATE_COLOR)
2374 interp_flag = tx->info->color_flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2375
2376 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid(
2377 ureg, tgsi.Name, tgsi.Index,
2378 interp_flag,
2379 interp_location, 0, 1);
2380 } else
2381 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */
2382 /* FragColor or FragDepth */
2383 assert(sem.reg.mask != 0);
2384 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask,
2385 0, 1);
2386 }
2387 }
2388 return D3D_OK;
2389 }
2390
2391 DECL_SPECIAL(DEF)
2392 {
2393 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f);
2394 return D3D_OK;
2395 }
2396
2397 DECL_SPECIAL(DEFB)
2398 {
2399 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b);
2400 return D3D_OK;
2401 }
2402
2403 DECL_SPECIAL(DEFI)
2404 {
2405 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i);
2406 return D3D_OK;
2407 }
2408
2409 DECL_SPECIAL(POW)
2410 {
2411 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2412 struct ureg_src src[2] = {
2413 tx_src_param(tx, &tx->insn.src[0]),
2414 tx_src_param(tx, &tx->insn.src[1])
2415 };
2416 /* Anything^0 is 1, including 0^0.
2417 * Assume mul_zero_wins drivers already have
2418 * this behaviour. Emulate for the others. */
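/* Sketch of the emulation path below: compute pow(|src0|, src1) into a
 * scratch register, then use CMP with -|src1.x| as the condition; where
 * src1 is exactly 0 the condition is not negative, so 1.0 is selected,
 * giving anything^0 == 1 (including 0^0) regardless of what POW returned. */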
2419 if (tx->mul_zero_wins) {
2420 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]);
2421 } else {
2422 struct ureg_dst tmp = tx_scratch_scalar(tx);
2423 ureg_POW(tx->ureg, tmp, ureg_abs(src[0]), src[1]);
2424 ureg_CMP(tx->ureg, dst,
2425 ureg_negate(ureg_abs(ureg_scalar(src[1], TGSI_SWIZZLE_X))),
2426 tx_src_scalar(tmp), ureg_imm1f(tx->ureg, 1.0f));
2427 }
2428 return D3D_OK;
2429 }
2430
2431 /* Tests results on Win 10:
2432 * NV (NVIDIA GeForce GT 635M)
2433 * AMD (AMD Radeon HD 7730M)
2434 * INTEL (Intel(R) HD Graphics 4000)
2435 * PS2 and PS3:
2436 * RCP and RSQ can generate inf on NV and AMD.
2437 * RCP and RSQ are clamped on INTEL (+- FLT_MAX),
2438 * NV: log not clamped
2439 * AMD: log(0) is -FLT_MAX (but log(inf) is inf)
2440 * INTEL: log(0) is -FLT_MAX and log(inf) is 127
2441 * All devices have 0*anything = 0
2442 *
2443 * INTEL VS2 and VS3: same behaviour.
2444 * Some differences between VS2 and VS3 for constants defined with inf/NaN.
2445 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change,
2446 * VS2 seems to clamp to zero (may be a test failure).
2447 * AMD VS2: unknown, VS3: very likely behaviour of PS3
2448 * NV VS2 and VS3: very likely behaviour of PS3
2449 * For both, Inf in VS becomes NaN in PS.
2450 * "Very likely" because the test was less extensive.
2451 *
2452 * Thus all clamping can be removed for shaders 2 and 3,
2453 * as long as 0*anything = 0.
2454 * Otherwise clamp to enforce 0*anything = 0 (anything being then
2455 * neither inf nor NaN, the user being unlikely to pass them
2456 * as constants).
2457 * The status for VS1 and PS1 is unknown.
2458 */
2459
2460 DECL_SPECIAL(RCP)
2461 {
2462 struct ureg_program *ureg = tx->ureg;
2463 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2464 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2465 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2466 ureg_RCP(ureg, tmp, src);
2467 if (!tx->mul_zero_wins) {
2468 /* FLT_MAX has issues with Rayman */
2469 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp));
2470 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp));
2471 }
2472 return D3D_OK;
2473 }
2474
2475 DECL_SPECIAL(RSQ)
2476 {
2477 struct ureg_program *ureg = tx->ureg;
2478 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2479 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2480 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx);
2481 ureg_RSQ(ureg, tmp, ureg_abs(src));
2482 if (!tx->mul_zero_wins)
2483 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp));
2484 return D3D_OK;
2485 }
2486
2487 DECL_SPECIAL(LOG)
2488 {
2489 struct ureg_program *ureg = tx->ureg;
2490 struct ureg_dst tmp = tx_scratch_scalar(tx);
2491 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2492 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2493 ureg_LG2(ureg, tmp, ureg_abs(src));
2494 if (tx->mul_zero_wins) {
2495 ureg_MOV(ureg, dst, tx_src_scalar(tmp));
2496 } else {
2497 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp));
2498 }
2499 return D3D_OK;
2500 }
2501
2502 DECL_SPECIAL(LIT)
2503 {
2504 struct ureg_program *ureg = tx->ureg;
2505 struct ureg_dst tmp = tx_scratch(tx);
2506 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2507 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2508 ureg_LIT(ureg, tmp, src);
2509 /* d3d9 LIT is the same as gallium LIT. One difference is that d3d9
2510 * states that dst.z is 0 when src.y <= 0. The gallium definition can assign
2511 * it 0^0 if src.w = 0, whose value is driver dependent. */
2512 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
2513 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)),
2514 ureg_src(tmp), ureg_imm1f(ureg, 0.0f));
2515 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp));
2516 return D3D_OK;
2517 }
2518
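/* NRM: normalize src, i.e. dst = src * 1/sqrt(dot3(src, src)). The RSQ
 * result is clamped to FLT_MAX unless mul_zero_wins already guarantees
 * 0 * inf = 0. */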
2519 DECL_SPECIAL(NRM)
2520 {
2521 struct ureg_program *ureg = tx->ureg;
2522 struct ureg_dst tmp = tx_scratch_scalar(tx);
2523 struct ureg_src nrm = tx_src_scalar(tmp);
2524 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2525 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2526 ureg_DP3(ureg, tmp, src, src);
2527 ureg_RSQ(ureg, tmp, nrm);
2528 if (!tx->mul_zero_wins)
2529 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm);
2530 ureg_MUL(ureg, dst, src, nrm);
2531 return D3D_OK;
2532 }
2533
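/* DP2ADD: dst = src0.x * src1.x + src0.y * src1.y + src2, where src2 is
 * required to use a replicate swizzle (see the assert below). */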
2534 DECL_SPECIAL(DP2ADD)
2535 {
2536 struct ureg_dst tmp = tx_scratch_scalar(tx);
2537 struct ureg_src dp2 = tx_src_scalar(tmp);
2538 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2539 struct ureg_src src[3];
2540 int i;
2541 for (i = 0; i < 3; ++i)
2542 src[i] = tx_src_param(tx, &tx->insn.src[i]);
2543 assert_replicate_swizzle(&src[2]);
2544
2545 ureg_DP2(tx->ureg, tmp, src[0], src[1]);
2546 ureg_ADD(tx->ureg, dst, src[2], dp2);
2547
2548 return D3D_OK;
2549 }
2550
2551 DECL_SPECIAL(TEXCOORD)
2552 {
2553 struct ureg_program *ureg = tx->ureg;
2554 const unsigned s = tx->insn.dst[0].idx;
2555 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2556
2557 tx_texcoord_alloc(tx, s);
2558 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]);
2559 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f));
2560
2561 return D3D_OK;
2562 }
2563
2564 DECL_SPECIAL(TEXCOORD_ps14)
2565 {
2566 struct ureg_program *ureg = tx->ureg;
2567 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
2568 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2569
2570 assert(tx->insn.src[0].file == D3DSPR_TEXTURE);
2571
2572 ureg_MOV(ureg, dst, src);
2573
2574 return D3D_OK;
2575 }
2576
2577 DECL_SPECIAL(TEXKILL)
2578 {
2579 struct ureg_src reg;
2580
2581 if (tx->version.major > 1 || tx->version.minor > 3) {
2582 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]);
2583 } else {
2584 tx_texcoord_alloc(tx, tx->insn.dst[0].idx);
2585 reg = tx->regs.vT[tx->insn.dst[0].idx];
2586 }
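/* For ps < 2.0 only the first three texcoord components take part in the
 * kill test; replicating z into w (below) keeps the unused w component from
 * triggering KILL_IF, which kills when any component is < 0. */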
2587 if (tx->version.major < 2)
2588 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z));
2589 ureg_KILL_IF(tx->ureg, reg);
2590
2591 return D3D_OK;
2592 }
2593
2594 DECL_SPECIAL(TEXBEM)
2595 {
2596 struct ureg_program *ureg = tx->ureg;
2597 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2598 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2599 struct ureg_dst tmp, tmp2, texcoord;
2600 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2;
2601 struct ureg_src bumpenvlscale, bumpenvloffset;
2602 const int m = tx->insn.dst[0].idx;
2603
2604 assert(tx->version.major == 1);
2605
2606 sample = ureg_DECL_sampler(ureg, m);
2607 tx->info->sampler_mask |= 1 << m;
2608
2609 tx_texcoord_alloc(tx, m);
2610
2611 tmp = tx_scratch(tx);
2612 tmp2 = tx_scratch(tx);
2613 texcoord = tx_scratch(tx);
2614 /*
2615 * Bump-env-matrix:
2616 * 00 is X
2617 * 01 is Y
2618 * 10 is Z
2619 * 11 is W
2620 */
2621 c8m = nine_special_constant_src(tx, m);
2622 c16m2 = nine_special_constant_src(tx, 8+m/2);
2623
2624 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2625 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2626 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2627 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2628
2629 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */
2630 if (m % 2 == 0) {
2631 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X);
2632 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y);
2633 } else {
2634 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z);
2635 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W);
2636 }
2637
2638 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m);
2639
2640 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */
2641 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2642 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2643 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */
2644 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2645 NINE_APPLY_SWIZZLE(src, Y),
2646 NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2647
2648 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */
2649 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2650 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord));
2651 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/
2652 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2653 NINE_APPLY_SWIZZLE(src, Y),
2654 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2655
2656 /* Now the texture coordinates are in tmp.xy */
2657
2658 if (tx->insn.opcode == D3DSIO_TEXBEM) {
2659 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2660 } else if (tx->insn.opcode == D3DSIO_TEXBEML) {
2661 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */
2662 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2663 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z),
2664 bumpenvlscale, bumpenvloffset);
2665 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2));
2666 }
2667
2668 tx->info->bumpenvmat_needed = 1;
2669
2670 return D3D_OK;
2671 }
2672
2673 DECL_SPECIAL(TEXREG2AR)
2674 {
2675 struct ureg_program *ureg = tx->ureg;
2676 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2677 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2678 struct ureg_src sample;
2679 const int m = tx->insn.dst[0].idx;
2680 ASSERTED const int n = tx->insn.src[0].idx;
2681 assert(m >= 0 && m > n);
2682
2683 sample = ureg_DECL_sampler(ureg, m);
2684 tx->info->sampler_mask |= 1 << m;
2685 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample);
2686
2687 return D3D_OK;
2688 }
2689
2690 DECL_SPECIAL(TEXREG2GB)
2691 {
2692 struct ureg_program *ureg = tx->ureg;
2693 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2694 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2695 struct ureg_src sample;
2696 const int m = tx->insn.dst[0].idx;
2697 ASSERTED const int n = tx->insn.src[0].idx;
2698 assert(m >= 0 && m > n);
2699
2700 sample = ureg_DECL_sampler(ureg, m);
2701 tx->info->sampler_mask |= 1 << m;
2702 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample);
2703
2704 return D3D_OK;
2705 }
2706
2707 DECL_SPECIAL(TEXM3x2PAD)
2708 {
2709 return D3D_OK; /* this is just padding */
2710 }
2711
2712 DECL_SPECIAL(TEXM3x2TEX)
2713 {
2714 struct ureg_program *ureg = tx->ureg;
2715 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2716 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2717 struct ureg_src sample;
2718 const int m = tx->insn.dst[0].idx - 1;
2719 ASSERTED const int n = tx->insn.src[0].idx;
2720 assert(m >= 0 && m > n);
2721
2722 tx_texcoord_alloc(tx, m);
2723 tx_texcoord_alloc(tx, m+1);
2724
2725 /* performs the matrix multiplication */
2726 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2727 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2728
2729 sample = ureg_DECL_sampler(ureg, m + 1);
2730 tx->info->sampler_mask |= 1 << (m + 1);
2731 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample);
2732
2733 return D3D_OK;
2734 }
2735
2736 DECL_SPECIAL(TEXM3x3PAD)
2737 {
2738 return D3D_OK; /* this is just padding */
2739 }
2740
2741 DECL_SPECIAL(TEXM3x3SPEC)
2742 {
2743 struct ureg_program *ureg = tx->ureg;
2744 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2745 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2746 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]);
2747 struct ureg_src sample;
2748 struct ureg_dst tmp;
2749 const int m = tx->insn.dst[0].idx - 2;
2750 ASSERTED const int n = tx->insn.src[0].idx;
2751 assert(m >= 0 && m > n);
2752
2753 tx_texcoord_alloc(tx, m);
2754 tx_texcoord_alloc(tx, m+1);
2755 tx_texcoord_alloc(tx, m+2);
2756
2757 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2758 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2759 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2760
2761 sample = ureg_DECL_sampler(ureg, m + 2);
2762 tx->info->sampler_mask |= 1 << (m + 2);
2763 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2764
2765 /* At this step, dst = N = (u', w', z').
2766 * We want dst to be the texture sampled at (u'', w'', z''), with
2767 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2768 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2769 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2770 /* at this step tmp.x = 1/N.N */
2771 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E);
2772 /* at this step tmp.y = N.E */
2773 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2774 /* at this step tmp.x = N.E/N.N */
2775 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2776 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2777 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2778 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E));
2779 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2780
2781 return D3D_OK;
2782 }
2783
2784 DECL_SPECIAL(TEXREG2RGB)
2785 {
2786 struct ureg_program *ureg = tx->ureg;
2787 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2788 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2789 struct ureg_src sample;
2790 const int m = tx->insn.dst[0].idx;
2791 ASSERTED const int n = tx->insn.src[0].idx;
2792 assert(m >= 0 && m > n);
2793
2794 sample = ureg_DECL_sampler(ureg, m);
2795 tx->info->sampler_mask |= 1 << m;
2796 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample);
2797
2798 return D3D_OK;
2799 }
2800
2801 DECL_SPECIAL(TEXDP3TEX)
2802 {
2803 struct ureg_program *ureg = tx->ureg;
2804 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2805 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2806 struct ureg_dst tmp;
2807 struct ureg_src sample;
2808 const int m = tx->insn.dst[0].idx;
2809 ASSERTED const int n = tx->insn.src[0].idx;
2810 assert(m >= 0 && m > n);
2811
2812 tx_texcoord_alloc(tx, m);
2813
2814 tmp = tx_scratch(tx);
2815 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2816 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f));
2817
2818 sample = ureg_DECL_sampler(ureg, m);
2819 tx->info->sampler_mask |= 1 << m;
2820 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample);
2821
2822 return D3D_OK;
2823 }
2824
2825 DECL_SPECIAL(TEXM3x2DEPTH)
2826 {
2827 struct ureg_program *ureg = tx->ureg;
2828 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2829 struct ureg_dst tmp;
2830 const int m = tx->insn.dst[0].idx - 1;
2831 ASSERTED const int n = tx->insn.src[0].idx;
2832 assert(m >= 0 && m > n);
2833
2834 tx_texcoord_alloc(tx, m);
2835 tx_texcoord_alloc(tx, m+1);
2836
2837 tmp = tx_scratch(tx);
2838
2839 /* performs the matrix multiplication */
2840 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2841 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2842
2843 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2844 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */
2845 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z));
2846 /* res = 'w' == 0 ? 1.0 : z/w */
2847 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))),
2848 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f));
2849 /* replace the depth for depth testing with the result */
2850 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2851 TGSI_WRITEMASK_Z, 0, 1);
2852 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2853 /* note that we write nothing to the destination, since it's disallowed to use it afterward */
2854 return D3D_OK;
2855 }
2856
2857 DECL_SPECIAL(TEXDP3)
2858 {
2859 struct ureg_program *ureg = tx->ureg;
2860 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2861 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2862 const int m = tx->insn.dst[0].idx;
2863 ASSERTED const int n = tx->insn.src[0].idx;
2864 assert(m >= 0 && m > n);
2865
2866 tx_texcoord_alloc(tx, m);
2867
2868 ureg_DP3(ureg, dst, tx->regs.vT[m], src);
2869
2870 return D3D_OK;
2871 }
2872
2873 DECL_SPECIAL(TEXM3x3)
2874 {
2875 struct ureg_program *ureg = tx->ureg;
2876 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2877 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */
2878 struct ureg_src sample;
2879 struct ureg_dst E, tmp;
2880 const int m = tx->insn.dst[0].idx - 2;
2881 ASSERTED const int n = tx->insn.src[0].idx;
2882 assert(m >= 0 && m > n);
2883
2884 tx_texcoord_alloc(tx, m);
2885 tx_texcoord_alloc(tx, m+1);
2886 tx_texcoord_alloc(tx, m+2);
2887
2888 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src);
2889 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src);
2890 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src);
2891
2892 switch (tx->insn.opcode) {
2893 case D3DSIO_TEXM3x3:
2894 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
2895 break;
2896 case D3DSIO_TEXM3x3TEX:
2897 sample = ureg_DECL_sampler(ureg, m + 2);
2898 tx->info->sampler_mask |= 1 << (m + 2);
2899 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample);
2900 break;
2901 case D3DSIO_TEXM3x3VSPEC:
2902 sample = ureg_DECL_sampler(ureg, m + 2);
2903 tx->info->sampler_mask |= 1 << (m + 2);
2904 E = tx_scratch(tx);
2905 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ);
2906 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W));
2907 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W));
2908 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W));
2909 /* At this step, dst = N = (u', w', z').
2910 * We want dst to be the texture sampled at (u'', w'', z''), with
2911 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */
2912 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst));
2913 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X));
2914 /* at this step tmp.x = 1/N.N */
2915 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E));
2916 /* at this step tmp.y = N.E */
2917 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y));
2918 /* at this step tmp.x = N.E/N.N */
2919 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f));
2920 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst));
2921 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */
2922 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E)));
2923 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample);
2924 break;
2925 default:
2926 return D3DERR_INVALIDCALL;
2927 }
2928 return D3D_OK;
2929 }
2930
2931 DECL_SPECIAL(TEXDEPTH)
2932 {
2933 struct ureg_program *ureg = tx->ureg;
2934 struct ureg_dst r5;
2935 struct ureg_src r5r, r5g;
2936
2937 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */
2938
2939 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g.
2940 * r5 won't be used afterward, thus we can use r5.ba */
2941 r5 = tx->regs.r[5];
2942 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X);
2943 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y);
2944
2945 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g);
2946 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z));
2947 /* r5.r = r/g */
2948 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)),
2949 r5r, ureg_imm1f(ureg, 1.0f));
2950 /* replace the depth for depth testing with the result */
2951 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
2952 TGSI_WRITEMASK_Z, 0, 1);
2953 ureg_MOV(ureg, tx->regs.oDepth, r5r);
2954
2955 return D3D_OK;
2956 }
2957
2958 DECL_SPECIAL(BEM)
2959 {
2960 struct ureg_program *ureg = tx->ureg;
2961 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
2962 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]);
2963 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]);
2964 struct ureg_src m00, m01, m10, m11, c8m;
2965 const int m = tx->insn.dst[0].idx;
2966 struct ureg_dst tmp = tx_scratch(tx);
2967 /*
2968 * Bump-env-matrix:
2969 * 00 is X
2970 * 01 is Y
2971 * 10 is Z
2972 * 11 is W
2973 */
2974 c8m = nine_special_constant_src(tx, m);
2975 m00 = NINE_APPLY_SWIZZLE(c8m, X);
2976 m01 = NINE_APPLY_SWIZZLE(c8m, Y);
2977 m10 = NINE_APPLY_SWIZZLE(c8m, Z);
2978 m11 = NINE_APPLY_SWIZZLE(c8m, W);
2979 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */
2980 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00,
2981 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X));
2982 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */
2983 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10,
2984 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X));
2985
2986 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */
2987 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01,
2988 NINE_APPLY_SWIZZLE(src1, X), src0);
2989 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */
2990 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11,
2991 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y));
2992 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp));
2993
2994 tx->info->bumpenvmat_needed = 1;
2995
2996 return D3D_OK;
2997 }
2998
2999 DECL_SPECIAL(TEXLD)
3000 {
3001 struct ureg_program *ureg = tx->ureg;
3002 unsigned target;
3003 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3004 struct ureg_src src[2] = {
3005 tx_src_param(tx, &tx->insn.src[0]),
3006 tx_src_param(tx, &tx->insn.src[1])
3007 };
3008 assert(tx->insn.src[1].idx >= 0 &&
3009 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3010 target = tx->sampler_targets[tx->insn.src[1].idx];
3011
3012 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3013 return D3D_OK;
3014
3015 switch (tx->insn.flags) {
3016 case 0:
3017 ureg_TEX(ureg, dst, target, src[0], src[1]);
3018 break;
3019 case NINED3DSI_TEXLD_PROJECT:
3020 ureg_TXP(ureg, dst, target, src[0], src[1]);
3021 break;
3022 case NINED3DSI_TEXLD_BIAS:
3023 ureg_TXB(ureg, dst, target, src[0], src[1]);
3024 break;
3025 default:
3026 assert(0);
3027 return D3DERR_INVALIDCALL;
3028 }
3029 return D3D_OK;
3030 }
3031
3032 DECL_SPECIAL(TEXLD_14)
3033 {
3034 struct ureg_program *ureg = tx->ureg;
3035 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3036 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3037 const unsigned s = tx->insn.dst[0].idx;
3038 const unsigned t = ps1x_sampler_type(tx->info, s);
3039
3040 tx->info->sampler_mask |= 1 << s;
3041 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s));
3042
3043 return D3D_OK;
3044 }
3045
3046 DECL_SPECIAL(TEX)
3047 {
3048 struct ureg_program *ureg = tx->ureg;
3049 const unsigned s = tx->insn.dst[0].idx;
3050 const unsigned t = ps1x_sampler_type(tx->info, s);
3051 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3052 struct ureg_src src[2];
3053
3054 tx_texcoord_alloc(tx, s);
3055
3056 src[0] = tx->regs.vT[s];
3057 src[1] = ureg_DECL_sampler(ureg, s);
3058 tx->info->sampler_mask |= 1 << s;
3059
3060 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s);
3061
3062 return D3D_OK;
3063 }
3064
3065 DECL_SPECIAL(TEXLDD)
3066 {
3067 unsigned target;
3068 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3069 struct ureg_src src[4] = {
3070 tx_src_param(tx, &tx->insn.src[0]),
3071 tx_src_param(tx, &tx->insn.src[1]),
3072 tx_src_param(tx, &tx->insn.src[2]),
3073 tx_src_param(tx, &tx->insn.src[3])
3074 };
3075 assert(tx->insn.src[1].idx >= 0 &&
3076 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3077 target = tx->sampler_targets[tx->insn.src[1].idx];
3078
3079 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3080 return D3D_OK;
3081
3082 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]);
3083 return D3D_OK;
3084 }
3085
3086 DECL_SPECIAL(TEXLDL)
3087 {
3088 unsigned target;
3089 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3090 struct ureg_src src[2] = {
3091 tx_src_param(tx, &tx->insn.src[0]),
3092 tx_src_param(tx, &tx->insn.src[1])
3093 };
3094 assert(tx->insn.src[1].idx >= 0 &&
3095 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets));
3096 target = tx->sampler_targets[tx->insn.src[1].idx];
3097
3098 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx))
3099 return D3D_OK;
3100
3101 ureg_TXL(tx->ureg, dst, target, src[0], src[1]);
3102 return D3D_OK;
3103 }
3104
3105 DECL_SPECIAL(SETP)
3106 {
3107 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags);
3108 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]);
3109 struct ureg_src src[2] = {
3110 tx_src_param(tx, &tx->insn.src[0]),
3111 tx_src_param(tx, &tx->insn.src[1])
3112 };
3113 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0);
3114 return D3D_OK;
3115 }
3116
3117 DECL_SPECIAL(BREAKP)
3118 {
3119 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]);
3120 ureg_IF(tx->ureg, src, tx_cond(tx));
3121 ureg_BRK(tx->ureg);
3122 tx_endcond(tx);
3123 ureg_ENDIF(tx->ureg);
3124 return D3D_OK;
3125 }
3126
3127 DECL_SPECIAL(PHASE)
3128 {
3129 return D3D_OK; /* we don't care about phase */
3130 }
3131
3132 DECL_SPECIAL(COMMENT)
3133 {
3134 return D3D_OK; /* nothing to do */
3135 }
3136
3137
3138 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \
3139 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h }
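/* The _OPI fields appear to be: D3D opcode, TGSI opcode, supported vs
 * version range (min, max), supported ps version range (min, max), number
 * of dst params, number of src params, and an optional SPECIAL() handler
 * that overrides the generic translation. */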
3140
3141 static const struct sm1_op_info inst_table[] =
3142 {
3143 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */
3144 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
3145 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */
3146 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */
3147 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */
3148 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */
3149 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */
3150 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */
3151 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */
3152 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */
3153 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */
3154 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */
3155 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */
3156 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */
3157 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */
3158 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */
3159 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */
3160 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */
3161 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */
3162 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */
3163
3164 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)),
3165 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)),
3166 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)),
3167 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)),
3168 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)),
3169
3170 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)),
3171 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)),
3172 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)),
3173 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)),
3174 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)),
3175 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)),
3176
3177 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)),
3178
3179 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)),
3180 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */
3181 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */
3182 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)),
3183 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */
3184
3185 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
3186 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),
3187
3188 /* More flow control */
3189 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)),
3190 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)),
3191 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)),
3192 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)),
3193 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)),
3194 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)),
3195 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL),
3196 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)),
3197 /* we don't write to the address register, but to a normal register (copied
3198 * to the address register when needed), thus we don't use ARR */
3199 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3200
3201 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)),
3202 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)),
3203
3204 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)),
3205 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)),
3206 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)),
3207 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)),
3208 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)),
3209 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)),
3210 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3211 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)),
3212 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)),
3213 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)),
3214 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)),
3215 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)),
3216 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)),
3217 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3218 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)),
3219 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3220
3221 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
3222 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
3223 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)),
3224 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
3225
3226 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
3227
3228 /* More tex stuff */
3229 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)),
3230 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)),
3231 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)),
3232 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)),
3233 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)),
3234 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)),
3235
3236 /* Misc */
3237 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */
3238 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)),
3239 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)),
3240 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3241 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL),
3242 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)),
3243 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)),
3244 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)),
3245 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP))
3246 };
3247
3248 static const struct sm1_op_info inst_phase =
3249 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE));
3250
3251 static const struct sm1_op_info inst_comment =
3252 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT));
3253
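/* Build a per-shader lookup table from D3DSIO opcode to the matching
 * inst_table entry for this shader type and version; opcodes that are not
 * available stay at -1 (presumably so they can be rejected later during
 * parsing). */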
3254 static void
3255 create_op_info_map(struct shader_translator *tx)
3256 {
3257 const unsigned version = (tx->version.major << 8) | tx->version.minor;
3258 unsigned i;
3259
3260 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i)
3261 tx->op_info_map[i] = -1;
3262
3263 if (tx->processor == PIPE_SHADER_VERTEX) {
3264 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3265 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3266 if (inst_table[i].vert_version.min <= version &&
3267 inst_table[i].vert_version.max >= version)
3268 tx->op_info_map[inst_table[i].sio] = i;
3269 }
3270 } else {
3271 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) {
3272 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map));
3273 if (inst_table[i].frag_version.min <= version &&
3274 inst_table[i].frag_version.max >= version)
3275 tx->op_info_map[inst_table[i].sio] = i;
3276 }
3277 }
3278 }
3279
3280 static inline HRESULT
3281 NineTranslateInstruction_Generic(struct shader_translator *tx)
3282 {
3283 struct ureg_dst dst[1];
3284 struct ureg_src src[4];
3285 unsigned i;
3286
3287 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i)
3288 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]);
3289 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i)
3290 src[i] = tx_src_param(tx, &tx->insn.src[i]);
3291
3292 ureg_insn(tx->ureg, tx->insn.info->opcode,
3293 dst, tx->insn.ndst,
3294 src, tx->insn.nsrc, 0);
3295 return D3D_OK;
3296 }
3297
3298 static inline DWORD
3299 TOKEN_PEEK(struct shader_translator *tx)
3300 {
3301 return *(tx->parse);
3302 }
3303
3304 static inline DWORD
3305 TOKEN_NEXT(struct shader_translator *tx)
3306 {
3307 return *(tx->parse)++;
3308 }
3309
3310 static inline void
3311 TOKEN_JUMP(struct shader_translator *tx)
3312 {
3313 if (tx->parse_next && tx->parse != tx->parse_next) {
3314 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next);
3315 tx->parse = tx->parse_next;
3316 }
3317 }
3318
3319 static inline bool
3320 sm1_parse_eof(struct shader_translator *tx)
3321 {
3322 return TOKEN_PEEK(tx) == NINED3DSP_END;
3323 }
3324
3325 static void
3326 sm1_read_version(struct shader_translator *tx)
3327 {
3328 const DWORD tok = TOKEN_NEXT(tx);
3329
3330 tx->version.major = D3DSHADER_VERSION_MAJOR(tok);
3331 tx->version.minor = D3DSHADER_VERSION_MINOR(tok);
3332
3333 switch (tok >> 16) {
3334 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break;
3335 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break;
3336 default:
3337 DBG("Invalid shader type: %x\n", tok);
3338 tx->processor = ~0;
3339 break;
3340 }
3341 }
3342
3343 /* This is just to check if we parsed the instruction properly. */
3344 static void
3345 sm1_parse_get_skip(struct shader_translator *tx)
3346 {
3347 const DWORD tok = TOKEN_PEEK(tx);
3348
3349 if (tx->version.major >= 2) {
3350 tx->parse_next = tx->parse + 1 /* this */ +
3351 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT);
3352 } else {
3353 tx->parse_next = NULL; /* TODO: determine from param count */
3354 }
3355 }
3356
3357 static void
3358 sm1_print_comment(const char *comment, UINT size)
3359 {
3360 if (!size)
3361 return;
3362 /* TODO */
3363 }
3364
3365 static void
3366 sm1_parse_comments(struct shader_translator *tx, BOOL print)
3367 {
3368 DWORD tok = TOKEN_PEEK(tx);
3369
3370 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT)
3371 {
3372 const char *comment = "";
3373 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT;
3374 tx->parse += size + 1;
3375
3376 if (print)
3377 sm1_print_comment(comment, size);
3378
3379 tok = TOKEN_PEEK(tx);
3380 }
3381 }
3382
3383 static void
3384 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel)
3385 {
3386 *reg = TOKEN_NEXT(tx);
3387
3388 if (*reg & D3DSHADER_ADDRMODE_RELATIVE)
3389 {
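/* Shaders before SM2 have no separate relative-addressing token;
 * synthesize one that selects the address register (a0) with no swizzle,
 * so the rest of the parser can treat both cases alike. */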
3390 if (tx->version.major < 2)
3391 *rel = (1 << 31) |
3392 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) |
3393 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) |
3394 D3DSP_NOSWIZZLE;
3395 else
3396 *rel = TOKEN_NEXT(tx);
3397 }
3398 }
3399
3400 static void
3401 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok)
3402 {
3403 int8_t shift;
3404 dst->file =
3405 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT |
3406 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2;
3407 dst->type = TGSI_RETURN_TYPE_FLOAT;
3408 dst->idx = tok & D3DSP_REGNUM_MASK;
3409 dst->rel = NULL;
3410 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT;
3411 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT;
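    /* The destination shift is a signed 4-bit field; sign-extend it to the -8..7 range. */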
3412 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT;
3413 dst->shift = (shift & 0x7) - (shift & 0x8);
3414 }
3415
3416 static void
3417 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok)
3418 {
3419 src->file =
3420 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) |
3421 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2);
3422 src->type = TGSI_RETURN_TYPE_FLOAT;
3423 src->idx = tok & D3DSP_REGNUM_MASK;
3424 src->rel = NULL;
3425 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT;
3426 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT;
3427
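    /* Float constants beyond c2047 are addressed through the CONST2/3/4 register
     * files; remap them onto a single constant file. */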
3428 switch (src->file) {
3429 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break;
3430 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break;
3431 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break;
3432 default:
3433 break;
3434 }
3435 }
3436
3437 static void
3438 sm1_parse_immediate(struct shader_translator *tx,
3439 struct sm1_src_param *imm)
3440 {
3441 imm->file = NINED3DSPR_IMMEDIATE;
3442 imm->idx = INT_MIN;
3443 imm->rel = NULL;
3444 imm->swizzle = NINED3DSP_NOSWIZZLE;
3445 imm->mod = 0;
3446 switch (tx->insn.opcode) {
3447 case D3DSIO_DEF:
3448 imm->type = NINED3DSPTYPE_FLOAT4;
3449 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3450 tx->parse += 4;
3451 break;
3452 case D3DSIO_DEFI:
3453 imm->type = NINED3DSPTYPE_INT4;
3454 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD));
3455 tx->parse += 4;
3456 break;
3457 case D3DSIO_DEFB:
3458 imm->type = NINED3DSPTYPE_BOOL;
3459 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD));
3460 tx->parse += 1;
3461 break;
3462 default:
3463 assert(0);
3464 break;
3465 }
3466 }
3467
3468 static void
3469 sm1_read_dst_param(struct shader_translator *tx,
3470 struct sm1_dst_param *dst,
3471 struct sm1_src_param *rel)
3472 {
3473 DWORD tok_dst, tok_rel = 0;
3474
3475 sm1_parse_get_param(tx, &tok_dst, &tok_rel);
3476 sm1_parse_dst_param(dst, tok_dst);
3477 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) {
3478 sm1_parse_src_param(rel, tok_rel);
3479 dst->rel = rel;
3480 }
3481 }
3482
3483 static void
3484 sm1_read_src_param(struct shader_translator *tx,
3485 struct sm1_src_param *src,
3486 struct sm1_src_param *rel)
3487 {
3488 DWORD tok_src, tok_rel = 0;
3489
3490 sm1_parse_get_param(tx, &tok_src, &tok_rel);
3491 sm1_parse_src_param(src, tok_src);
3492 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) {
3493 assert(rel);
3494 sm1_parse_src_param(rel, tok_rel);
3495 src->rel = rel;
3496 }
3497 }
3498
3499 static void
3500 sm1_read_semantic(struct shader_translator *tx,
3501 struct sm1_semantic *sem)
3502 {
3503 const DWORD tok_usg = TOKEN_NEXT(tx);
3504 const DWORD tok_dst = TOKEN_NEXT(tx);
3505
3506 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT;
3507 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT;
3508 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT;
3509
3510 sm1_parse_dst_param(&sem->reg, tok_dst);
3511 }
3512
3513 static void
3514 sm1_parse_instruction(struct shader_translator *tx)
3515 {
3516 struct sm1_instruction *insn = &tx->insn;
3517 HRESULT hr;
3518 DWORD tok;
3519 const struct sm1_op_info *info = NULL;
3520 unsigned i;
3521
3522 sm1_parse_comments(tx, true);
3523 sm1_parse_get_skip(tx);
3524
3525 tok = TOKEN_NEXT(tx);
3526
3527 insn->opcode = tok & D3DSI_OPCODE_MASK;
3528 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT;
3529 insn->coissue = !!(tok & D3DSI_COISSUE);
3530 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED);
3531
3532 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) {
3533 int k = tx->op_info_map[insn->opcode];
3534 if (k >= 0) {
3535 assert(k < ARRAY_SIZE(inst_table));
3536 info = &inst_table[k];
3537 }
3538 } else {
3539 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase;
3540 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment;
3541 }
3542 if (!info) {
3543 DBG("illegal or unhandled opcode: %08x\n", insn->opcode);
3544 TOKEN_JUMP(tx);
3545 return;
3546 }
3547 insn->info = info;
3548 insn->ndst = info->ndst;
3549 insn->nsrc = info->nsrc;
3550
3551 /* check version */
3552 {
3553 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min;
3554 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max;
3555 unsigned ver = (tx->version.major << 8) | tx->version.minor;
3556 if (ver < min || ver > max) {
3557 DBG("opcode not supported in this shader version: %x <= %x <= %x\n",
3558 min, ver, max);
3559 return;
3560 }
3561 }
3562
3563 for (i = 0; i < insn->ndst; ++i)
3564 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]);
3565 if (insn->predicated)
3566 sm1_read_src_param(tx, &insn->pred, NULL);
3567 for (i = 0; i < insn->nsrc; ++i)
3568 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]);
3569
3570 /* parse here so we can dump them before processing */
3571 if (insn->opcode == D3DSIO_DEF ||
3572 insn->opcode == D3DSIO_DEFI ||
3573 insn->opcode == D3DSIO_DEFB)
3574 sm1_parse_immediate(tx, &tx->insn.src[0]);
3575
3576 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth);
3577 sm1_instruction_check(insn);
3578
3579 if (insn->predicated) {
3580 tx->predicated_activated = true;
3581 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) {
3582 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg);
3583 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg);
3584 }
3585 }
3586
3587 if (info->handler)
3588 hr = info->handler(tx);
3589 else
3590 hr = NineTranslateInstruction_Generic(tx);
3591 tx_apply_dst0_modifiers(tx);
3592
3593 if (insn->predicated) {
3594 tx->predicated_activated = false;
3595 /* TODO: predicate might be allowed on outputs,
3596 * which cannot be src. Workaround it. */
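        /* CMP writes, per component, the freshly computed result (predicate_tmp)
         * where -pred is negative (i.e. the predicate is set), and keeps the
         * previous value (predicate_dst) elsewhere. */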
3597 ureg_CMP(tx->ureg, tx->regs.predicate_dst,
3598 ureg_negate(tx_src_param(tx, &insn->pred)),
3599 ureg_src(tx->regs.predicate_tmp),
3600 ureg_src(tx->regs.predicate_dst));
3601 }
3602
3603 if (hr != D3D_OK)
3604 tx->failure = true;
3605 tx->num_scratch = 0; /* reset */
3606
3607 TOKEN_JUMP(tx);
3608 }
3609
3610 #define GET_CAP(n) screen->get_param( \
3611 screen, PIPE_CAP_##n)
3612 #define GET_SHADER_CAP(n) screen->get_shader_param( \
3613 screen, info->type, PIPE_SHADER_CAP_##n)
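/* e.g. GET_CAP(TGSI_TEXCOORD) expands to screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD). */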
3614
3615 static HRESULT
3616 tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info)
3617 {
3618 unsigned i;
3619
3620 memset(tx, 0, sizeof(*tx));
3621
3622 tx->info = info;
3623
3624 tx->byte_code = info->byte_code;
3625 tx->parse = info->byte_code;
3626
3627 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i)
3628 info->input_map[i] = NINE_DECLUSAGE_NONE;
3629 info->num_inputs = 0;
3630
3631 info->position_t = false;
3632 info->point_size = false;
3633
3634 memset(tx->slots_used, 0, sizeof(tx->slots_used));
3635 memset(info->int_slots_used, 0, sizeof(info->int_slots_used));
3636 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used));
3637
3638 tx->info->const_float_slots = 0;
3639 tx->info->const_int_slots = 0;
3640 tx->info->const_bool_slots = 0;
3641
3642 info->sampler_mask = 0x0;
3643 info->rt_mask = 0x0;
3644
3645 info->lconstf.data = NULL;
3646 info->lconstf.ranges = NULL;
3647
3648 info->bumpenvmat_needed = 0;
3649
3650 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) {
3651 tx->regs.rL[i] = ureg_dst_undef();
3652 }
3653 tx->regs.address = ureg_dst_undef();
3654 tx->regs.a0 = ureg_dst_undef();
3655 tx->regs.p = ureg_dst_undef();
3656 tx->regs.oDepth = ureg_dst_undef();
3657 tx->regs.vPos = ureg_src_undef();
3658 tx->regs.vFace = ureg_src_undef();
3659 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i)
3660 tx->regs.o[i] = ureg_dst_undef();
3661 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i)
3662 tx->regs.oCol[i] = ureg_dst_undef();
3663 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i)
3664 tx->regs.vC[i] = ureg_src_undef();
3665 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i)
3666 tx->regs.vT[i] = ureg_src_undef();
3667
3668 sm1_read_version(tx);
3669
3670 info->version = (tx->version.major << 4) | tx->version.minor;
3671
3672 tx->num_outputs = 0;
3673
3674 create_op_info_map(tx);
3675
3676 tx->ureg = ureg_create(info->type);
3677 if (!tx->ureg) {
3678 return E_OUTOFMEMORY;
3679 }
3680
3681 tx->native_integers = GET_SHADER_CAP(INTEGERS);
3682 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES);
3683 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD);
3684 tx->shift_wpos = !GET_CAP(FS_COORD_PIXEL_CENTER_INTEGER);
3685 tx->texcoord_sn = tx->want_texcoord ?
3686 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC;
3687 tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL);
3688 tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL);
3689 tx->no_vs_window_space = !GET_CAP(VS_WINDOW_SPACE_POSITION);
3690 tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES);
3691
3692 if (info->emulate_features) {
3693 tx->shift_wpos = true;
3694 tx->no_vs_window_space = true;
3695 tx->mul_zero_wins = false;
3696 }
3697
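    /* Float constant register limits per D3D9 profile: vertex shaders get the full
     * NINE_MAX_CONST_F range, ps_1_x exposes 8, ps_2_x 32 and ps_3_0 224. */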
3698 if (IS_VS) {
3699 tx->num_constf_allowed = NINE_MAX_CONST_F;
3700 } else if (tx->version.major < 2) {/* IS_PS v1 */
3701 tx->num_constf_allowed = 8;
3702 } else if (tx->version.major == 2) {/* IS_PS v2 */
3703 tx->num_constf_allowed = 32;
3704 } else {/* IS_PS v3 */
3705 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3;
3706 }
3707
3708 if (tx->version.major < 2) {
3709 tx->num_consti_allowed = 0;
3710 tx->num_constb_allowed = 0;
3711 } else {
3712 tx->num_consti_allowed = NINE_MAX_CONST_I;
3713 tx->num_constb_allowed = NINE_MAX_CONST_B;
3714 }
3715
3716 if (info->swvp_on) {
3717         /* TODO: check whether these limits differ when tx->version.major == 1 */
3718 tx->num_constf_allowed = 8192;
3719 tx->num_consti_allowed = 2048;
3720 tx->num_constb_allowed = 2048;
3721 }
3722
3723 /* VS must always write position. Declare it here to make it the 1st output.
3724 * (Some drivers like nv50 are buggy and rely on that.)
3725 */
3726 if (IS_VS) {
3727 tx->regs.oPos_out = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0);
3728 } else {
3729 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
3730 if (!tx->shift_wpos)
3731 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3732 }
3733
3734 if (tx->mul_zero_wins)
3735 ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
3736
3737 /* Add additional definition of constants */
3738 if (info->add_constants_defs.c_combination) {
3739 unsigned i;
3740
3741 assert(info->add_constants_defs.int_const_added);
3742 assert(info->add_constants_defs.bool_const_added);
3743 /* We only add constants that are used by the shader
3744 * and that are not defined in the shader */
3745 for (i = 0; i < NINE_MAX_CONST_I; ++i) {
3746 if ((*info->add_constants_defs.int_const_added)[i]) {
3747 DBG("Defining const i%i : { %i %i %i %i }\n", i,
3748 info->add_constants_defs.c_combination->const_i[i][0],
3749 info->add_constants_defs.c_combination->const_i[i][1],
3750 info->add_constants_defs.c_combination->const_i[i][2],
3751 info->add_constants_defs.c_combination->const_i[i][3]);
3752 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]);
3753 }
3754 }
3755 for (i = 0; i < NINE_MAX_CONST_B; ++i) {
3756 if ((*info->add_constants_defs.bool_const_added)[i]) {
3757 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0));
3758 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]);
3759 }
3760 }
3761 }
3762 return D3D_OK;
3763 }
3764
3765 static void
3766 tx_dtor(struct shader_translator *tx)
3767 {
3768 if (tx->slot_map)
3769 FREE(tx->slot_map);
3770 if (tx->num_inst_labels)
3771 FREE(tx->inst_labels);
3772 FREE(tx->lconstf);
3773 FREE(tx->regs.r);
3774 FREE(tx);
3775 }
3776
3777 /* CONST[0].xyz = width/2, -height/2, zmax-zmin
3778 * CONST[1].xyz = x+width/2, y+height/2, zmin */
3779 static void
3780 shader_add_vs_viewport_transform(struct shader_translator *tx)
3781 {
3782 struct ureg_program *ureg = tx->ureg;
3783 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0);
3784 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1);
3785 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/
3786
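    /* The viewport constants live in constant buffer 4, declared in
     * nine_translate_shader when process_vertices is set. */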
3787 c0 = ureg_src_dimension(c0, 4);
3788 c1 = ureg_src_dimension(c1, 4);
3789 /* TODO: find out when we need to apply the viewport transformation or not.
3790 * Likely will be XYZ vs XYZRHW in vdecl_out
3791 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0);
3792 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1);
3793 */
3794 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos));
3795 }
3796
3797 static void
3798 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_dst dst_col, struct ureg_src src_col)
3799 {
3800 struct ureg_program *ureg = tx->ureg;
3801 struct ureg_src fog_end, fog_coeff, fog_density, fog_params;
3802 struct ureg_src fog_vs, fog_color;
3803 struct ureg_dst fog_factor, depth;
3804
3805 if (!tx->info->fog_enable) {
3806 ureg_MOV(ureg, dst_col, src_col);
3807 return;
3808 }
3809
3810 if (tx->info->fog_mode != D3DFOG_NONE) {
3811 depth = tx_scratch_scalar(tx);
3812 if (tx->info->zfog)
3813 ureg_MOV(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z));
3814 else /* wfog: use w. position's w contains 1/w */
3815 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W));
3816 }
3817
3818 fog_color = nine_special_constant_src(tx, 12);
3819 fog_params = nine_special_constant_src(tx, 13);
3820 fog_factor = tx_scratch_scalar(tx);
3821
3822 if (tx->info->fog_mode == D3DFOG_LINEAR) {
3823 fog_end = NINE_APPLY_SWIZZLE(fog_params, X);
3824 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y);
3825 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth)));
3826 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff);
3827 } else if (tx->info->fog_mode == D3DFOG_EXP) {
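        /* exp(-x) is implemented as 2^(-x * log2(e)), with log2(e) ~= 1.442695;
         * the EXP2 mode below squares the exponent term first. */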
3828 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3829 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3830 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3831 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3832 } else if (tx->info->fog_mode == D3DFOG_EXP2) {
3833 fog_density = NINE_APPLY_SWIZZLE(fog_params, X);
3834 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density);
3835 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor));
3836 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f));
3837 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor));
3838 } else {
3839 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16,
3840 TGSI_INTERPOLATE_PERSPECTIVE),
3841 TGSI_SWIZZLE_X);
3842 ureg_MOV(ureg, fog_factor, fog_vs);
3843 }
3844
3845 ureg_LRP(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_XYZ),
3846 tx_src_scalar(fog_factor), src_col, fog_color);
3847 ureg_MOV(ureg, ureg_writemask(dst_col, TGSI_WRITEMASK_W), src_col);
3848 }
3849
3850 static void
3851 shader_add_ps_alpha_test_stage(struct shader_translator *tx, struct ureg_src src_color)
3852 {
3853 struct ureg_program *ureg = tx->ureg;
3854 unsigned cmp_op;
3855 struct ureg_src src[2];
3856 struct ureg_dst tmp = tx_scratch(tx);
3857 if (tx->info->alpha_test_emulation == PIPE_FUNC_ALWAYS)
3858 return;
3859 if (tx->info->alpha_test_emulation == PIPE_FUNC_NEVER) {
3860 ureg_KILL(ureg);
3861 return;
3862 }
3863 cmp_op = pipe_comp_to_tgsi_opposite(tx->info->alpha_test_emulation);
3864 src[0] = ureg_scalar(src_color, TGSI_SWIZZLE_W); /* Read color alpha channel */
3865 src[1] = ureg_scalar(nine_special_constant_src(tx, 14), TGSI_SWIZZLE_X); /* Read alphatest */
3866 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0);
3867 ureg_KILL_IF(tx->ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
3868 }
3869
3870 static void parse_shader(struct shader_translator *tx)
3871 {
3872 struct nine_shader_info *info = tx->info;
3873
3874 while (!sm1_parse_eof(tx) && !tx->failure)
3875 sm1_parse_instruction(tx);
3876 tx->parse++; /* for byte_size */
3877
3878 if (tx->failure)
3879 return;
3880
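    /* For ps < 3.0 the shader's color result (r0 for ps_1_x, oC0 otherwise) is routed
     * through the emulated fog and alpha-test stages before being copied to the
     * real COLOR output. */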
3881 if (IS_PS) {
3882 struct ureg_dst oCol0 = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, 0);
3883 struct ureg_dst tmp_oCol0;
3884 if (tx->version.major < 3) {
3885 tmp_oCol0 = ureg_DECL_temporary(tx->ureg);
3886 if (tx->version.major < 2) {
3887 assert(tx->num_temp); /* there must be color output */
3888 info->rt_mask |= 0x1;
3889 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.r[0]));
3890 } else {
3891 shader_add_ps_fog_stage(tx, tmp_oCol0, ureg_src(tx->regs.oCol[0]));
3892 }
3893 } else {
3894 assert(!ureg_dst_is_undef(tx->regs.oCol[0]));
3895 tmp_oCol0 = tx->regs.oCol[0];
3896 }
3897 shader_add_ps_alpha_test_stage(tx, ureg_src(tmp_oCol0));
3898 ureg_MOV(tx->ureg, oCol0, ureg_src(tmp_oCol0));
3899 }
3900
3901 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) {
3902 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16);
3903 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f));
3904 }
3905
3906 if (info->position_t) {
3907 if (tx->no_vs_window_space) {
3908 ERR("POSITIONT is not yet implemented for your device.\n");
3909 } else {
3910 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
3911 }
3912 }
3913
3914 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) {
3915 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3916 ureg_MAX(tx->ureg, ureg_writemask(tx->regs.oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min));
3917 ureg_MIN(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max));
3918 info->point_size = true;
3919 } else if (IS_VS && tx->always_output_pointsize) {
3920 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0);
3921 ureg_MOV(tx->ureg, ureg_writemask(oPts, TGSI_WRITEMASK_X), nine_special_constant_src(tx, 8));
3922 info->point_size = true;
3923 }
3924
3925 if (IS_VS && tx->info->clip_plane_emulation > 0) {
3926 struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
3927         int num_clipdist = util_last_bit(tx->info->clip_plane_emulation); /* highest enabled plane + 1 */
3928 int i;
3929         /* TODO: handle undefined channels of oPos (w is not always written; it defaults to 1).
3930          * Note: in d3d9 it is not possible to output a clip vertex, so we do not need to check
3931          * for its existence. */
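        /* Each TGSI_SEMANTIC_CLIPDIST output packs four clip distances; planes not
         * enabled in the emulation mask are written as 0. */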
3932 clipdist[0] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 0, ((1 << num_clipdist) - 1) & 0xf, 0, 1);
3933 if (num_clipdist >= 5)
3934 clipdist[1] = ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_CLIPDIST, 1, ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
3935 ureg_property(tx->ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
3936 for (i = 0; i < num_clipdist; i++) {
3937 assert(!ureg_dst_is_undef(clipdist[i>>2]));
3938 if (!(tx->info->clip_plane_emulation & (1 << i)))
3939                 ureg_MOV(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x3)), ureg_imm1f(tx->ureg, 0.f));
3940 else
3941                 ureg_DP4(tx->ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x3)),
3942 ureg_src(tx->regs.oPos), nine_special_constant_src(tx, i));
3943 }
3944
3945 ureg_MOV(tx->ureg, tx->regs.oPos_out, ureg_src(tx->regs.oPos));
3946 }
3947
3948 if (info->process_vertices)
3949 shader_add_vs_viewport_transform(tx);
3950
3951 ureg_END(tx->ureg);
3952 }
3953
3954 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2)
3955 #define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3)
3956 #define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4)
3957 #define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5)
3958
3959 static const struct debug_named_value nine_shader_debug_options[] = {
3960 { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." },
3961 { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." },
3962 { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." },
3963 { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." },
3964 DEBUG_NAMED_VALUE_END /* must be last */
3965 };
3966
3967 static inline bool
3968 nine_shader_get_debug_flag(uint64_t flag)
3969 {
3970 static uint64_t flags = 0;
3971 static bool first_run = true;
3972
3973 if (unlikely(first_run)) {
3974 first_run = false;
3975 flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0);
3976
3977 // Check old TGSI dump envvar too
3978 if (debug_get_bool_option("NINE_TGSI_DUMP", false)) {
3979 flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI;
3980 }
3981 }
3982
3983 return !!(flags & flag);
3984 }
3985
3986 static void
3987 nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens,
3988 struct pipe_screen *screen)
3989 {
3990 struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL);
3991
3992 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) {
3993 nir_print_shader(nir, stdout);
3994 }
3995
3996 state->type = PIPE_SHADER_IR_NIR;
3997 state->tokens = NULL;
3998 state->ir.nir = nir;
3999 memset(&state->stream_output, 0, sizeof(state->stream_output));
4000 }
4001
4002 static void *
4003 nine_ureg_create_shader(struct ureg_program *ureg,
4004 struct pipe_context *pipe,
4005 const struct pipe_stream_output_info *so)
4006 {
4007 struct pipe_shader_state state;
4008 const struct tgsi_token *tgsi_tokens;
4009 struct pipe_screen *screen = pipe->screen;
4010
4011 tgsi_tokens = ureg_finalize(ureg);
4012 if (!tgsi_tokens)
4013 return NULL;
4014
4015 assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2);
4016 enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor;
4017
4018 bool use_nir = true;
4019
4020     /* Allow the user to override the preferred IR; this is very useful for debugging. */
4021 if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS)))
4022 use_nir = false;
4023 if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS)))
4024 use_nir = false;
4025
4026 DUMP("shader type: %s, selected IR: %s\n",
4027 shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS",
4028 use_nir ? "NIR" : "TGSI");
4029
4030 if (use_nir) {
4031 nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen);
4032 } else {
4033 pipe_shader_state_from_tgsi(&state, tgsi_tokens);
4034 }
4035
4036 assert(state.tokens || state.ir.nir);
4037
4038 if (so)
4039 state.stream_output = *so;
4040
4041 switch (shader_type) {
4042 case PIPE_SHADER_VERTEX:
4043 return pipe->create_vs_state(pipe, &state);
4044 case PIPE_SHADER_FRAGMENT:
4045 return pipe->create_fs_state(pipe, &state);
4046 default:
4047 unreachable("unsupported shader type");
4048 }
4049 }
4050
4051
4052 void *
4053 nine_create_shader_with_so_and_destroy(struct ureg_program *p,
4054 struct pipe_context *pipe,
4055 const struct pipe_stream_output_info *so)
4056 {
4057 void *result = nine_ureg_create_shader(p, pipe, so);
4058 ureg_destroy(p);
4059 return result;
4060 }
4061
4062 HRESULT
4063 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe)
4064 {
4065 struct shader_translator *tx;
4066 HRESULT hr = D3D_OK;
4067 const unsigned processor = info->type;
4068 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen;
4069 unsigned *const_ranges = NULL;
4070
4071 user_assert(processor != ~0, D3DERR_INVALIDCALL);
4072
4073 tx = MALLOC_STRUCT(shader_translator);
4074 if (!tx)
4075 return E_OUTOFMEMORY;
4076
4077 info->emulate_features = device->driver_caps.shader_emulate_features;
4078
4079 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4080 hr = E_OUTOFMEMORY;
4081 goto out;
4082 }
4083 tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4084
4085 assert(IS_VS || !info->swvp_on);
4086
4087 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) {
4088 hr = D3DERR_INVALIDCALL;
4089 DBG("Unsupported shader version: %u.%u !\n",
4090 tx->version.major, tx->version.minor);
4091 goto out;
4092 }
4093 if (tx->processor != processor) {
4094 hr = D3DERR_INVALIDCALL;
4095 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor);
4096 goto out;
4097 }
4098 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS",
4099 tx->version.major, tx->version.minor);
4100
4101 parse_shader(tx);
4102
4103 if (tx->failure) {
4104         /* For VS shaders the warning is printed later:
4105          * we first try again with swvp. */
4106 if (IS_PS)
4107 ERR("Encountered buggy shader\n");
4108 ureg_destroy(tx->ureg);
4109 hr = D3DERR_INVALIDCALL;
4110 goto out;
4111 }
4112
4113 /* Recompile after compacting constant slots if possible */
4114 if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) {
4115 unsigned *slot_map;
4116 unsigned c;
4117 int i, j, num_ranges, prev;
4118
4119 DBG("Recompiling shader for constant compaction\n");
4120 ureg_destroy(tx->ureg);
4121
4122 if (tx->num_inst_labels)
4123 FREE(tx->inst_labels);
4124 FREE(tx->lconstf);
4125 FREE(tx->regs.r);
4126
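        /* Two passes over the used-slot bitmap: first count the contiguous ranges,
         * then build the (start, length) range list and the old->new slot mapping
         * used for the recompile. */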
4127 num_ranges = 0;
4128 prev = -2;
4129 for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4130 if (tx->slots_used[i]) {
4131 if (prev != i - 1)
4132 num_ranges++;
4133 prev = i;
4134 }
4135 }
4136 slot_map = MALLOC(NINE_MAX_CONST_ALL_VS * sizeof(unsigned));
4137 const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */
4138 if (!slot_map || !const_ranges) {
4139 hr = E_OUTOFMEMORY;
4140 goto out;
4141 }
4142 c = 0;
4143 j = -1;
4144 prev = -2;
4145 for (i = 0; i < NINE_MAX_CONST_ALL_VS; i++) {
4146 if (tx->slots_used[i]) {
4147 if (prev != i - 1)
4148 j++;
4149 /* Initialize first slot of the range */
4150 if (!const_ranges[2*j+1])
4151 const_ranges[2*j] = i;
4152 const_ranges[2*j+1]++;
4153 prev = i;
4154 slot_map[i] = c++;
4155 }
4156 }
4157
4158 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) {
4159 hr = E_OUTOFMEMORY;
4160 goto out;
4161 }
4162 tx->always_output_pointsize = device->driver_caps.always_output_pointsize;
4163 tx->slot_map = slot_map;
4164 parse_shader(tx);
4165 assert(!tx->failure);
4166 #if !defined(NDEBUG)
4167 i = 0;
4168 j = 0;
4169 while (const_ranges[i*2+1] != 0) {
4170 j += const_ranges[i*2+1];
4171 i++;
4172 }
4173 assert(j == tx->num_slots);
4174 #endif
4175 }
4176
4177 /* record local constants */
4178 if (tx->num_lconstf && tx->indirect_const_access) {
4179 struct nine_range *ranges;
4180 float *data;
4181 int *indices;
4182 unsigned i, k, n;
4183
4184 hr = E_OUTOFMEMORY;
4185
4186 data = MALLOC(tx->num_lconstf * 4 * sizeof(float));
4187 if (!data)
4188 goto out;
4189 info->lconstf.data = data;
4190
4191 indices = MALLOC(tx->num_lconstf * sizeof(indices[0]));
4192 if (!indices)
4193 goto out;
4194
4195 /* lazy sort, num_lconstf should be small */
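        /* Selection sort: each pass picks the smallest remaining index and marks
         * the consumed entry with INT_MAX. */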
4196 for (n = 0; n < tx->num_lconstf; ++n) {
4197 for (k = 0, i = 0; i < tx->num_lconstf; ++i) {
4198 if (tx->lconstf[i].idx < tx->lconstf[k].idx)
4199 k = i;
4200 }
4201 indices[n] = tx->lconstf[k].idx;
4202 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float));
4203 tx->lconstf[k].idx = INT_MAX;
4204 }
4205
4206 /* count ranges */
4207 for (n = 1, i = 1; i < tx->num_lconstf; ++i)
4208 if (indices[i] != indices[i - 1] + 1)
4209 ++n;
4210 ranges = MALLOC(n * sizeof(ranges[0]));
4211 if (!ranges) {
4212 FREE(indices);
4213 goto out;
4214 }
4215 info->lconstf.ranges = ranges;
4216
4217 k = 0;
4218 ranges[k].bgn = indices[0];
4219 for (i = 1; i < tx->num_lconstf; ++i) {
4220 if (indices[i] != indices[i - 1] + 1) {
4221 ranges[k].next = &ranges[k + 1];
4222 ranges[k].end = indices[i - 1] + 1;
4223 ++k;
4224 ranges[k].bgn = indices[i];
4225 }
4226 }
4227 ranges[k].end = indices[i - 1] + 1;
4228 ranges[k].next = NULL;
4229 assert(n == (k + 1));
4230
4231 FREE(indices);
4232 hr = D3D_OK;
4233 }
4234
4235 /* r500 */
4236 if (info->const_float_slots > device->max_vs_const_f &&
4237 (info->const_int_slots || info->const_bool_slots) &&
4238 !info->swvp_on)
4239 ERR("Overlapping constant slots. The shader is likely to be buggy\n");
4240
4241
4242 if (tx->indirect_const_access) { /* vs only */
4243 info->const_float_slots = device->max_vs_const_f;
4244 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f);
4245 }
4246
4247 if (!info->swvp_on) {
4248 info->const_used_size = sizeof(float[4]) * tx->num_slots;
4249 if (tx->num_slots)
4250 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0);
4251 } else {
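        /* Matches the swvp limits set in tx_ctor: 8192 float4 constants split across
         * buffers 0 and 1, 2048 int4 constants in buffer 2, and the 2048 bool
         * constants packed into buffer 3. */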
4252 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0);
4253 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1);
4254 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2);
4255 ureg_DECL_constant2D(tx->ureg, 0, 511, 3);
4256 }
4257
4258 if (info->process_vertices)
4259 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */
4260
4261 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) {
4262 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL);
4263 tgsi_dump(toks, 0);
4264 ureg_free_tokens(toks);
4265 }
4266
4267 if (info->process_vertices) {
4268 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out,
4269 tx->output_info,
4270 tx->num_outputs,
4271 &(info->so));
4272 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so));
4273 } else
4274 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL);
4275 if (!info->cso) {
4276 hr = D3DERR_DRIVERINTERNALERROR;
4277 FREE(info->lconstf.data);
4278 FREE(info->lconstf.ranges);
4279 goto out;
4280 }
4281
4282 info->const_ranges = const_ranges;
4283 const_ranges = NULL;
4284 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD);
4285 out:
4286 if (const_ranges)
4287 FREE(const_ranges);
4288 tx_dtor(tx);
4289 return hr;
4290 }
4291