1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
/* Debug channel selected for this file (fixed-function code). */
#define DBG_CHANNEL DBG_FF

/* Size of the FF vertex shader constant table; the highest slot the
 * builder in this file declares is CONST[195]. */
#define NINE_FF_NUM_VS_CONST 196
/* Size of the FF pixel shader constant table. */
#define NINE_FF_NUM_PS_CONST 24
31
/* Four-component float vector, components stored in x, y, z, w order. */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
36
/* Key identifying one fixed-function vertex shader variant.
 *
 * The bitfields are packed into six 32-bit words; value64[] and value32[]
 * alias the whole key so it can be hashed word by word and compared with
 * memcmp. Every padN field completes a 32-bit word and is part of the
 * hashed/compared bytes.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* word 0: feature switches */
            uint32_t position_t          : 1;
            uint32_t lighting            : 1;
            uint32_t darkness            : 1; /* lighting enabled but no active lights */
            uint32_t localviewer         : 1;
            uint32_t vertexpointsize     : 1;
            uint32_t pointscale          : 1;
            uint32_t vertexblend         : 3; /* loop bound for matrix blending in the builder */
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween         : 1;
            uint32_t mtl_diffuse         : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient         : 2;
            uint32_t mtl_specular        : 2;
            uint32_t mtl_emissive        : 2;
            uint32_t fog_mode            : 2;
            uint32_t fog_range           : 1;
            uint32_t color0in_one        : 1;
            uint32_t color1in_zero       : 1;
            uint32_t has_normal          : 1;
            uint32_t fog                 : 1;
            uint32_t normalizenormals    : 1;
            uint32_t ucp                 : 1;
            uint32_t pad1                : 4;

            /* word 1 */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;

            /* word 2 */
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;

            /* word 3 */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;

            /* word 4: 8 * 3 bits, extracted per stage like tc_gen */
            uint32_t tc_idx : 24;
            uint32_t pad5 : 8;

            /* word 5: tested against (1 << NINE_DECLUSAGE_*) */
            uint32_t passthrough;
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
77
78 /* Texture stage state:
79 *
80 * COLOROP D3DTOP 5 bit
81 * ALPHAOP D3DTOP 5 bit
82 * COLORARG0 D3DTA 3 bit
83 * COLORARG1 D3DTA 3 bit
84 * COLORARG2 D3DTA 3 bit
85 * ALPHAARG0 D3DTA 3 bit
86 * ALPHAARG1 D3DTA 3 bit
87 * ALPHAARG2 D3DTA 3 bit
88 * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET 1D/2D/3D/CUBE 2 bit (TEXCOORDINDEX is not stored here; see nine_ff_vs_key.tc_idx)
 * (padding) 1 bit
90 * ===========================
91 * 32 bit per stage
92 */
/* Key identifying one fixed-function pixel shader variant.
 *
 * Layout: eight 32-bit texture-stage words, one 32-bit word of global
 * flags, then 12 bytes of extra argument bits and padding. value64[] and
 * value32[] alias the whole key so it can be hashed word by word and
 * compared with memcmp.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            /* words 0..7: one packed word per texture stage */
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];

            /* word 8: global flags */
            uint32_t projected  : 16;
            uint32_t fog        : 1; /* for vFog coming from VS */
            uint32_t fog_mode   : 2;
            uint32_t fog_source : 1; /* 0: Z, 1: W */
            uint32_t specular   : 1;
            uint32_t pad1       : 11; /* 9 32-bit words with this */

            /* words 9..11: extra argument bytes, rounded up by pad2 */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
126
nine_ff_vs_key_hash(void * key)127 static unsigned nine_ff_vs_key_hash(void *key)
128 {
129 struct nine_ff_vs_key *vs = key;
130 unsigned i;
131 uint32_t hash = vs->value32[0];
132 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
133 hash ^= vs->value32[i];
134 return hash;
135 }
nine_ff_vs_key_comp(void * key1,void * key2)136 static int nine_ff_vs_key_comp(void *key1, void *key2)
137 {
138 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
139 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
140
141 return memcmp(a->value64, b->value64, sizeof(a->value64));
142 }
nine_ff_ps_key_hash(void * key)143 static unsigned nine_ff_ps_key_hash(void *key)
144 {
145 struct nine_ff_ps_key *ps = key;
146 unsigned i;
147 uint32_t hash = ps->value32[0];
148 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
149 hash ^= ps->value32[i];
150 return hash;
151 }
nine_ff_ps_key_comp(void * key1,void * key2)152 static int nine_ff_ps_key_comp(void *key1, void *key2)
153 {
154 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
155 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
156
157 return memcmp(a->value64, b->value64, sizeof(a->value64));
158 }
nine_ff_fvf_key_hash(void * key)159 static unsigned nine_ff_fvf_key_hash(void *key)
160 {
161 return *(DWORD *)key;
162 }
nine_ff_fvf_key_comp(void * key1,void * key2)163 static int nine_ff_fvf_key_comp(void *key1, void *key2)
164 {
165 return *(DWORD *)key1 != *(DWORD *)key2;
166 }
167
/* Forward declarations; the definitions appear later in this file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
170
/* Dump the TGSI tokens of a ureg program for debugging.
 *
 * The dump happens when the NINE_FF_DUMP debug option is set or when
 * override is TRUE; otherwise the function does nothing. The token array
 * obtained from ureg is freed again before returning.
 */
static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
{
    const struct tgsi_token *tokens;
    unsigned num_tokens;

    if (!debug_get_bool_option("NINE_FF_DUMP", FALSE) && !override)
        return;

    tokens = ureg_get_tokens(ureg, &num_tokens);
    tgsi_dump(tokens, 0);
    ureg_free_tokens(tokens);
}
180
/* Replicate one component of a ureg_src across all four channels. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, for symmetry with the macros above. */
#define _XYZW(r) (r)

/* Same as _XXXX etc., but taking a ureg_dst that is first converted to a
 * source with ureg_src(). */
#define _X(r) _XXXX(ureg_src(r))
#define _Y(r) _YYYY(ureg_src(r))
#define _Z(r) _ZZZZ(ureg_src(r))
#define _W(r) _WWWW(ureg_src(r))

/* Declare constant register n on the `ureg` program in scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* Material constants start at CONST[19]. */
#define MATERIAL_CONST(i) \
    _CONST(19 + (i))

/* AL should contain base address of lights table. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(_CONST(i), _X(AL))
201
202 /* VS FF constants layout:
203 *
204 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
205 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
206 * CONST[ 8..11] D3DTS_PROJECTION
207 * CONST[12..15] D3DTS_VIEW^(-1)
208 * CONST[16..18] Normal matrix
209 *
210 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient
211 * CONST[20] MATERIAL.Diffuse
212 * CONST[21] MATERIAL.Ambient
213 * CONST[22] MATERIAL.Specular
214 * CONST[23].x___ MATERIAL.Power
215 * CONST[24] MATERIAL.Emissive
216 * CONST[25] RS.Ambient
217 *
218 * CONST[26].x___ RS.PointSizeMin
219 * CONST[26]._y__ RS.PointSizeMax
220 * CONST[26].__z_ RS.PointSize
221 * CONST[26].___w RS.PointScaleA
222 * CONST[27].x___ RS.PointScaleB
223 * CONST[27]._y__ RS.PointScaleC
224 *
225 * CONST[28].x___ RS.FogEnd
226 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
227 * CONST[28].__z_ RS.FogDensity
 *
229 * CONST[30].x___ TWEENFACTOR
230 *
231 * CONST[32].x___ LIGHT[0].Type
232 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
233 * CONST[33] LIGHT[0].Diffuse
234 * CONST[34] LIGHT[0].Specular
235 * CONST[35] LIGHT[0].Ambient
236 * CONST[36].xyz_ LIGHT[0].Position
237 * CONST[36].___w LIGHT[0].Range
238 * CONST[37].xyz_ LIGHT[0].Direction
239 * CONST[37].___w LIGHT[0].Falloff
240 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
241 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
242 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
243 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
244 * CONST[39].___w 1 if this is the last active light, 0 if not
245 * CONST[40] LIGHT[1]
246 * CONST[48] LIGHT[2]
247 * CONST[56] LIGHT[3]
248 * CONST[64] LIGHT[4]
249 * CONST[72] LIGHT[5]
250 * CONST[80] LIGHT[6]
251 * CONST[88] LIGHT[7]
252 * NOTE: no lighting code is generated if there are no active lights
253 *
254 * CONST[100].x___ Viewport 2/width
255 * CONST[100]._y__ Viewport 2/height
256 * CONST[100].__z_ Viewport 1/(zmax - zmin)
257 * CONST[100].___w Viewport width
258 * CONST[101].x___ Viewport x0
259 * CONST[101]._y__ Viewport y0
260 * CONST[101].__z_ Viewport z0
261 *
262 * CONST[128..131] D3DTS_TEXTURE0
263 * CONST[132..135] D3DTS_TEXTURE1
264 * CONST[136..139] D3DTS_TEXTURE2
265 * CONST[140..143] D3DTS_TEXTURE3
266 * CONST[144..147] D3DTS_TEXTURE4
267 * CONST[148..151] D3DTS_TEXTURE5
268 * CONST[152..155] D3DTS_TEXTURE6
269 * CONST[156..159] D3DTS_TEXTURE7
270 *
271 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
272 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
273 * ...
274 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
275 */
276 struct vs_build_ctx
277 {
278 struct ureg_program *ureg;
279 const struct nine_ff_vs_key *key;
280
281 uint16_t input[PIPE_MAX_ATTRIBS];
282 unsigned num_inputs;
283
284 struct ureg_src aVtx;
285 struct ureg_src aNrm;
286 struct ureg_src aCol[2];
287 struct ureg_src aTex[8];
288 struct ureg_src aPsz;
289 struct ureg_src aInd;
290 struct ureg_src aWgt;
291
292 struct ureg_src aVtx1; /* tweening */
293 struct ureg_src aNrm1;
294
295 struct ureg_src mtlA;
296 struct ureg_src mtlD;
297 struct ureg_src mtlS;
298 struct ureg_src mtlE;
299 };
300
301 static inline unsigned
get_texcoord_sn(struct pipe_screen * screen)302 get_texcoord_sn(struct pipe_screen *screen)
303 {
304 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
305 return TGSI_SEMANTIC_TEXCOORD;
306 return TGSI_SEMANTIC_GENERIC;
307 }
308
309 static inline struct ureg_src
build_vs_add_input(struct vs_build_ctx * vs,uint16_t ndecl)310 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
311 {
312 const unsigned i = vs->num_inputs++;
313 assert(i < PIPE_MAX_ATTRIBS);
314 vs->input[i] = ndecl;
315 return ureg_DECL_vs_input(vs->ureg, i);
316 }
317
318 /* NOTE: dst may alias src */
319 static inline void
ureg_normalize3(struct ureg_program * ureg,struct ureg_dst dst,struct ureg_src src)320 ureg_normalize3(struct ureg_program *ureg,
321 struct ureg_dst dst, struct ureg_src src)
322 {
323 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
324 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
325
326 ureg_DP3(ureg, tmp_x, src, src);
327 ureg_RSQ(ureg, tmp_x, _X(tmp));
328 ureg_MUL(ureg, dst, src, _X(tmp));
329 ureg_release_temporary(ureg, tmp);
330 }
331
332 static void *
nine_ff_build_vs(struct NineDevice9 * device,struct vs_build_ctx * vs)333 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
334 {
335 const struct nine_ff_vs_key *key = vs->key;
336 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
337 struct ureg_dst oPos, oCol[2], oPsz, oFog;
338 struct ureg_dst AR;
339 unsigned i, c;
340 unsigned label[32], l = 0;
341 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
342 boolean has_aNrm = need_aNrm && key->has_normal;
343 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
344 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
345
346 vs->ureg = ureg;
347
348 /* Check which inputs we should transform. */
349 for (i = 0; i < 8 * 3; i += 3) {
350 switch ((key->tc_gen >> i) & 0x7) {
351 case NINED3DTSS_TCI_CAMERASPACENORMAL:
352 need_aNrm = TRUE;
353 break;
354 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
355 need_aVtx = TRUE;
356 break;
357 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
358 need_aVtx = need_aNrm = TRUE;
359 break;
360 case NINED3DTSS_TCI_SPHEREMAP:
361 need_aVtx = need_aNrm = TRUE;
362 break;
363 default:
364 break;
365 }
366 }
367
368 /* Declare and record used inputs (needed for linkage with vertex format):
369 * (texture coordinates handled later)
370 */
371 vs->aVtx = build_vs_add_input(vs,
372 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
373
374 vs->aNrm = ureg_imm1f(ureg, 0.0f);
375 if (has_aNrm)
376 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
377
378 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
379 vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
380
381 if (key->lighting || key->darkness) {
382 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
383 key->mtl_ambient | key->mtl_emissive;
384 if ((mask & 0x1) && !key->color0in_one)
385 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
386 if ((mask & 0x2) && !key->color1in_zero)
387 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
388
389 vs->mtlD = MATERIAL_CONST(1);
390 vs->mtlA = MATERIAL_CONST(2);
391 vs->mtlS = MATERIAL_CONST(3);
392 vs->mtlE = MATERIAL_CONST(5);
393 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
394 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
395 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
396 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
397 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
398 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
399 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
400 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
401 } else {
402 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
403 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
404 }
405
406 if (key->vertexpointsize)
407 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
408
409 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
410 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
411 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
412 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
413 if (key->vertextween) {
414 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
415 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
416 }
417
418 /* Declare outputs:
419 */
420 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
421 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
422 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
423 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
424 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
425 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
426 }
427
428 if (key->vertexpointsize || key->pointscale) {
429 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
430 TGSI_WRITEMASK_X, 0, 1);
431 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
432 }
433
434 if (key->lighting || key->vertexblend)
435 AR = ureg_DECL_address(ureg);
436
437 /* === Vertex transformation / vertex blending:
438 */
439
440 if (key->position_t) {
441 if (device->driver_caps.window_space_position_support) {
442 ureg_MOV(ureg, oPos, vs->aVtx);
443 } else {
444 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
445 /* vs->aVtx contains the coordinates buffer wise.
446 * later in the pipeline, clipping, viewport and division
447 * by w (rhw = 1/w) are going to be applied, so do the reverse
448 * of these transformations (except clipping) to have the good
449 * position at the end.*/
450 ureg_MOV(ureg, tmp, vs->aVtx);
451 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
452 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
453 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
454 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
455 /* Y needs to be reversed */
456 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
457 /* inverse rhw */
458 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
459 /* multiply X, Y, Z by w */
460 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
461 ureg_MOV(ureg, oPos, ureg_src(tmp));
462 ureg_release_temporary(ureg, tmp);
463 }
464 } else if (key->vertexblend) {
465 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
466 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
467 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
468 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
469 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
470 struct ureg_src cWM[4];
471
472 for (i = 160; i <= 195; ++i)
473 ureg_DECL_constant(ureg, i);
474
475 /* translate world matrix index to constant file index */
476 if (key->vertexblend_indexed) {
477 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
478 ureg_ARL(ureg, AR, ureg_src(tmp));
479 }
480
481 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
482 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
483 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
484
485 for (i = 0; i < key->vertexblend; ++i) {
486 for (c = 0; c < 4; ++c) {
487 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c);
488 if (key->vertexblend_indexed)
489 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
490 }
491
492 /* multiply by WORLD(index) */
493 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
494 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
495 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
496 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
497
498 if (has_aNrm) {
499 /* Note: the spec says the transpose of the inverse of the
500 * WorldView matrices should be used, but all tests show
501 * otherwise.
502 * Only case unknown: D3DVBF_0WEIGHTS */
503 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
504 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
505 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
506 }
507
508 if (i < (key->vertexblend - 1)) {
509 /* accumulate weighted position value */
510 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
511 if (has_aNrm)
512 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
513 /* subtract weighted position value for last value */
514 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
515 }
516 }
517
518 /* the last weighted position is always 1 - sum_of_previous_weights */
519 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
520 if (has_aNrm)
521 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
522
523 /* multiply by VIEW_PROJ */
524 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
525 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
526 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
527 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
528
529 if (need_aVtx)
530 vs->aVtx = ureg_src(aVtx_dst);
531
532 ureg_release_temporary(ureg, tmp);
533 ureg_release_temporary(ureg, tmp2);
534 ureg_release_temporary(ureg, sum_blendweights);
535 if (!need_aVtx)
536 ureg_release_temporary(ureg, aVtx_dst);
537
538 if (has_aNrm) {
539 if (key->normalizenormals)
540 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
541 vs->aNrm = ureg_src(aNrm_dst);
542 } else
543 ureg_release_temporary(ureg, aNrm_dst);
544 } else {
545 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
546
547 if (key->vertextween) {
548 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
549 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
550 vs->aVtx = ureg_src(aVtx_dst);
551 if (has_aNrm) {
552 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
553 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
554 vs->aNrm = ureg_src(aNrm_dst);
555 }
556 }
557
558 /* position = vertex * WORLD_VIEW_PROJ */
559 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
560 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
561 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
562 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
563 ureg_release_temporary(ureg, tmp);
564
565 if (need_aVtx) {
566 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
567 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
568 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
569 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
570 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
571 vs->aVtx = ureg_src(aVtx_dst);
572 }
573 if (has_aNrm) {
574 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
575 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
576 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
577 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
578 if (key->normalizenormals)
579 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
580 vs->aNrm = ureg_src(aNrm_dst);
581 }
582 }
583
584 /* === Process point size:
585 */
586 if (key->vertexpointsize || key->pointscale) {
587 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
588 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
589 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
590 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
591 if (key->vertexpointsize) {
592 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
593 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
594 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
595 } else {
596 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
597 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
598 }
599
600 if (key->pointscale) {
601 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
602 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
603
604 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
605 ureg_RSQ(ureg, tmp_y, _X(tmp));
606 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
607 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
608 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
609 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
610 ureg_RSQ(ureg, tmp_x, _X(tmp));
611 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
612 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
613 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
614 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
615 }
616
617 ureg_MOV(ureg, oPsz, _Z(tmp));
618 ureg_release_temporary(ureg, tmp);
619 }
620
621 for (i = 0; i < 8; ++i) {
622 struct ureg_dst tmp, tmp_x, tmp2;
623 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
624 unsigned c, writemask;
625 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
626 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
627 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
628 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
629
630 /* No texture output of index s */
631 if (tci == NINED3DTSS_TCI_DISABLE)
632 continue;
633 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
634 tmp = ureg_DECL_temporary(ureg);
635 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
636 input_coord = ureg_DECL_temporary(ureg);
637 transformed = ureg_DECL_temporary(ureg);
638
639 /* Get the coordinate */
640 switch (tci) {
641 case NINED3DTSS_TCI_PASSTHRU:
642 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
643 * Else the idx is used only to determine wrapping mode. */
644 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
645 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
646 break;
647 case NINED3DTSS_TCI_CAMERASPACENORMAL:
648 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
649 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
650 dim_input = 4;
651 break;
652 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
653 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
654 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
655 dim_input = 4;
656 break;
657 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
658 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
659 aVtx_normed = ureg_DECL_temporary(ureg);
660 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
661 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
662 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
663 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
664 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
665 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
666 ureg_release_temporary(ureg, aVtx_normed);
667 dim_input = 4;
668 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
669 break;
670 case NINED3DTSS_TCI_SPHEREMAP:
671 /* Implement the formula of GL_SPHERE_MAP */
672 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
673 aVtx_normed = ureg_DECL_temporary(ureg);
674 tmp2 = ureg_DECL_temporary(ureg);
675 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
676 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
677 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
678 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
679 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
680 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
681 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
682 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
683 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
684 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
685 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
686 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
687 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
688 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
689 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
690 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
691 ureg_release_temporary(ureg, aVtx_normed);
692 ureg_release_temporary(ureg, tmp2);
693 dim_input = 4;
694 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
695 break;
696 default:
697 assert(0);
698 break;
699 }
700
701 /* Apply the transformation */
702 /* dim_output == 0 => do not transform the components.
703 * XYZRHW also disables transformation */
704 if (!dim_output || key->position_t) {
705 ureg_release_temporary(ureg, transformed);
706 transformed = input_coord;
707 writemask = TGSI_WRITEMASK_XYZW;
708 } else {
709 for (c = 0; c < dim_output; c++) {
710 t = ureg_writemask(transformed, 1 << c);
711 switch (dim_input) {
712 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
713 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
714 break;
715 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
716 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
717 break;
718 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
719 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
720 break;
721 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
722 default:
723 assert(0);
724 }
725 }
726 writemask = (1 << dim_output) - 1;
727 ureg_release_temporary(ureg, input_coord);
728 }
729
730 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
731 ureg_release_temporary(ureg, transformed);
732 ureg_release_temporary(ureg, tmp);
733 }
734
735 /* === Lighting:
736 *
737 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
738 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
739 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
740 *
741 * vec3 normal = normalize(in.Normal * NormalMatrix);
742 * vec3 hitDir = light.direction;
743 * float atten = 1.0;
744 *
745 * if (light.type != DIRECTIONAL)
746 * {
747 * vec3 hitVec = light.position - eyeVertex;
748 * float d = length(hitVec);
749 * hitDir = hitVec / d;
750 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
751 * }
752 *
753 * if (light.type == SPOTLIGHT)
754 * {
755 * float rho = dp3(-hitVec, light.direction);
756 * if (rho < cos(light.phi / 2))
757 * atten = 0;
758 * if (rho < cos(light.theta / 2))
759 * atten *= pow(some_func(rho), light.falloff);
760 * }
761 *
762 * float nDotHit = dp3_sat(normal, hitVec);
763 * float powFact = 0.0;
764 *
765 * if (nDotHit > 0.0)
766 * {
767 * vec3 midVec = normalize(hitDir + eye);
768 * float nDotMid = dp3_sat(normal, midVec);
769 * pFact = pow(nDotMid, material.power);
770 * }
771 *
772 * ambient += light.ambient * atten;
773 * diffuse += light.diffuse * atten * nDotHit;
774 * specular += light.specular * atten * powFact;
775 */
776 if (key->lighting) {
777 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
778 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
779 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
780 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
781 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
782 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
783 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
784
785 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
786
787 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
788
789 /* Light.*.Alpha is not used. */
790 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
791 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
792 struct ureg_dst rS = ureg_DECL_temporary(ureg);
793
794 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
795
796 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
797 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
798 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
799 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
800 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
801 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
802 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
803 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
804 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
805 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
806 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
807 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
808 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
809 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
810 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
811
812 const unsigned loop_label = l++;
813
814 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
815 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
816 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
817 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
818
819 /* loop management */
820 ureg_BGNLOOP(ureg, &label[loop_label]);
821 ureg_ARL(ureg, AL, _W(rCtr));
822
823 /* if (not DIRECTIONAL light): */
824 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
825 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
826 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
827 ureg_IF(ureg, _X(tmp), &label[l++]);
828 {
829 /* hitDir = light.position - eyeVtx
830 * d = length(hitDir)
831 */
832 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
833 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
834 ureg_RSQ(ureg, tmp_y, _X(tmp));
835 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
836
837 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
838 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
839 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
840 ureg_RCP(ureg, rAtt, _W(rAtt));
841 /* cut-off if distance exceeds Light.Range */
842 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
843 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
844 }
845 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
846 ureg_ENDIF(ureg);
847
848 /* normalize hitDir */
849 ureg_normalize3(ureg, rHit, ureg_src(rHit));
850
851 /* if (SPOT light) */
852 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
853 ureg_IF(ureg, _X(tmp), &label[l++]);
854 {
855 /* rho = dp3(-hitDir, light.spotDir)
856 *
857 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
858 * spotAtt = 1
859 * else
860 * if (rho <= light.cphi2)
861 * spotAtt = 0
862 * else
863 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
864 */
865 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
866 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
867 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
868 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
869 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
870 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
871 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
872 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
873 }
874 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
875 ureg_ENDIF(ureg);
876
877 /* directional factors, let's not use LIT because of clarity */
878
879 if (has_aNrm) {
880 if (key->localviewer) {
881 ureg_normalize3(ureg, rMid, vs->aVtx);
882 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
883 } else {
884 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
885 }
886 ureg_normalize3(ureg, rMid, ureg_src(rMid));
887 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
888 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
889 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
890 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
891 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
892 * No tests were made for backfacing, so add the two conditions */
893 ureg_IF(ureg, _Z(tmp), &label[l++]);
894 {
895 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
896 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
897 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
898 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
899 }
900 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
901 ureg_ENDIF(ureg);
902
903 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
904 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
905 }
906
907 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
908
909 /* break if this was the last light */
910 ureg_IF(ureg, cLLast, &label[l++]);
911 ureg_BRK(ureg);
912 ureg_ENDIF(ureg);
913 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
914
915 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
916 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
917 ureg_ENDLOOP(ureg, &label[loop_label]);
918
919 /* Apply to material:
920 *
921 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
922 * material.ambient * ambient +
923 * material.diffuse * diffuse +
924 * oCol[1] = material.specular * specular;
925 */
926 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
927 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));
928 else {
929 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
930 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
931 }
932
933 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));
934 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
935 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
936 ureg_release_temporary(ureg, rAtt);
937 ureg_release_temporary(ureg, rHit);
938 ureg_release_temporary(ureg, rMid);
939 ureg_release_temporary(ureg, rCtr);
940 ureg_release_temporary(ureg, rD);
941 ureg_release_temporary(ureg, rA);
942 ureg_release_temporary(ureg, rS);
943 ureg_release_temporary(ureg, rAtt);
944 ureg_release_temporary(ureg, tmp);
945 } else
946 /* COLOR */
947 if (key->darkness) {
948 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
949 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));
950 else
951 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
952 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
953 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
954 } else {
955 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
956 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
957 }
958
959 /* === Process fog.
960 *
961 * exp(x) = ex2(log2(e) * x)
962 */
963 if (key->fog_mode) {
964 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
965 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
966 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
967 if (key->fog_range) {
968 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
969 ureg_RSQ(ureg, tmp_z, _X(tmp));
970 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
971 } else {
972 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
973 }
974
975 if (key->fog_mode == D3DFOG_EXP) {
976 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
977 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
978 ureg_EX2(ureg, tmp_x, _X(tmp));
979 } else
980 if (key->fog_mode == D3DFOG_EXP2) {
981 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
982 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
983 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
984 ureg_EX2(ureg, tmp_x, _X(tmp));
985 } else
986 if (key->fog_mode == D3DFOG_LINEAR) {
987 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
988 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
989 }
990 ureg_MOV(ureg, oFog, _X(tmp));
991 ureg_release_temporary(ureg, tmp);
992 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
993 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
994 }
995
996 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
997 struct ureg_src input;
998 struct ureg_dst output;
999 input = vs->aWgt;
1000 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
1001 ureg_MOV(ureg, output, input);
1002 }
1003 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1004 struct ureg_src input;
1005 struct ureg_dst output;
1006 input = vs->aInd;
1007 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1008 ureg_MOV(ureg, output, input);
1009 }
1010 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1011 struct ureg_src input;
1012 struct ureg_dst output;
1013 input = vs->aNrm;
1014 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1015 ureg_MOV(ureg, output, input);
1016 }
1017 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1018 struct ureg_src input;
1019 struct ureg_dst output;
1020 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1021 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1022 ureg_MOV(ureg, output, input);
1023 }
1024 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1025 struct ureg_src input;
1026 struct ureg_dst output;
1027 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1028 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1029 ureg_MOV(ureg, output, input);
1030 }
1031 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1032 struct ureg_src input;
1033 struct ureg_dst output;
1034 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1035 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1036 output = oFog;
1037 ureg_MOV(ureg, output, input);
1038 }
1039 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1040 (void) 0; /* TODO: replace z of position output ? */
1041 }
1042
1043 /* ucp for ff applies on world coordinates.
1044 * aVtx is in worldview coordinates. */
1045 if (key->ucp) {
1046 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1047 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1048 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1049 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));
1050 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1051 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1052 ureg_release_temporary(ureg, tmp);
1053 }
1054
1055 if (key->position_t && device->driver_caps.window_space_position_support)
1056 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
1057
1058 ureg_END(ureg);
1059 nine_ureg_tgsi_dump(ureg, FALSE);
1060 return ureg_create_shader_and_destroy(ureg, device->context.pipe);
1061 }
1062
/* PS FF constants layout:
 *
 * CONST[ 0.. 7]      stage[i].D3DTSS_CONSTANT
 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE  (CONST[16 + i / 2]: x for even i, z for odd i)
 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET (CONST[16 + i / 2]: y for even i, w for odd i)
 *
 * CONST[20]          D3DRS_TEXTUREFACTOR
 * CONST[21]          D3DRS_FOGCOLOR
 * CONST[22].x___     RS.FogEnd
 * CONST[22]._y__     1.0f / (RS.FogEnd - RS.FogStart)
 * CONST[22].__z_     RS.FogDensity
 */
1079 struct ps_build_ctx
1080 {
1081 struct ureg_program *ureg;
1082
1083 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1084 struct ureg_src vT[8]; /* TEXCOORD[i] */
1085 struct ureg_dst rCur; /* D3DTA_CURRENT */
1086 struct ureg_dst rMod;
1087 struct ureg_src rCurSrc;
1088 struct ureg_dst rTmp; /* D3DTA_TEMP */
1089 struct ureg_src rTmpSrc;
1090 struct ureg_dst rTex;
1091 struct ureg_src rTexSrc;
1092 struct ureg_src cBEM[8];
1093 struct ureg_src s[8];
1094
1095 struct {
1096 unsigned index;
1097 unsigned index_pre_mod;
1098 } stage;
1099 };
1100
1101 static struct ureg_src
ps_get_ts_arg(struct ps_build_ctx * ps,unsigned ta)1102 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1103 {
1104 struct ureg_src reg;
1105
1106 switch (ta & D3DTA_SELECTMASK) {
1107 case D3DTA_CONSTANT:
1108 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1109 break;
1110 case D3DTA_CURRENT:
1111 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1112 break;
1113 case D3DTA_DIFFUSE:
1114 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1115 break;
1116 case D3DTA_SPECULAR:
1117 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1118 break;
1119 case D3DTA_TEMP:
1120 reg = ps->rTmpSrc;
1121 break;
1122 case D3DTA_TEXTURE:
1123 reg = ps->rTexSrc;
1124 break;
1125 case D3DTA_TFACTOR:
1126 reg = ureg_DECL_constant(ps->ureg, 20);
1127 break;
1128 default:
1129 assert(0);
1130 reg = ureg_src_undef();
1131 break;
1132 }
1133 if (ta & D3DTA_COMPLEMENT) {
1134 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1135 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
1136 reg = ureg_src(dst);
1137 }
1138 if (ta & D3DTA_ALPHAREPLICATE)
1139 reg = _WWWW(reg);
1140 return reg;
1141 }
1142
1143 static struct ureg_dst
ps_get_ts_dst(struct ps_build_ctx * ps,unsigned ta)1144 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1145 {
1146 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1147
1148 switch (ta & D3DTA_SELECTMASK) {
1149 case D3DTA_CURRENT:
1150 return ps->rCur;
1151 case D3DTA_TEMP:
1152 return ps->rTmp;
1153 default:
1154 assert(0);
1155 return ureg_dst_undef();
1156 }
1157 }
1158
/* Tell which of the three stage arguments a texture operation reads.
 *
 * Returns a bit mask: bit 0 = arg0, bit 1 = arg1, bit 2 = arg2.
 */
static uint8_t
ps_d3dtop_args_mask(D3DTEXTUREOP top)
{
    enum { reads_arg0 = 0x1, reads_arg1 = 0x2, reads_arg2 = 0x4 };
    uint8_t mask;

    switch (top) {
    case D3DTOP_DISABLE:
        mask = 0x0;
        break;
    case D3DTOP_SELECTARG1:
    case D3DTOP_PREMODULATE:
        mask = reads_arg1;
        break;
    case D3DTOP_SELECTARG2:
        mask = reads_arg2;
        break;
    case D3DTOP_MULTIPLYADD:
    case D3DTOP_LERP:
        mask = reads_arg0 | reads_arg1 | reads_arg2;
        break;
    default:
        /* every other operation combines arg1 with arg2 */
        mask = reads_arg1 | reads_arg2;
        break;
    }
    return mask;
}
1176
1177 static inline boolean
is_MOV_no_op(struct ureg_dst dst,struct ureg_src src)1178 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1179 {
1180 return !dst.WriteMask ||
1181 (dst.File == src.File &&
1182 dst.Index == src.Index &&
1183 !dst.Indirect &&
1184 !dst.Saturate &&
1185 !src.Indirect &&
1186 !src.Negate &&
1187 !src.Absolute &&
1188 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1189 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1190 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1191 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1192
1193 }
1194
1195 static void
ps_do_ts_op(struct ps_build_ctx * ps,unsigned top,struct ureg_dst dst,struct ureg_src * arg)1196 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1197 {
1198 struct ureg_program *ureg = ps->ureg;
1199 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1200 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1201 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1202
1203 tmp.WriteMask = dst.WriteMask;
1204
1205 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1206 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1207 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1208 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1209 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1210 top != D3DTOP_LERP)
1211 dst = ureg_saturate(dst);
1212
1213 switch (top) {
1214 case D3DTOP_SELECTARG1:
1215 if (!is_MOV_no_op(dst, arg[1]))
1216 ureg_MOV(ureg, dst, arg[1]);
1217 break;
1218 case D3DTOP_SELECTARG2:
1219 if (!is_MOV_no_op(dst, arg[2]))
1220 ureg_MOV(ureg, dst, arg[2]);
1221 break;
1222 case D3DTOP_MODULATE:
1223 ureg_MUL(ureg, dst, arg[1], arg[2]);
1224 break;
1225 case D3DTOP_MODULATE2X:
1226 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1227 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1228 break;
1229 case D3DTOP_MODULATE4X:
1230 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1231 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1232 break;
1233 case D3DTOP_ADD:
1234 ureg_ADD(ureg, dst, arg[1], arg[2]);
1235 break;
1236 case D3DTOP_ADDSIGNED:
1237 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1238 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
1239 break;
1240 case D3DTOP_ADDSIGNED2X:
1241 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1242 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1243 break;
1244 case D3DTOP_SUBTRACT:
1245 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
1246 break;
1247 case D3DTOP_ADDSMOOTH:
1248 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1249 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1250 break;
1251 case D3DTOP_BLENDDIFFUSEALPHA:
1252 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1253 break;
1254 case D3DTOP_BLENDTEXTUREALPHA:
1255 /* XXX: alpha taken from previous stage, texture or result ? */
1256 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1257 break;
1258 case D3DTOP_BLENDFACTORALPHA:
1259 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1260 break;
1261 case D3DTOP_BLENDTEXTUREALPHAPM:
1262 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
1263 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1264 break;
1265 case D3DTOP_BLENDCURRENTALPHA:
1266 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1267 break;
1268 case D3DTOP_PREMODULATE:
1269 ureg_MOV(ureg, dst, arg[1]);
1270 ps->stage.index_pre_mod = ps->stage.index + 1;
1271 break;
1272 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1273 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1274 break;
1275 case D3DTOP_MODULATECOLOR_ADDALPHA:
1276 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1277 break;
1278 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1279 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
1280 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1281 break;
1282 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1283 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1284 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1285 break;
1286 case D3DTOP_BUMPENVMAP:
1287 break;
1288 case D3DTOP_BUMPENVMAPLUMINANCE:
1289 break;
1290 case D3DTOP_DOTPRODUCT3:
1291 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1292 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1293 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1294 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1295 break;
1296 case D3DTOP_MULTIPLYADD:
1297 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1298 break;
1299 case D3DTOP_LERP:
1300 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1301 break;
1302 case D3DTOP_DISABLE:
1303 /* no-op ? */
1304 break;
1305 default:
1306 assert(!"invalid D3DTOP");
1307 break;
1308 }
1309 ureg_release_temporary(ureg, tmp);
1310 ureg_release_temporary(ureg, tmp2);
1311 }
1312
1313 static void *
nine_ff_build_ps(struct NineDevice9 * device,struct nine_ff_ps_key * key)1314 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1315 {
1316 struct ps_build_ctx ps;
1317 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1318 struct ureg_dst oCol;
1319 unsigned s;
1320 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1321
1322 memset(&ps, 0, sizeof(ps));
1323 ps.ureg = ureg;
1324 ps.stage.index_pre_mod = -1;
1325
1326 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1327
1328 ps.rCur = ureg_DECL_temporary(ureg);
1329 ps.rTmp = ureg_DECL_temporary(ureg);
1330 ps.rTex = ureg_DECL_temporary(ureg);
1331 ps.rCurSrc = ureg_src(ps.rCur);
1332 ps.rTmpSrc = ureg_src(ps.rTmp);
1333 ps.rTexSrc = ureg_src(ps.rTex);
1334
1335 /* Initial values */
1336 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1337 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1338 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1339
1340 for (s = 0; s < 8; ++s) {
1341 ps.s[s] = ureg_src_undef();
1342
1343 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1344 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1345 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1346 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1347 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1348
1349 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1350 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1351 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1352 ps.s[s] = ureg_DECL_sampler(ureg, s);
1353 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1354 }
1355 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1356 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1357 ps.s[s] = ureg_DECL_sampler(ureg, s);
1358 }
1359
1360 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1361 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1362 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1363 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1364 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1365
1366 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1367 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1368 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1369 ps.s[s] = ureg_DECL_sampler(ureg, s);
1370 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1371 }
1372 }
1373 }
1374 if (key->specular)
1375 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1376
1377 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1378
1379 /* Run stages.
1380 */
1381 for (s = 0; s < 8; ++s) {
1382 unsigned colorarg[3];
1383 unsigned alphaarg[3];
1384 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1385 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1386 struct ureg_dst dst;
1387 struct ureg_src arg[3];
1388
1389 if (key->ts[s].colorop == D3DTOP_DISABLE) {
1390 assert (key->ts[s].alphaop == D3DTOP_DISABLE);
1391 continue;
1392 }
1393 ps.stage.index = s;
1394
1395 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1396 nine_D3DTOP_to_str(key->ts[s].colorop),
1397 nine_D3DTOP_to_str(key->ts[s].alphaop));
1398
1399 if (!ureg_src_is_undef(ps.s[s])) {
1400 unsigned target;
1401 struct ureg_src texture_coord = ps.vT[s];
1402 struct ureg_dst delta;
1403 switch (key->ts[s].textarget) {
1404 case 0: target = TGSI_TEXTURE_1D; break;
1405 case 1: target = TGSI_TEXTURE_2D; break;
1406 case 2: target = TGSI_TEXTURE_3D; break;
1407 case 3: target = TGSI_TEXTURE_CUBE; break;
1408 /* this is a 2 bit bitfield, do I really need a default case ? */
1409 }
1410
1411 /* Modify coordinates */
1412 if (s >= 1 &&
1413 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1414 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1415 delta = ureg_DECL_temporary(ureg);
1416 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1417 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1418 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1419 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1420 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1421 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1422 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1423 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1424 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1425 /* Prepare luminance multiplier
1426 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1427 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1428 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1429 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1430
1431 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1432 }
1433 }
1434 if (key->projected & (3 << (s *2))) {
1435 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1436 if (dim == 4)
1437 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1438 else {
1439 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1440 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1441 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1442 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1443 ureg_release_temporary(ureg, tmp);
1444 }
1445 } else {
1446 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1447 }
1448 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1449 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1450 }
1451
1452 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1453 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1454 continue;
1455
1456 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1457
1458 if (ps.stage.index_pre_mod == ps.stage.index) {
1459 ps.rMod = ureg_DECL_temporary(ureg);
1460 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1461 }
1462
1463 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1464 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1465 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1466 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1467 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1468 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1469
1470 if (key->ts[s].colorop != key->ts[s].alphaop ||
1471 colorarg[0] != alphaarg[0] ||
1472 colorarg[1] != alphaarg[1] ||
1473 colorarg[2] != alphaarg[2])
1474 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1475
1476 /* Special DOTPRODUCT behaviour (see wine tests) */
1477 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1478 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1479
1480 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1481 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1482 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1483 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1484
1485 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1486 dst.WriteMask = TGSI_WRITEMASK_W;
1487
1488 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1489 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1490 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1491 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1492 }
1493 }
1494
1495 if (key->specular)
1496 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);
1497
1498 /* Fog.
1499 */
1500 if (key->fog_mode) {
1501 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1502 struct ureg_src vPos;
1503 if (device->screen->get_param(device->screen,
1504 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1505 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1506 } else {
1507 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1508 TGSI_INTERPOLATE_LINEAR);
1509 }
1510
1511 /* Source is either W or Z.
1512 * When we use vs ff,
1513 * Z is when an orthogonal projection matrix is detected,
1514 * W (WFOG) else.
1515 * Z is used for programmable vs.
1516 * Note: Tests indicate that the projection matrix coefficients do
1517 * actually affect pixel fog (and not vertex fog) when vs ff is used,
1518 * which justifies taking the position's w instead of taking the z coordinate
1519 * before the projection in the vs shader.
1520 */
1521 if (!key->fog_source)
1522 ureg_MOV(ureg, rFog, _ZZZZ(vPos));
1523 else
1524 /* Position's w is 1/w */
1525 ureg_RCP(ureg, rFog, _WWWW(vPos));
1526
1527 if (key->fog_mode == D3DFOG_EXP) {
1528 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1529 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1530 ureg_EX2(ureg, rFog, _X(rFog));
1531 } else
1532 if (key->fog_mode == D3DFOG_EXP2) {
1533 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1534 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1535 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1536 ureg_EX2(ureg, rFog, _X(rFog));
1537 } else
1538 if (key->fog_mode == D3DFOG_LINEAR) {
1539 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
1540 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1541 }
1542 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1543 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1544 } else
1545 if (key->fog) {
1546 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1547 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1548 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1549 } else {
1550 ureg_MOV(ureg, oCol, ps.rCurSrc);
1551 }
1552
1553 ureg_END(ureg);
1554 nine_ureg_tgsi_dump(ureg, FALSE);
1555 return ureg_create_shader_and_destroy(ureg, device->context.pipe);
1556 }
1557
1558 static struct NineVertexShader9 *
nine_ff_get_vs(struct NineDevice9 * device)1559 nine_ff_get_vs(struct NineDevice9 *device)
1560 {
1561 const struct nine_context *context = &device->context;
1562 struct NineVertexShader9 *vs;
1563 enum pipe_error err;
1564 struct vs_build_ctx bld;
1565 struct nine_ff_vs_key key;
1566 unsigned s, i;
1567 boolean has_indexes = false;
1568 boolean has_weights = false;
1569 char input_texture_coord[8];
1570
1571 assert(sizeof(key) <= sizeof(key.value32));
1572
1573 memset(&key, 0, sizeof(key));
1574 memset(&bld, 0, sizeof(bld));
1575 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1576
1577 bld.key = &key;
1578
1579 /* FIXME: this shouldn't be NULL, but it is on init */
1580 if (context->vdecl) {
1581 key.color0in_one = 1;
1582 key.color1in_zero = 1;
1583 for (i = 0; i < context->vdecl->nelems; i++) {
1584 uint16_t usage = context->vdecl->usage_map[i];
1585 if (usage == NINE_DECLUSAGE_POSITIONT)
1586 key.position_t = 1;
1587 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1588 key.color0in_one = 0;
1589 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1590 key.color1in_zero = 0;
1591 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1592 has_indexes = true;
1593 key.passthrough |= 1 << usage;
1594 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1595 has_weights = true;
1596 key.passthrough |= 1 << usage;
1597 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
1598 key.has_normal = 1;
1599 key.passthrough |= 1 << usage;
1600 } else if (usage == NINE_DECLUSAGE_PSIZE)
1601 key.vertexpointsize = 1;
1602 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1603 s = usage / NINE_DECLUSAGE_COUNT;
1604 if (s < 8)
1605 input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);
1606 else
1607 DBG("FF given texture coordinate >= 8. Ignoring\n");
1608 } else if (usage < NINE_DECLUSAGE_NONE)
1609 key.passthrough |= 1 << usage;
1610 }
1611 }
1612 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1613 * We do restrict to indices 0 */
1614 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1615 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1616 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1617 if (!key.position_t)
1618 key.passthrough = 0;
1619 key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];
1620
1621 key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active;
1622 key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
1623 if (key.position_t) {
1624 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1625 key.lighting = 0;
1626 }
1627 if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {
1628 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1629 key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1630 key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1631 key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1632 key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1633 }
1634 key.fog = !!context->rs[D3DRS_FOGENABLE];
1635 key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;
1636 if (key.fog_mode)
1637 key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];
1638
1639 key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];
1640 key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];
1641 key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];
1642
1643 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1644 key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1645
1646 switch (context->rs[D3DRS_VERTEXBLEND]) {
1647 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1648 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1649 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1650 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1651 case D3DVBF_TWEENING: key.vertextween = 1; break;
1652 default:
1653 assert(!"invalid D3DVBF");
1654 break;
1655 }
1656 if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1657 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1658 }
1659
1660 for (s = 0; s < 8; ++s) {
1661 unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1662 unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
1663 unsigned dim;
1664
1665 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1666 gen = NINED3DTSS_TCI_PASSTHRU;
1667
1668 if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)
1669 gen = NINED3DTSS_TCI_DISABLE;
1670
1671 key.tc_gen |= gen << (s * 3);
1672 key.tc_idx |= idx << (s * 3);
1673 key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);
1674
1675 dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1676 if (dim > 4)
1677 dim = input_texture_coord[idx];
1678 if (dim == 1) /* NV behaviour */
1679 dim = 0;
1680 key.tc_dim_output |= dim << (s * 3);
1681 }
1682
1683 vs = util_hash_table_get(device->ff.ht_vs, &key);
1684 if (vs)
1685 return vs;
1686 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1687
1688 nine_ff_prune_vs(device);
1689 if (vs) {
1690 unsigned n;
1691
1692 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1693
1694 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1695 (void)err;
1696 assert(err == PIPE_OK);
1697 device->ff.num_vs++;
1698 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1699
1700 vs->num_inputs = bld.num_inputs;
1701 for (n = 0; n < bld.num_inputs; ++n)
1702 vs->input_map[n].ndecl = bld.input[n];
1703
1704 vs->position_t = key.position_t;
1705 vs->point_size = key.vertexpointsize | key.pointscale;
1706 }
1707 return vs;
1708 }
1709
/* Fetch the D3DTS_<n> transform via nine_state_access_transform().
 * The expansion references a variable named `context`, so the macro can only
 * be used where such a variable (with an `ff` member) is in scope.
 * FALSE is passed as the third argument (presumably "do not allocate" --
 * confirm against nine_state_access_transform). */
#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
/* Non-zero when the bit for D3DTS_<n> is set in (s)->ff.changed.transform,
 * which is indexed as an array of 32-bit words. */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1712
1713 static struct NinePixelShader9 *
nine_ff_get_ps(struct NineDevice9 * device)1714 nine_ff_get_ps(struct NineDevice9 *device)
1715 {
1716 struct nine_context *context = &device->context;
1717 D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION);
1718 struct NinePixelShader9 *ps;
1719 enum pipe_error err;
1720 struct nine_ff_ps_key key;
1721 unsigned s;
1722 uint8_t sampler_mask = 0;
1723
1724 assert(sizeof(key) <= sizeof(key.value32));
1725
1726 memset(&key, 0, sizeof(key));
1727 for (s = 0; s < 8; ++s) {
1728 key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
1729 key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
1730 const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1731 const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1732 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
1733 * ALPHAOP cannot be enabled if COLOROP is disabled.
1734 * Verified on Windows. */
1735 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1736 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1737 break;
1738 }
1739
1740 if (!context->texture[s].enabled &&
1741 ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
1742 used_c & 0x1) ||
1743 (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
1744 used_c & 0x2) ||
1745 (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
1746 used_c & 0x4))) {
1747 /* Tested on Windows: Invalid texture read disables the stage
1748 * and the subsequent ones, but only for colorop. For alpha,
1749 * it's as if the texture had alpha of 1.0, which is what
1750 * has our dummy texture in that case. Invalid color also
1751 * disabled the following alpha stages. */
1752 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1753 break;
1754 }
1755
1756 if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
1757 context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
1758 context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
1759 context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
1760 context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
1761 context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
1762 sampler_mask |= (1 << s);
1763
1764 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1765 if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0];
1766 if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1];
1767 if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2];
1768 if (used_c & 0x1) key.colorarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1769 if (used_c & 0x1) key.colorarg_b5[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1770 if (used_c & 0x2) key.colorarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1771 if (used_c & 0x2) key.colorarg_b5[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1772 if (used_c & 0x4) key.colorarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1773 if (used_c & 0x4) key.colorarg_b5[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1774 }
1775 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1776 if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1777 if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1778 if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1779 if (used_a & 0x1) key.alphaarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1780 if (used_a & 0x2) key.alphaarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1781 if (used_a & 0x4) key.alphaarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1782 }
1783 key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1784
1785 if (context->texture[s].enabled) {
1786 switch (context->texture[s].type) {
1787 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1788 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1789 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1790 default:
1791 assert(!"unexpected texture type");
1792 break;
1793 }
1794 } else {
1795 key.ts[s].textarget = 1;
1796 }
1797 }
1798
1799 /* Note: If colorop is D3DTOP_DISABLE for the first stage
1800 * (which implies alphaop is too), nothing particular happens,
1801 * that is, current is equal to diffuse (which is the case anyway,
1802 * because it is how it is initialized).
1803 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
1804 * because then if the resultarg is TEMP, then diffuse alpha is written
1805 * to it. */
1806 if (key.ts[0].colorop != D3DTOP_DISABLE &&
1807 key.ts[0].alphaop == D3DTOP_DISABLE &&
1808 key.ts[0].resultarg != 0) {
1809 key.ts[0].alphaop = D3DTOP_SELECTARG1;
1810 key.ts[0].alphaarg1 = D3DTA_DIFFUSE;
1811 }
1812 /* When no alpha stage writes to current, diffuse alpha is taken.
1813 * Since we initialize current to diffuse, we have the behaviour. */
1814
1815 /* Last stage always writes to Current */
1816 if (s >= 1)
1817 key.ts[s-1].resultarg = 0;
1818
1819 key.projected = nine_ff_get_projected_key(context);
1820 key.specular = !!context->rs[D3DRS_SPECULARENABLE];
1821
1822 for (; s < 8; ++s)
1823 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1824 if (context->rs[D3DRS_FOGENABLE])
1825 key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];
1826 key.fog = !!context->rs[D3DRS_FOGENABLE];
1827 /* Pixel fog (with WFOG advertised): source is either Z or W.
1828 * W is the source if vs ff is used, and the
1829 * projection matrix is not orthogonal.
1830 * Tests on Win 10 seem to indicate _34
1831 * and _33 are checked against 0, 1. */
1832 if (key.fog_mode && key.fog)
1833 key.fog_source = !context->programmable_vs &&
1834 !(projection_matrix->_34 == 0.0f &&
1835 projection_matrix->_44 == 1.0f);
1836
1837 ps = util_hash_table_get(device->ff.ht_ps, &key);
1838 if (ps)
1839 return ps;
1840 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1841
1842 nine_ff_prune_ps(device);
1843 if (ps) {
1844 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1845
1846 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1847 (void)err;
1848 assert(err == PIPE_OK);
1849 device->ff.num_ps++;
1850 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1851
1852 ps->rt_mask = 0x1;
1853 ps->sampler_mask = sampler_mask;
1854 }
1855 return ps;
1856 }
1857
1858 static void
nine_ff_load_vs_transforms(struct NineDevice9 * device)1859 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1860 {
1861 struct nine_context *context = &device->context;
1862 D3DMATRIX T;
1863 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1864 unsigned i;
1865
1866 /* TODO: make this nicer, and only upload the ones we need */
1867 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1868
1869 if (IS_D3DTS_DIRTY(context, WORLD) ||
1870 IS_D3DTS_DIRTY(context, VIEW) ||
1871 IS_D3DTS_DIRTY(context, PROJECTION)) {
1872 /* WVP, WV matrices */
1873 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1874 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1875
1876 /* normal matrix == transpose(inverse(WV)) */
1877 nine_d3d_matrix_inverse(&T, &M[1]);
1878 nine_d3d_matrix_transpose(&M[4], &T);
1879
1880 /* P matrix */
1881 M[2] = *GET_D3DTS(PROJECTION);
1882
1883 /* V and W matrix */
1884 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1885 M[40] = M[1];
1886 }
1887
1888 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1889 /* load other world matrices */
1890 for (i = 1; i <= 8; ++i) {
1891 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1892 }
1893 }
1894
1895 device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);
1896 }
1897
1898 static void
nine_ff_load_lights(struct NineDevice9 * device)1899 nine_ff_load_lights(struct NineDevice9 *device)
1900 {
1901 struct nine_context *context = &device->context;
1902 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1903 unsigned l;
1904
1905 if (context->changed.group & NINE_STATE_FF_MATERIAL) {
1906 const D3DMATERIAL9 *mtl = &context->ff.material;
1907
1908 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1909 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1910 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1911 dst[23].x = mtl->Power;
1912 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1913 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);
1914 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1915 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1916 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1917 }
1918
1919 if (!(context->changed.group & NINE_STATE_FF_LIGHTING))
1920 return;
1921
1922 for (l = 0; l < context->ff.num_lights_active; ++l) {
1923 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
1924
1925 dst[32 + l * 8].x = light->Type;
1926 dst[32 + l * 8].y = light->Attenuation0;
1927 dst[32 + l * 8].z = light->Attenuation1;
1928 dst[32 + l * 8].w = light->Attenuation2;
1929 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1930 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1931 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1932 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1933 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1934 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1935 dst[37 + l * 8].w = light->Falloff;
1936 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1937 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1938 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1939 dst[39 + l * 8].w = (l + 1) == context->ff.num_lights_active;
1940 }
1941 }
1942
1943 static void
nine_ff_load_point_and_fog_params(struct NineDevice9 * device)1944 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1945 {
1946 struct nine_context *context = &device->context;
1947 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1948
1949 if (!(context->changed.group & NINE_STATE_FF_OTHER))
1950 return;
1951 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
1952 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
1953 dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]);
1954 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);
1955 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);
1956 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);
1957 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);
1958 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
1959 if (isinf(dst[28].y))
1960 dst[28].y = 0.0f;
1961 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
1962 }
1963
1964 static void
nine_ff_load_tex_matrices(struct NineDevice9 * device)1965 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1966 {
1967 struct nine_context *context = &device->context;
1968 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1969 unsigned s;
1970
1971 if (!(context->ff.changed.transform[0] & 0xff0000))
1972 return;
1973 for (s = 0; s < 8; ++s) {
1974 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
1975 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE));
1976 }
1977 }
1978
1979 static void
nine_ff_load_ps_params(struct NineDevice9 * device)1980 nine_ff_load_ps_params(struct NineDevice9 *device)
1981 {
1982 struct nine_context *context = &device->context;
1983 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1984 unsigned s;
1985
1986 if (!(context->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1987 return;
1988
1989 for (s = 0; s < 8; ++s)
1990 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
1991
1992 for (s = 0; s < 8; ++s) {
1993 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1994 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1995 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1996 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1997 if (s & 1) {
1998 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1999 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2000 } else {
2001 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2002 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2003 }
2004 }
2005
2006 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);
2007 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);
2008 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);
2009 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
2010 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2011 }
2012
2013 static void
nine_ff_load_viewport_info(struct NineDevice9 * device)2014 nine_ff_load_viewport_info(struct NineDevice9 *device)
2015 {
2016 D3DVIEWPORT9 *viewport = &device->context.viewport;
2017 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
2018 float diffZ = viewport->MaxZ - viewport->MinZ;
2019
2020 /* Note: the other functions avoids to fill the const again if nothing changed.
2021 * But we don't have much to fill, and adding code to allow that may be complex
2022 * so just fill it always */
2023 dst[100].x = 2.0f / (float)(viewport->Width);
2024 dst[100].y = 2.0f / (float)(viewport->Height);
2025 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2026 dst[100].w = (float)(viewport->Width);
2027 dst[101].x = (float)(viewport->X);
2028 dst[101].y = (float)(viewport->Y);
2029 dst[101].z = (float)(viewport->MinZ);
2030 }
2031
2032 void
nine_ff_update(struct NineDevice9 * device)2033 nine_ff_update(struct NineDevice9 *device)
2034 {
2035 struct nine_context *context = &device->context;
2036 struct pipe_constant_buffer cb;
2037
2038 DBG("vs=%p ps=%p\n", context->vs, context->ps);
2039
2040 /* NOTE: the only reference belongs to the hash table */
2041 if (!context->programmable_vs) {
2042 device->ff.vs = nine_ff_get_vs(device);
2043 context->changed.group |= NINE_STATE_VS;
2044 }
2045 if (!context->ps) {
2046 device->ff.ps = nine_ff_get_ps(device);
2047 context->changed.group |= NINE_STATE_PS;
2048 }
2049
2050 if (!context->programmable_vs) {
2051 nine_ff_load_vs_transforms(device);
2052 nine_ff_load_tex_matrices(device);
2053 nine_ff_load_lights(device);
2054 nine_ff_load_point_and_fog_params(device);
2055 nine_ff_load_viewport_info(device);
2056
2057 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
2058
2059 cb.buffer_offset = 0;
2060 cb.buffer = NULL;
2061 cb.user_buffer = device->ff.vs_const;
2062 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2063
2064 if (!device->driver_caps.user_cbufs) {
2065 context->pipe_data.cb_vs_ff.buffer_size = cb.buffer_size;
2066 u_upload_data(device->constbuf_uploader,
2067 0,
2068 cb.buffer_size,
2069 device->constbuf_alignment,
2070 cb.user_buffer,
2071 &context->pipe_data.cb_vs_ff.buffer_offset,
2072 &context->pipe_data.cb_vs_ff.buffer);
2073 u_upload_unmap(device->constbuf_uploader);
2074 context->pipe_data.cb_vs_ff.user_buffer = NULL;
2075 } else
2076 context->pipe_data.cb_vs_ff = cb;
2077 context->commit |= NINE_STATE_COMMIT_CONST_VS;
2078 }
2079
2080 if (!context->ps) {
2081 nine_ff_load_ps_params(device);
2082
2083 cb.buffer_offset = 0;
2084 cb.buffer = NULL;
2085 cb.user_buffer = device->ff.ps_const;
2086 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2087
2088 if (!device->driver_caps.user_cbufs) {
2089 context->pipe_data.cb_ps_ff.buffer_size = cb.buffer_size;
2090 u_upload_data(device->constbuf_uploader,
2091 0,
2092 cb.buffer_size,
2093 device->constbuf_alignment,
2094 cb.user_buffer,
2095 &context->pipe_data.cb_ps_ff.buffer_offset,
2096 &context->pipe_data.cb_ps_ff.buffer);
2097 u_upload_unmap(device->constbuf_uploader);
2098 context->pipe_data.cb_ps_ff.user_buffer = NULL;
2099 } else
2100 context->pipe_data.cb_ps_ff = cb;
2101 context->commit |= NINE_STATE_COMMIT_CONST_PS;
2102 }
2103
2104 context->changed.group &= ~NINE_STATE_FF;
2105 }
2106
2107
2108 boolean
nine_ff_init(struct NineDevice9 * device)2109 nine_ff_init(struct NineDevice9 *device)
2110 {
2111 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
2112 nine_ff_vs_key_comp);
2113 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
2114 nine_ff_ps_key_comp);
2115
2116 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
2117 nine_ff_fvf_key_comp);
2118
2119 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2120 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2121
2122 return device->ff.ht_vs && device->ff.ht_ps &&
2123 device->ff.ht_fvf &&
2124 device->ff.vs_const && device->ff.ps_const;
2125 }
2126
nine_ff_ht_delete_cb(void * key,void * value,void * data)2127 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2128 {
2129 NineUnknown_Unbind(NineUnknown(value));
2130 return PIPE_OK;
2131 }
2132
/* Release everything nine_ff_init() set up.
 *
 * Each existing hash table has every stored object unbound through
 * nine_ff_ht_delete_cb() before the table itself is destroyed. Tables that
 * were never created (NULL) are skipped. The constant arrays are freed last.
 */
void
nine_ff_fini(struct NineDevice9 *device)
{
    if (device->ff.ht_vs) {
        util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
        util_hash_table_destroy(device->ff.ht_vs);
    }
    if (device->ff.ht_ps) {
        util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
        util_hash_table_destroy(device->ff.ht_ps);
    }
    if (device->ff.ht_fvf) {
        util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
        util_hash_table_destroy(device->ff.ht_fvf);
    }
    /* The current shaders were only referenced through the tables above, so
     * the cached pointers are simply cleared. */
    device->ff.vs = NULL; /* destroyed by unbinding from hash table */
    device->ff.ps = NULL;

    FREE(device->ff.vs_const);
    FREE(device->ff.ps_const);
}
2154
2155 static void
nine_ff_prune_vs(struct NineDevice9 * device)2156 nine_ff_prune_vs(struct NineDevice9 *device)
2157 {
2158 struct nine_context *context = &device->context;
2159
2160 if (device->ff.num_vs > 100) {
2161 /* could destroy the bound one here, so unbind */
2162 context->pipe->bind_vs_state(context->pipe, NULL);
2163 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2164 util_hash_table_clear(device->ff.ht_vs);
2165 device->ff.num_vs = 0;
2166 context->changed.group |= NINE_STATE_VS;
2167 }
2168 }
2169 static void
nine_ff_prune_ps(struct NineDevice9 * device)2170 nine_ff_prune_ps(struct NineDevice9 *device)
2171 {
2172 struct nine_context *context = &device->context;
2173
2174 if (device->ff.num_ps > 100) {
2175 /* could destroy the bound one here, so unbind */
2176 context->pipe->bind_fs_state(context->pipe, NULL);
2177 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2178 util_hash_table_clear(device->ff.ht_ps);
2179 device->ff.num_ps = 0;
2180 context->changed.group |= NINE_STATE_PS;
2181 }
2182 }
2183
2184 /* ========================================================================== */
2185
2186 /* Matrix multiplication:
2187 *
2188 * in memory: 0 1 2 3 (row major)
2189 * 4 5 6 7
2190 * 8 9 a b
2191 * c d e f
2192 *
2193 * cA cB cC cD
2194 * r0 = (r0 * cA) (r0 * cB) . .
2195 * r1 = (r1 * cA) (r1 * cB)
2196 * r2 = (r2 * cA) .
2197 * r3 = (r3 * cA) .
2198 *
2199 * r: (11) (12) (13) (14)
2200 * (21) (22) (23) (24)
2201 * (31) (32) (33) (34)
2202 * (41) (42) (43) (44)
2203 * l: (11 12 13 14)
2204 * (21 22 23 24)
2205 * (31 32 33 34)
2206 * (41 42 43 44)
2207 *
2208 * v: (x y z 1 )
2209 *
2210 * t.xyzw = MUL(v.xxxx, r[0]);
2211 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2212 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2213 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2214 *
2215 * v.x = DP4(v, c[0]);
2216 * v.y = DP4(v, c[1]);
2217 * v.z = DP4(v, c[2]);
2218 * v.w = DP4(v, c[3]) = 1
2219 */
2220
2221 /*
2222 static void
2223 nine_D3DMATRIX_print(const D3DMATRIX *M)
2224 {
2225 DBG("\n(%f %f %f %f)\n"
2226 "(%f %f %f %f)\n"
2227 "(%f %f %f %f)\n"
2228 "(%f %f %f %f)\n",
2229 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2230 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2231 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2232 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2233 }
2234 */
2235
2236 static inline float
nine_DP4_row_col(const D3DMATRIX * A,int r,const D3DMATRIX * B,int c)2237 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2238 {
2239 return A->m[r][0] * B->m[0][c] +
2240 A->m[r][1] * B->m[1][c] +
2241 A->m[r][2] * B->m[2][c] +
2242 A->m[r][3] * B->m[3][c];
2243 }
2244
2245 static inline float
nine_DP4_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2246 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2247 {
2248 return v->x * M->m[0][c] +
2249 v->y * M->m[1][c] +
2250 v->z * M->m[2][c] +
2251 1.0f * M->m[3][c];
2252 }
2253
2254 static inline float
nine_DP3_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2255 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2256 {
2257 return v->x * M->m[0][c] +
2258 v->y * M->m[1][c] +
2259 v->z * M->m[2][c];
2260 }
2261
2262 void
nine_d3d_matrix_matrix_mul(D3DMATRIX * D,const D3DMATRIX * L,const D3DMATRIX * R)2263 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2264 {
2265 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2266 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2267 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2268 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2269
2270 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2271 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2272 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2273 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2274
2275 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2276 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2277 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2278 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2279
2280 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2281 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2282 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2283 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2284 }
2285
2286 void
nine_d3d_vector4_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2287 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2288 {
2289 d->x = nine_DP4_vec_col(v, M, 0);
2290 d->y = nine_DP4_vec_col(v, M, 1);
2291 d->z = nine_DP4_vec_col(v, M, 2);
2292 }
2293
2294 void
nine_d3d_vector3_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2295 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2296 {
2297 d->x = nine_DP3_vec_col(v, M, 0);
2298 d->y = nine_DP3_vec_col(v, M, 1);
2299 d->z = nine_DP3_vec_col(v, M, 2);
2300 }
2301
2302 void
nine_d3d_matrix_transpose(D3DMATRIX * D,const D3DMATRIX * M)2303 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2304 {
2305 unsigned i, j;
2306 for (i = 0; i < 4; ++i)
2307 for (j = 0; j < 4; ++j)
2308 D->m[i][j] = M->m[j][i];
2309 }
2310
2311 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2312 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2313 if (t > 0.0f) pos += t; else neg += t; } while(0)
2314
2315 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2316 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2317 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2318 float
nine_d3d_matrix_det(const D3DMATRIX * M)2319 nine_d3d_matrix_det(const D3DMATRIX *M)
2320 {
2321 float pos = 0.0f;
2322 float neg = 0.0f;
2323
2324 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2325 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2326 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2327
2328 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2329 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2330 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2331
2332 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2333 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2334 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2335
2336 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2337 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2338 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2339
2340 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2341 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2342 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2343
2344 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2345 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2346 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2347
2348 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2349 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2350 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2351
2352 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2353 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2354 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2355
2356 return pos + neg;
2357 }
2358
2359 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2360 * I have no idea where this code came from.
2361 */
2362 void
nine_d3d_matrix_inverse(D3DMATRIX * D,const D3DMATRIX * M)2363 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2364 {
2365 int i, k;
2366 float det;
2367
2368 D->m[0][0] =
2369 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2370 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2371 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2372 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2373 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2374 M->m[1][3] * M->m[3][1] * M->m[2][2];
2375
2376 D->m[0][1] =
2377 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2378 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2379 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2380 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2381 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2382 M->m[0][3] * M->m[3][1] * M->m[2][2];
2383
2384 D->m[0][2] =
2385 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2386 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2387 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2388 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2389 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2390 M->m[0][3] * M->m[3][1] * M->m[1][2];
2391
2392 D->m[0][3] =
2393 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2394 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2395 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2396 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2397 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2398 M->m[0][3] * M->m[2][1] * M->m[1][2];
2399
2400 D->m[1][0] =
2401 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2402 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2403 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2404 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2405 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2406 M->m[1][3] * M->m[3][0] * M->m[2][2];
2407
2408 D->m[1][1] =
2409 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2410 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2411 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2412 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2413 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2414 M->m[0][3] * M->m[3][0] * M->m[2][2];
2415
2416 D->m[1][2] =
2417 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2418 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2419 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2420 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2421 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2422 M->m[0][3] * M->m[3][0] * M->m[1][2];
2423
2424 D->m[1][3] =
2425 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2426 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2427 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2428 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2429 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2430 M->m[0][3] * M->m[2][0] * M->m[1][2];
2431
2432 D->m[2][0] =
2433 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2434 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2435 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2436 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2437 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2438 M->m[1][3] * M->m[3][0] * M->m[2][1];
2439
2440 D->m[2][1] =
2441 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2442 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2443 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2444 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2445 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2446 M->m[0][3] * M->m[3][0] * M->m[2][1];
2447
2448 D->m[2][2] =
2449 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2450 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2451 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2452 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2453 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2454 M->m[0][3] * M->m[3][0] * M->m[1][1];
2455
2456 D->m[2][3] =
2457 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2458 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2459 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2460 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2461 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2462 M->m[0][3] * M->m[2][0] * M->m[1][1];
2463
2464 D->m[3][0] =
2465 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2466 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2467 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2468 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2469 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2470 M->m[1][2] * M->m[3][0] * M->m[2][1];
2471
2472 D->m[3][1] =
2473 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2474 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2475 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2476 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2477 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2478 M->m[0][2] * M->m[3][0] * M->m[2][1];
2479
2480 D->m[3][2] =
2481 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2482 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2483 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2484 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2485 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2486 M->m[0][2] * M->m[3][0] * M->m[1][1];
2487
2488 D->m[3][3] =
2489 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2490 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2491 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2492 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2493 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2494 M->m[0][2] * M->m[2][0] * M->m[1][1];
2495
2496 det =
2497 M->m[0][0] * D->m[0][0] +
2498 M->m[1][0] * D->m[0][1] +
2499 M->m[2][0] * D->m[0][2] +
2500 M->m[3][0] * D->m[0][3];
2501
2502 if (det < 1e-30) {/* non inversible */
2503 *D = *M; /* wine tests */
2504 return;
2505 }
2506
2507 det = 1.0 / det;
2508
2509 for (i = 0; i < 4; i++)
2510 for (k = 0; k < 4; k++)
2511 D->m[i][k] *= det;
2512
2513 #ifdef DEBUG
2514 {
2515 D3DMATRIX I;
2516
2517 nine_d3d_matrix_matrix_mul(&I, D, M);
2518
2519 for (i = 0; i < 4; ++i)
2520 for (k = 0; k < 4; ++k)
2521 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2522 DBG("Matrix inversion check FAILED !\n");
2523 }
2524 #endif
2525 }
2526