1 /*
2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com>
3 * Copyright Axel Davy <davyaxel0@gmail.com>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "device9.h"
8 #include "basetexture9.h"
9 #include "vertexdeclaration9.h"
10 #include "vertexshader9.h"
11 #include "pixelshader9.h"
12 #include "nine_ff.h"
13 #include "nine_defines.h"
14 #include "nine_helpers.h"
15 #include "nine_pipe.h"
16 #include "nine_dump.h"
17
18 #include "pipe/p_context.h"
19 #include "tgsi/tgsi_ureg.h"
20 #include "tgsi/tgsi_dump.h"
21 #include "util/bitscan.h"
22 #include "util/box.h"
23 #include "util/u_hash_table.h"
24 #include "util/u_upload_mgr.h"
25
26 #define DBG_CHANNEL DBG_FF
27
28 #define NINE_FF_NUM_VS_CONST 204
29 #define NINE_FF_NUM_PS_CONST 24
30
/* Four-component float vector (x, y, z, w). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
35
/* Key describing one fixed-function vertex shader variant.
 *
 * The bit-fields are overlaid (through the union) with value32[] and
 * value64[], so the whole key can be hashed and compared as raw words; see
 * nine_ff_vs_key_hash() and nine_ff_vs_key_comp(). The padN fields round
 * each group of bit-fields up to a full 32-bit word: six words in total.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* --- word 0 --- */
            uint32_t position_t : 1; /* position input uses the POSITIONT usage */
            uint32_t lighting : 1;
            uint32_t darkness : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1; /* a PSIZE vertex input is read */
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3; /* number of world matrices blended */
            uint32_t vertexblend_indexed : 1; /* matrices picked via BLENDINDICES */
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1; /* COLOR0 input replaced by constant 1 */
            uint32_t color1in_zero : 1; /* COLOR1 input replaced by constant 0 */
            uint32_t has_normal : 1;
            uint32_t fog : 1;
            uint32_t normalizenormals : 1;
            uint32_t ucp : 1;
            uint32_t pad1 : 4;
            /* --- word 1 --- */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;
            /* --- word 2 --- */
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;
            /* --- word 3 --- */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
            /* --- word 4 --- */
            uint32_t tc_idx : 24; /* 8 * 3 bits */
            uint32_t clipplane_emulate : 8;
            /* --- word 5 --- */
            uint32_t passthrough; /* bitmask tested with (1 << NINE_DECLUSAGE_x) */
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
76
/* Texture stage state, as packed into each ts[] entry of the key:
 *
 * COLOROP       D3DTOP 5 bit
 * ALPHAOP       D3DTOP 5 bit
 * COLORARG0     D3DTA  3 bit
 * COLORARG1     D3DTA  3 bit
 * COLORARG2     D3DTA  3 bit
 * ALPHAARG0     D3DTA  3 bit
 * ALPHAARG1     D3DTA  3 bit
 * ALPHAARG2     D3DTA  3 bit
 * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET            2 bit (1D/2D/3D/CUBE)
 * padding              1 bit
 * ===========================
 *         32 bit per stage
 */
/* Key describing one fixed-function pixel shader variant.
 *
 * The fields are overlaid (through the union) with value32[] and value64[],
 * so the whole key can be hashed and compared as raw words; see
 * nine_ff_ps_key_hash() and nine_ff_ps_key_comp(). Twelve 32-bit words in
 * total.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            /* Per-stage state, one 32-bit word for each of the 8 stages. */
            struct {
                uint32_t colorop : 5;
                uint32_t alphaop : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad : 1;
                /* that's 32 bit exactly */
            } ts[8];
            /* NOTE(review): presumably 2 bits per stage - confirm where the
             * key is filled in. */
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t fog_source : 1; /* 0: Z, 1: W */
            uint32_t specular : 1;
            uint32_t alpha_test_emulation : 3;
            uint32_t flatshade : 1;
            uint32_t pad1 : 7; /* 9 32-bit words with this */
            /* NOTE(review): these look like the extra argument bits that do
             * not fit the 3-bit colorargN/alphaargN fields above (one byte
             * per argument, one bit per stage) - confirm against the code
             * that builds the key. */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3]; /* rounds the key up to 12 32-bit words */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
127
nine_ff_vs_key_hash(const void * key)128 static uint32_t nine_ff_vs_key_hash(const void *key)
129 {
130 const struct nine_ff_vs_key *vs = key;
131 unsigned i;
132 uint32_t hash = vs->value32[0];
133 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
134 hash ^= vs->value32[i];
135 return hash;
136 }
nine_ff_vs_key_comp(const void * key1,const void * key2)137 static bool nine_ff_vs_key_comp(const void *key1, const void *key2)
138 {
139 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
140 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
141
142 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
143 }
nine_ff_ps_key_hash(const void * key)144 static uint32_t nine_ff_ps_key_hash(const void *key)
145 {
146 const struct nine_ff_ps_key *ps = key;
147 unsigned i;
148 uint32_t hash = ps->value32[0];
149 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
150 hash ^= ps->value32[i];
151 return hash;
152 }
nine_ff_ps_key_comp(const void * key1,const void * key2)153 static bool nine_ff_ps_key_comp(const void *key1, const void *key2)
154 {
155 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
156 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
157
158 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
159 }
nine_ff_fvf_key_hash(const void * key)160 static uint32_t nine_ff_fvf_key_hash(const void *key)
161 {
162 return *(DWORD *)key;
163 }
nine_ff_fvf_key_comp(const void * key1,const void * key2)164 static bool nine_ff_fvf_key_comp(const void *key1, const void *key2)
165 {
166 return *(DWORD *)key1 == *(DWORD *)key2;
167 }
168
/* Forward declarations of file-local helpers that are defined further down.
 * NOTE(review): going by the names, these presumably evict cached
 * fixed-function vertex/pixel shaders - confirm at the definitions. */
static void nine_ff_prune_vs(struct NineDevice9 *);
static void nine_ff_prune_ps(struct NineDevice9 *);
171
nine_ureg_tgsi_dump(struct ureg_program * ureg,bool override)172 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, bool override)
173 {
174 if (debug_get_bool_option("NINE_FF_DUMP", false) || override) {
175 const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL);
176 tgsi_dump(toks, 0);
177 ureg_free_tokens(toks);
178 }
179 }
180
/* Single-component source from a destination register: r is a ureg_dst
 * (e.g. a temporary), turned into a source with ureg_src() and restricted
 * to the named component with ureg_scalar(). */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same as above, but r already is a ureg_src. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity: the source is used with all components unchanged. */
#define _XYZW(r) (r)

/* AL should contain base address of lights table. */
/* Constant i addressed indirectly through AL.x. Expands to code that uses
 * the identifiers `ureg` and `AL` from the scope of the caller. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Constant 19 + i of `ureg`; CONST[19] is the first material constant in
 * the VS constants layout. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

/* Constant n of `ureg` (declared through ureg_DECL_constant). */
#define _CONST(n) ureg_DECL_constant(ureg, n)
201
202 /* VS FF constants layout:
203 *
204 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
205 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
206 * CONST[ 8..11] D3DTS_PROJECTION
207 * CONST[12..15] D3DTS_VIEW^(-1)
208 * CONST[16..18] Normal matrix
209 *
210 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient
211 * CONST[20] MATERIAL.Diffuse
212 * CONST[21] MATERIAL.Ambient
213 * CONST[22] MATERIAL.Specular
214 * CONST[23].x___ MATERIAL.Power
215 * CONST[24] MATERIAL.Emissive
216 * CONST[25] RS.Ambient
217 *
218 * CONST[26].x___ RS.PointSizeMin
219 * CONST[26]._y__ RS.PointSizeMax
220 * CONST[26].__z_ RS.PointSize
221 * CONST[26].___w RS.PointScaleA
222 * CONST[27].x___ RS.PointScaleB
223 * CONST[27]._y__ RS.PointScaleC
224 *
225 * CONST[28].x___ RS.FogEnd
226 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
 * CONST[28].__z_  RS.FogDensity
 *
 * CONST[30].x___  TWEENFACTOR
230 *
231 * CONST[32].x___ LIGHT[0].Type
232 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
233 * CONST[33] LIGHT[0].Diffuse
234 * CONST[34] LIGHT[0].Specular
235 * CONST[35] LIGHT[0].Ambient
236 * CONST[36].xyz_ LIGHT[0].Position
237 * CONST[36].___w LIGHT[0].Range
238 * CONST[37].xyz_ LIGHT[0].Direction
239 * CONST[37].___w LIGHT[0].Falloff
240 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
241 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
242 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
243 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
244 * CONST[39].___w 1 if this is the last active light, 0 if not
245 * CONST[40] LIGHT[1]
246 * CONST[48] LIGHT[2]
247 * CONST[56] LIGHT[3]
248 * CONST[64] LIGHT[4]
249 * CONST[72] LIGHT[5]
250 * CONST[80] LIGHT[6]
251 * CONST[88] LIGHT[7]
252 * NOTE: no lighting code is generated if there are no active lights
253 *
254 * CONST[100].x___ Viewport 2/width
255 * CONST[100]._y__ Viewport 2/height
256 * CONST[100].__z_ Viewport 1/(zmax - zmin)
257 * CONST[100].___w Viewport width
258 * CONST[101].x___ Viewport x0
259 * CONST[101]._y__ Viewport y0
260 * CONST[101].__z_ Viewport z0
261 *
262 * CONST[128..131] D3DTS_TEXTURE0
263 * CONST[132..135] D3DTS_TEXTURE1
264 * CONST[136..139] D3DTS_TEXTURE2
265 * CONST[140..143] D3DTS_TEXTURE3
266 * CONST[144..147] D3DTS_TEXTURE4
267 * CONST[148..151] D3DTS_TEXTURE5
268 * CONST[152..155] D3DTS_TEXTURE6
269 * CONST[156..159] D3DTS_TEXTURE7
270 *
271 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
272 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
273 * ...
274 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
 * CONST[196]      UCP0
 * ...
 * CONST[203]      UCP7
278 */
/* State carried while building one fixed-function vertex shader. */
struct vs_build_ctx
{
    struct ureg_program *ureg; /* program being built */
    const struct nine_ff_vs_key *key; /* key describing the variant to build */

    /* Declaration usage recorded for every declared VS input, indexed by
     * input slot; filled by build_vs_add_input(). */
    uint16_t input[PIPE_MAX_ATTRIBS];
    unsigned num_inputs; /* number of slots used in input[] */

    /* Shader sources for the vertex attributes. */
    struct ureg_src aVtx; /* position */
    struct ureg_src aNrm; /* normal */
    struct ureg_src aCol[2]; /* COLOR0 / COLOR1 */
    struct ureg_src aTex[8]; /* texture coordinates */
    struct ureg_src aPsz; /* point size */
    struct ureg_src aInd; /* blend indices */
    struct ureg_src aWgt; /* blend weights */

    struct ureg_src aVtx1; /* tweening */
    struct ureg_src aNrm1;

    /* Material colours: each is either a material constant or one of the
     * vertex colours, depending on the mtl_* fields of the key. */
    struct ureg_src mtlA; /* ambient */
    struct ureg_src mtlD; /* diffuse */
    struct ureg_src mtlS; /* specular */
    struct ureg_src mtlE; /* emissive */
};
303
304 static inline unsigned
get_texcoord_sn(struct pipe_screen * screen)305 get_texcoord_sn(struct pipe_screen *screen)
306 {
307 if (screen->caps.tgsi_texcoord)
308 return TGSI_SEMANTIC_TEXCOORD;
309 return TGSI_SEMANTIC_GENERIC;
310 }
311
312 static inline struct ureg_src
build_vs_add_input(struct vs_build_ctx * vs,uint16_t ndecl)313 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
314 {
315 const unsigned i = vs->num_inputs++;
316 assert(i < PIPE_MAX_ATTRIBS);
317 vs->input[i] = ndecl;
318 return ureg_DECL_vs_input(vs->ureg, i);
319 }
320
321 /* NOTE: dst may alias src */
322 static inline void
ureg_normalize3(struct ureg_program * ureg,struct ureg_dst dst,struct ureg_src src)323 ureg_normalize3(struct ureg_program *ureg,
324 struct ureg_dst dst, struct ureg_src src)
325 {
326 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
327 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
328
329 ureg_DP3(ureg, tmp_x, src, src);
330 ureg_RSQ(ureg, tmp_x, _X(tmp));
331 ureg_MUL(ureg, dst, src, _X(tmp));
332 ureg_release_temporary(ureg, tmp);
333 }
334
335 static void *
nine_ff_build_vs(struct NineDevice9 * device,struct vs_build_ctx * vs)336 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
337 {
338 const struct nine_ff_vs_key *key = vs->key;
339 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
340 struct ureg_dst oPos, oCol[2], oPsz, oFog;
341 struct ureg_dst AR;
342 unsigned i, c;
343 unsigned label[32], l = 0;
344 bool need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
345 bool has_aNrm;
346 bool need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
347 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
348
349 vs->ureg = ureg;
350
351 /* Check which inputs we should transform. */
352 for (i = 0; i < 8 * 3; i += 3) {
353 switch ((key->tc_gen >> i) & 0x7) {
354 case NINED3DTSS_TCI_CAMERASPACENORMAL:
355 need_aNrm = true;
356 break;
357 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
358 need_aVtx = true;
359 break;
360 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
361 need_aVtx = need_aNrm = true;
362 break;
363 case NINED3DTSS_TCI_SPHEREMAP:
364 need_aVtx = need_aNrm = true;
365 break;
366 default:
367 break;
368 }
369 }
370
371 has_aNrm = need_aNrm && key->has_normal;
372
373 /* Declare and record used inputs (needed for linkage with vertex format):
374 * (texture coordinates handled later)
375 */
376 vs->aVtx = build_vs_add_input(vs,
377 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
378
379 vs->aNrm = ureg_imm1f(ureg, 0.0f);
380 if (has_aNrm)
381 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
382
383 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
384 vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
385
386 if (key->lighting || key->darkness) {
387 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
388 key->mtl_ambient | key->mtl_emissive;
389 if ((mask & 0x1) && !key->color0in_one)
390 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
391 if ((mask & 0x2) && !key->color1in_zero)
392 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
393
394 vs->mtlD = MATERIAL_CONST(1);
395 vs->mtlA = MATERIAL_CONST(2);
396 vs->mtlS = MATERIAL_CONST(3);
397 vs->mtlE = MATERIAL_CONST(5);
398 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
399 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
400 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
401 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
402 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
403 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
404 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
405 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
406 } else {
407 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
408 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
409 }
410
411 if (key->vertexpointsize)
412 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
413
414 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
415 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
416 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
417 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
418 if (key->vertextween) {
419 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
420 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
421 }
422
423 /* Declare outputs:
424 */
425 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
426 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
427 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
428 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
429 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16);
430 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
431 }
432
433 if (key->vertexpointsize || key->pointscale || device->driver_caps.always_output_pointsize) {
434 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
435 TGSI_WRITEMASK_X, 0, 1);
436 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
437 }
438
439 if (key->lighting || key->vertexblend)
440 AR = ureg_DECL_address(ureg);
441
442 /* === Vertex transformation / vertex blending:
443 */
444
445 if (key->position_t) {
446 if (device->driver_caps.window_space_position_support) {
447 ureg_MOV(ureg, oPos, vs->aVtx);
448 } else {
449 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
450 /* vs->aVtx contains the coordinates buffer wise.
451 * later in the pipeline, clipping, viewport and division
452 * by w (rhw = 1/w) are going to be applied, so do the reverse
453 * of these transformations (except clipping) to have the good
454 * position at the end.*/
455 ureg_MOV(ureg, tmp, vs->aVtx);
456 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
457 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
458 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
459 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
460 /* Y needs to be reversed */
461 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
462 /* Replace w by 1 if it equals to 0 */
463 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W))),
464 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W), ureg_imm1f(ureg, 1.0f));
465 /* inverse rhw */
466 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
467 /* multiply X, Y, Z by w */
468 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
469 ureg_MOV(ureg, oPos, ureg_src(tmp));
470 ureg_release_temporary(ureg, tmp);
471 }
472 } else if (key->vertexblend) {
473 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
474 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
475 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
476 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
477 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
478 struct ureg_src cWM[4];
479
480 for (i = 160; i <= 195; ++i)
481 ureg_DECL_constant(ureg, i);
482
483 /* translate world matrix index to constant file index */
484 if (key->vertexblend_indexed) {
485 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
486 ureg_ARL(ureg, AR, ureg_src(tmp));
487 }
488
489 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
490 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
491 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
492
493 for (i = 0; i < key->vertexblend; ++i) {
494 for (c = 0; c < 4; ++c) {
495 cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);
496 if (key->vertexblend_indexed)
497 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
498 }
499
500 /* multiply by WORLD(index) */
501 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
502 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
503 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
504 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
505
506 if (has_aNrm) {
507 /* Note: the spec says the transpose of the inverse of the
508 * WorldView matrices should be used, but all tests show
509 * otherwise.
510 * Only case unknown: D3DVBF_0WEIGHTS */
511 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
512 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
513 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
514 }
515
516 if (i < (key->vertexblend - 1)) {
517 /* accumulate weighted position value */
518 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
519 if (has_aNrm)
520 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
521 /* subtract weighted position value for last value */
522 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
523 }
524 }
525
526 /* the last weighted position is always 1 - sum_of_previous_weights */
527 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
528 if (has_aNrm)
529 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
530
531 /* multiply by VIEW_PROJ */
532 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
533 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
534 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
535 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
536
537 if (need_aVtx)
538 vs->aVtx = ureg_src(aVtx_dst);
539
540 ureg_release_temporary(ureg, tmp);
541 ureg_release_temporary(ureg, tmp2);
542 ureg_release_temporary(ureg, sum_blendweights);
543 if (!need_aVtx)
544 ureg_release_temporary(ureg, aVtx_dst);
545
546 if (has_aNrm) {
547 if (key->normalizenormals)
548 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
549 vs->aNrm = ureg_src(aNrm_dst);
550 } else
551 ureg_release_temporary(ureg, aNrm_dst);
552 } else {
553 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
554
555 if (key->vertextween) {
556 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
557 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
558 vs->aVtx = ureg_src(aVtx_dst);
559 if (has_aNrm) {
560 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
561 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
562 vs->aNrm = ureg_src(aNrm_dst);
563 }
564 }
565
566 /* position = vertex * WORLD_VIEW_PROJ */
567 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
568 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
569 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
570 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
571 ureg_release_temporary(ureg, tmp);
572
573 if (need_aVtx) {
574 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
575 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
576 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
577 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
578 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
579 vs->aVtx = ureg_src(aVtx_dst);
580 }
581 if (has_aNrm) {
582 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
583 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
584 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
585 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
586 if (key->normalizenormals)
587 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
588 vs->aNrm = ureg_src(aNrm_dst);
589 }
590 }
591
592 /* === Process point size:
593 */
594 if (key->vertexpointsize || key->pointscale || device->driver_caps.always_output_pointsize) {
595 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
596 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
597 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
598 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
599 if (key->vertexpointsize) {
600 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
601 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
602 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
603 } else {
604 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
605 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
606 }
607
608 if (key->pointscale) {
609 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
610 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
611
612 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
613 ureg_RSQ(ureg, tmp_y, _X(tmp));
614 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
615 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
616 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
617 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
618 ureg_RSQ(ureg, tmp_x, _X(tmp));
619 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
620 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
621 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
622 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
623 }
624
625 ureg_MOV(ureg, oPsz, _Z(tmp));
626 ureg_release_temporary(ureg, tmp);
627 }
628
629 for (i = 0; i < 8; ++i) {
630 struct ureg_dst tmp, tmp_x, tmp2;
631 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
632 unsigned c, writemask;
633 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
634 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
635 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
636 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
637
638 /* No texture output of index s */
639 if (tci == NINED3DTSS_TCI_DISABLE)
640 continue;
641 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
642 tmp = ureg_DECL_temporary(ureg);
643 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
644 input_coord = ureg_DECL_temporary(ureg);
645 transformed = ureg_DECL_temporary(ureg);
646
647 /* Get the coordinate */
648 switch (tci) {
649 case NINED3DTSS_TCI_PASSTHRU:
650 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
651 * Else the idx is used only to determine wrapping mode. */
652 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
653 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
654 break;
655 case NINED3DTSS_TCI_CAMERASPACENORMAL:
656 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
657 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
658 dim_input = 4;
659 break;
660 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
661 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
662 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
663 dim_input = 4;
664 break;
665 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
666 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
667 aVtx_normed = ureg_DECL_temporary(ureg);
668 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
669 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
670 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
671 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
672 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
673 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
674 ureg_release_temporary(ureg, aVtx_normed);
675 dim_input = 4;
676 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
677 break;
678 case NINED3DTSS_TCI_SPHEREMAP:
679 /* Implement the formula of GL_SPHERE_MAP */
680 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
681 aVtx_normed = ureg_DECL_temporary(ureg);
682 tmp2 = ureg_DECL_temporary(ureg);
683 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
684 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
685 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
686 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
687 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
688 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
689 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
690 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
691 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
692 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
693 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
694 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
695 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
696 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
697 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
698 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
699 ureg_release_temporary(ureg, aVtx_normed);
700 ureg_release_temporary(ureg, tmp2);
701 dim_input = 4;
702 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
703 break;
704 default:
705 assert(0);
706 break;
707 }
708
709 /* Apply the transformation */
710 /* dim_output == 0 => do not transform the components.
711 * XYZRHW also disables transformation */
712 if (!dim_output || key->position_t) {
713 ureg_release_temporary(ureg, transformed);
714 transformed = input_coord;
715 writemask = TGSI_WRITEMASK_XYZW;
716 } else {
717 for (c = 0; c < dim_output; c++) {
718 t = ureg_writemask(transformed, 1 << c);
719 switch (dim_input) {
720 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
721 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
722 break;
723 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
724 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
725 break;
726 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
727 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
728 break;
729 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
730 default:
731 assert(0);
732 }
733 }
734 writemask = (1 << dim_output) - 1;
735 ureg_release_temporary(ureg, input_coord);
736 }
737
738 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
739 ureg_release_temporary(ureg, transformed);
740 ureg_release_temporary(ureg, tmp);
741 }
742
743 /* === Lighting:
744 *
745 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
746 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
747 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
748 *
749 * vec3 normal = normalize(in.Normal * NormalMatrix);
750 * vec3 hitDir = light.direction;
751 * float atten = 1.0;
752 *
753 * if (light.type != DIRECTIONAL)
754 * {
755 * vec3 hitVec = light.position - eyeVertex;
756 * float d = length(hitVec);
757 * hitDir = hitVec / d;
758 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
759 * }
760 *
761 * if (light.type == SPOTLIGHT)
762 * {
763 * float rho = dp3(-hitVec, light.direction);
764 * if (rho < cos(light.phi / 2))
765 * atten = 0;
766 * if (rho < cos(light.theta / 2))
767 * atten *= pow(some_func(rho), light.falloff);
768 * }
769 *
770 * float nDotHit = dp3_sat(normal, hitVec);
771 * float powFact = 0.0;
772 *
773 * if (nDotHit > 0.0)
774 * {
775 * vec3 midVec = normalize(hitDir + eye);
776 * float nDotMid = dp3_sat(normal, midVec);
777 * pFact = pow(nDotMid, material.power);
778 * }
779 *
780 * ambient += light.ambient * atten;
781 * diffuse += light.diffuse * atten * nDotHit;
782 * specular += light.specular * atten * powFact;
783 */
784 if (key->lighting) {
785 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
786 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
787 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
788 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
789 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
790 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
791 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
792
793 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
794
795 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
796
797 /* Light.*.Alpha is not used. */
798 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
799 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
800 struct ureg_dst rS = ureg_DECL_temporary(ureg);
801
802 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
803
804 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
805 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
806 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
807 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
808 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
809 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
810 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
811 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
812 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
813 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
814 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
815 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
816 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
817 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
818 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
819
820 const unsigned loop_label = l++;
821
822 /* Declare all light constants to allow indirect addressing */
823 for (i = 32; i < 96; i++)
824 ureg_DECL_constant(ureg, i);
825
826 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
827 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
828 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
829 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
830
831 /* loop management */
832 ureg_BGNLOOP(ureg, &label[loop_label]);
833 ureg_ARL(ureg, AL, _W(rCtr));
834
835 /* if (not DIRECTIONAL light): */
836 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
837 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
838 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
839 ureg_IF(ureg, _X(tmp), &label[l++]);
840 {
841 /* hitDir = light.position - eyeVtx
842 * d = length(hitDir)
843 */
844 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
845 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
846 ureg_RSQ(ureg, tmp_y, _X(tmp));
847 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
848
849 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
850 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
851 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
852 ureg_RCP(ureg, rAtt, _W(rAtt));
853 /* cut-off if distance exceeds Light.Range */
854 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
855 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
856 }
857 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
858 ureg_ENDIF(ureg);
859
860 /* normalize hitDir */
861 ureg_normalize3(ureg, rHit, ureg_src(rHit));
862
863 /* if (SPOT light) */
864 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
865 ureg_IF(ureg, _X(tmp), &label[l++]);
866 {
867 /* rho = dp3(-hitDir, light.spotDir)
868 *
869 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
870 * spotAtt = 1
871 * else
872 * if (rho <= light.cphi2)
873 * spotAtt = 0
874 * else
875 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
876 */
877 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
878 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
879 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
880 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
881 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
882 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
883 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
884 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
885 }
886 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
887 ureg_ENDIF(ureg);
888
889 /* directional factors, let's not use LIT because of clarity */
890
891 if (has_aNrm) {
892 if (key->localviewer) {
893 ureg_normalize3(ureg, rMid, vs->aVtx);
894 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
895 } else {
896 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
897 }
898 ureg_normalize3(ureg, rMid, ureg_src(rMid));
899 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
900 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
901 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
902 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
903 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
904 * No tests were made for backfacing, so add the two conditions */
905 ureg_IF(ureg, _Z(tmp), &label[l++]);
906 {
907 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
908 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
909 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
910 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
911 }
912 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
913 ureg_ENDIF(ureg);
914
915 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
916 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
917 }
918
919 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
920
921 /* break if this was the last light */
922 ureg_IF(ureg, cLLast, &label[l++]);
923 ureg_BRK(ureg);
924 ureg_ENDIF(ureg);
925 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
926
927 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
928 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
929 ureg_ENDLOOP(ureg, &label[loop_label]);
930
931 /* Apply to material:
932 *
933 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
934 * material.ambient * ambient +
935 * material.diffuse * diffuse +
936 * oCol[1] = material.specular * specular;
937 */
938 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
939 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));
940 else {
941 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
942 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
943 }
944
945 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));
946 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
947 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
948 ureg_release_temporary(ureg, rAtt);
949 ureg_release_temporary(ureg, rHit);
950 ureg_release_temporary(ureg, rMid);
951 ureg_release_temporary(ureg, rCtr);
952 ureg_release_temporary(ureg, rD);
953 ureg_release_temporary(ureg, rA);
954 ureg_release_temporary(ureg, rS);
955 ureg_release_temporary(ureg, rAtt);
956 ureg_release_temporary(ureg, tmp);
957 } else
958 /* COLOR */
959 if (key->darkness) {
960 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
961 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));
962 else
963 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
964 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
965 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
966 } else {
967 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
968 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
969 }
970
971 /* === Process fog.
972 *
973 * exp(x) = ex2(log2(e) * x)
974 */
975 if (key->fog_mode) {
976 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
977 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
978 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
979 if (key->fog_range) {
980 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
981 ureg_RSQ(ureg, tmp_z, _X(tmp));
982 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
983 } else {
984 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
985 }
986
987 if (key->fog_mode == D3DFOG_EXP) {
988 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
989 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
990 ureg_EX2(ureg, tmp_x, _X(tmp));
991 } else
992 if (key->fog_mode == D3DFOG_EXP2) {
993 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
994 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
995 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
996 ureg_EX2(ureg, tmp_x, _X(tmp));
997 } else
998 if (key->fog_mode == D3DFOG_LINEAR) {
999 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
1000 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
1001 }
1002 ureg_MOV(ureg, oFog, _X(tmp));
1003 ureg_release_temporary(ureg, tmp);
1004 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
1005 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
1006 }
1007
1008 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
1009 struct ureg_src input;
1010 struct ureg_dst output;
1011 input = vs->aWgt;
1012 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1013 ureg_MOV(ureg, output, input);
1014 }
1015 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1016 struct ureg_src input;
1017 struct ureg_dst output;
1018 input = vs->aInd;
1019 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1020 ureg_MOV(ureg, output, input);
1021 }
1022 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1023 struct ureg_src input;
1024 struct ureg_dst output;
1025 input = vs->aNrm;
1026 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1027 ureg_MOV(ureg, output, input);
1028 }
1029 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1030 struct ureg_src input;
1031 struct ureg_dst output;
1032 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1033 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1034 ureg_MOV(ureg, output, input);
1035 }
1036 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1037 struct ureg_src input;
1038 struct ureg_dst output;
1039 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1040 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23);
1041 ureg_MOV(ureg, output, input);
1042 }
1043 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1044 struct ureg_src input;
1045 struct ureg_dst output;
1046 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1047 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1048 output = oFog;
1049 ureg_MOV(ureg, output, input);
1050 }
1051 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1052 (void) 0; /* TODO: replace z of position output ? */
1053 }
1054
1055 /* ucp for ff applies on world coordinates.
1056 * aVtx is in worldview coordinates. */
1057 if (key->ucp) {
1058 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1059 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1060 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));
1061 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1062 if (!key->clipplane_emulate) {
1063 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1064 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1065 } else {
1066 struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
1067 int num_clipdist = ffs(key->clipplane_emulate);
1068 ureg_ADD(ureg, tmp, _CONST(15), ureg_src(tmp));
1069 clipdist[0] = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_CLIPDIST, 0,
1070 ((1 << num_clipdist) - 1) & 0xf, 0, 1);
1071 if (num_clipdist >= 5)
1072 clipdist[1] = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_CLIPDIST, 1,
1073 ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
1074 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
1075 for (i = 0; i < num_clipdist; i++) {
1076 assert(!ureg_dst_is_undef(clipdist[i>>2]));
1077 if (!(key->clipplane_emulate & (1 << i)))
1078 ureg_MOV(ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(ureg, 0.f));
1079 else
1080 ureg_DP4(ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
1081 ureg_src(tmp), _CONST(196+i));
1082 }
1083 }
1084 ureg_release_temporary(ureg, tmp);
1085 }
1086
1087 if (key->position_t && device->driver_caps.window_space_position_support)
1088 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
1089
1090 ureg_END(ureg);
1091 nine_ureg_tgsi_dump(ureg, false);
1092 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1093 }
1094
1095 /* PS FF constants layout:
1096 *
1097 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1098 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1099 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1100 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1101 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1102 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
 * CONST[16..19]._y_w  stage[i].D3DTSS_BUMPENVLOFFSET
1104 *
1105 * CONST[20] D3DRS_TEXTUREFACTOR
1106 * CONST[21] D3DRS_FOGCOLOR
1107 * CONST[22].x___ RS.FogEnd
1108 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1109 * CONST[22].__z_ RS.FogDensity
1110 * CONST[22].___w Alpha ref
1111 */
/* State shared by the helpers that emit the fixed-function pixel shader. */
struct ps_build_ctx
{
    struct ureg_program *ureg; /* program being assembled */
    unsigned color_interpolate_flag; /* interpolation mode used when declaring the COLOR inputs */

    struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
    struct ureg_src vT[8]; /* TEXCOORD[i] */
    struct ureg_dst rCur; /* D3DTA_CURRENT */
    struct ureg_dst rMod; /* rCur * rTex; stands in for D3DTA_CURRENT when stage.index == stage.index_pre_mod */
    struct ureg_src rCurSrc; /* rCur as a source operand */
    struct ureg_dst rTmp; /* D3DTA_TEMP */
    struct ureg_src rTmpSrc; /* rTmp as a source operand */
    struct ureg_dst rTex; /* D3DTA_TEXTURE: result of the texture fetch */
    struct ureg_src rTexSrc; /* rTex as a source operand */
    struct ureg_src cBEM[8]; /* NOTE(review): not referenced by the code visible here */
    struct ureg_src s[8]; /* sampler of stage i, left undefined when the stage does not sample */

    struct {
        unsigned index; /* stage currently being emitted */
        unsigned index_pre_mod; /* set to index + 1 by D3DTOP_PREMODULATE, starts at -1 */
    } stage;
};
1134
1135 static struct ureg_src
ps_get_ts_arg(struct ps_build_ctx * ps,unsigned ta)1136 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1137 {
1138 struct ureg_src reg;
1139
1140 switch (ta & D3DTA_SELECTMASK) {
1141 case D3DTA_CONSTANT:
1142 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1143 break;
1144 case D3DTA_CURRENT:
1145 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1146 break;
1147 case D3DTA_DIFFUSE:
1148 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, ps->color_interpolate_flag);
1149 break;
1150 case D3DTA_SPECULAR:
1151 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, ps->color_interpolate_flag);
1152 break;
1153 case D3DTA_TEMP:
1154 reg = ps->rTmpSrc;
1155 break;
1156 case D3DTA_TEXTURE:
1157 reg = ps->rTexSrc;
1158 break;
1159 case D3DTA_TFACTOR:
1160 reg = ureg_DECL_constant(ps->ureg, 20);
1161 break;
1162 default:
1163 assert(0);
1164 reg = ureg_src_undef();
1165 break;
1166 }
1167 if (ta & D3DTA_COMPLEMENT) {
1168 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1169 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
1170 reg = ureg_src(dst);
1171 }
1172 if (ta & D3DTA_ALPHAREPLICATE)
1173 reg = _WWWW(reg);
1174 return reg;
1175 }
1176
1177 static struct ureg_dst
ps_get_ts_dst(struct ps_build_ctx * ps,unsigned ta)1178 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1179 {
1180 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1181
1182 switch (ta & D3DTA_SELECTMASK) {
1183 case D3DTA_CURRENT:
1184 return ps->rCur;
1185 case D3DTA_TEMP:
1186 return ps->rTmp;
1187 default:
1188 assert(0);
1189 return ureg_dst_undef();
1190 }
1191 }
1192
/* Report which of arg[0], arg[1], arg[2] a texture operation reads.
 * Bit n of the result is set when arg[n] is consumed.
 */
static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
{
    enum { USES_ARG0 = 0x1, USES_ARG1 = 0x2, USES_ARG2 = 0x4 };
    /* Every operation not listed below takes arg1 and arg2. */
    uint8_t mask = USES_ARG1 | USES_ARG2;

    switch (top) {
    case D3DTOP_DISABLE:
        mask = 0x0;
        break;
    case D3DTOP_SELECTARG1:
    case D3DTOP_PREMODULATE:
        mask = USES_ARG1;
        break;
    case D3DTOP_SELECTARG2:
        mask = USES_ARG2;
        break;
    case D3DTOP_MULTIPLYADD:
    case D3DTOP_LERP:
        mask = USES_ARG0 | USES_ARG1 | USES_ARG2;
        break;
    default:
        break;
    }
    return mask;
}
1210
1211 static inline bool
is_MOV_no_op(struct ureg_dst dst,struct ureg_src src)1212 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1213 {
1214 return !dst.WriteMask ||
1215 (dst.File == src.File &&
1216 dst.Index == src.Index &&
1217 !dst.Indirect &&
1218 !dst.Saturate &&
1219 !src.Indirect &&
1220 !src.Negate &&
1221 !src.Absolute &&
1222 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1223 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1224 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1225 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1226
1227 }
1228
1229 static void
ps_do_ts_op(struct ps_build_ctx * ps,unsigned top,struct ureg_dst dst,struct ureg_src * arg)1230 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1231 {
1232 struct ureg_program *ureg = ps->ureg;
1233 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1234 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1235 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1236
1237 tmp.WriteMask = dst.WriteMask;
1238
1239 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1240 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1241 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1242 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1243 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1244 top != D3DTOP_LERP)
1245 dst = ureg_saturate(dst);
1246
1247 switch (top) {
1248 case D3DTOP_SELECTARG1:
1249 if (!is_MOV_no_op(dst, arg[1]))
1250 ureg_MOV(ureg, dst, arg[1]);
1251 break;
1252 case D3DTOP_SELECTARG2:
1253 if (!is_MOV_no_op(dst, arg[2]))
1254 ureg_MOV(ureg, dst, arg[2]);
1255 break;
1256 case D3DTOP_MODULATE:
1257 ureg_MUL(ureg, dst, arg[1], arg[2]);
1258 break;
1259 case D3DTOP_MODULATE2X:
1260 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1261 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1262 break;
1263 case D3DTOP_MODULATE4X:
1264 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1265 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1266 break;
1267 case D3DTOP_ADD:
1268 ureg_ADD(ureg, dst, arg[1], arg[2]);
1269 break;
1270 case D3DTOP_ADDSIGNED:
1271 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1272 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
1273 break;
1274 case D3DTOP_ADDSIGNED2X:
1275 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1276 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1277 break;
1278 case D3DTOP_SUBTRACT:
1279 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
1280 break;
1281 case D3DTOP_ADDSMOOTH:
1282 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1283 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1284 break;
1285 case D3DTOP_BLENDDIFFUSEALPHA:
1286 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1287 break;
1288 case D3DTOP_BLENDTEXTUREALPHA:
1289 /* XXX: alpha taken from previous stage, texture or result ? */
1290 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1291 break;
1292 case D3DTOP_BLENDFACTORALPHA:
1293 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1294 break;
1295 case D3DTOP_BLENDTEXTUREALPHAPM:
1296 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
1297 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1298 break;
1299 case D3DTOP_BLENDCURRENTALPHA:
1300 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1301 break;
1302 case D3DTOP_PREMODULATE:
1303 ureg_MOV(ureg, dst, arg[1]);
1304 ps->stage.index_pre_mod = ps->stage.index + 1;
1305 break;
1306 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1307 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1308 break;
1309 case D3DTOP_MODULATECOLOR_ADDALPHA:
1310 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1311 break;
1312 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1313 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
1314 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1315 break;
1316 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1317 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1318 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1319 break;
1320 case D3DTOP_BUMPENVMAP:
1321 break;
1322 case D3DTOP_BUMPENVMAPLUMINANCE:
1323 break;
1324 case D3DTOP_DOTPRODUCT3:
1325 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1326 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1327 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1328 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1329 break;
1330 case D3DTOP_MULTIPLYADD:
1331 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1332 break;
1333 case D3DTOP_LERP:
1334 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1335 break;
1336 case D3DTOP_DISABLE:
1337 /* no-op ? */
1338 break;
1339 default:
1340 assert(!"invalid D3DTOP");
1341 break;
1342 }
1343 ureg_release_temporary(ureg, tmp);
1344 ureg_release_temporary(ureg, tmp2);
1345 }
1346
1347 static void *
nine_ff_build_ps(struct NineDevice9 * device,struct nine_ff_ps_key * key)1348 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1349 {
1350 struct ps_build_ctx ps;
1351 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1352 struct ureg_dst oCol;
1353 unsigned s;
1354 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1355
1356 memset(&ps, 0, sizeof(ps));
1357 ps.ureg = ureg;
1358 ps.color_interpolate_flag = key->flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
1359 ps.stage.index_pre_mod = -1;
1360
1361 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, ps.color_interpolate_flag);
1362
1363 ps.rCur = ureg_DECL_temporary(ureg);
1364 ps.rTmp = ureg_DECL_temporary(ureg);
1365 ps.rTex = ureg_DECL_temporary(ureg);
1366 ps.rCurSrc = ureg_src(ps.rCur);
1367 ps.rTmpSrc = ureg_src(ps.rTmp);
1368 ps.rTexSrc = ureg_src(ps.rTex);
1369
1370 /* Initial values */
1371 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1372 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1373 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1374
1375 for (s = 0; s < 8; ++s) {
1376 ps.s[s] = ureg_src_undef();
1377
1378 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1379 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1380 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1381 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1382 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1383
1384 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1385 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1386 key->ts[s].colorarg2 == D3DTA_TEXTURE ||
1387 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1388 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1389 ps.s[s] = ureg_DECL_sampler(ureg, s);
1390 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1391 }
1392 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1393 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1394 ps.s[s] = ureg_DECL_sampler(ureg, s);
1395 }
1396
1397 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1398 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1399 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1400 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1401 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1402
1403 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1404 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1405 key->ts[s].alphaarg2 == D3DTA_TEXTURE ||
1406 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1407 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1408 ps.s[s] = ureg_DECL_sampler(ureg, s);
1409 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1410 }
1411 }
1412 }
1413 if (key->specular)
1414 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1415
1416 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1417
1418 /* Run stages.
1419 */
1420 for (s = 0; s < 8; ++s) {
1421 unsigned colorarg[3];
1422 unsigned alphaarg[3];
1423 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1424 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1425 struct ureg_dst dst;
1426 struct ureg_src arg[3];
1427
1428 if (key->ts[s].colorop == D3DTOP_DISABLE) {
1429 assert (key->ts[s].alphaop == D3DTOP_DISABLE);
1430 continue;
1431 }
1432 ps.stage.index = s;
1433
1434 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1435 nine_D3DTOP_to_str(key->ts[s].colorop),
1436 nine_D3DTOP_to_str(key->ts[s].alphaop));
1437
1438 if (!ureg_src_is_undef(ps.s[s])) {
1439 unsigned target;
1440 struct ureg_src texture_coord = ps.vT[s];
1441 struct ureg_dst delta;
1442 switch (key->ts[s].textarget) {
1443 case 0: target = TGSI_TEXTURE_1D; break;
1444 case 1: target = TGSI_TEXTURE_2D; break;
1445 case 2: target = TGSI_TEXTURE_3D; break;
1446 case 3: target = TGSI_TEXTURE_CUBE; break;
1447 /* this is a 2 bit bitfield, do I really need a default case ? */
1448 }
1449
1450 /* Modify coordinates */
1451 if (s >= 1 &&
1452 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1453 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1454 delta = ureg_DECL_temporary(ureg);
1455 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1456 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1457 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1458 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1459 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1460 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1461 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1462 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1463 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1464 /* Prepare luminance multiplier
1465 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1466 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1467 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1468 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1469
1470 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1471 }
1472 }
1473 if (key->projected & (3 << (s *2))) {
1474 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1475 if (dim == 4)
1476 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1477 else {
1478 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1479 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1480 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1481 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1482 ureg_release_temporary(ureg, tmp);
1483 }
1484 } else {
1485 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1486 }
1487 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1488 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1489 }
1490
1491 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1492 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1493 continue;
1494
1495 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1496
1497 if (ps.stage.index_pre_mod == ps.stage.index) {
1498 ps.rMod = ureg_DECL_temporary(ureg);
1499 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1500 }
1501
1502 colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1503 colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1504 colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1505 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1506 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1507 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1508
1509 if (key->ts[s].colorop != key->ts[s].alphaop ||
1510 colorarg[0] != alphaarg[0] ||
1511 colorarg[1] != alphaarg[1] ||
1512 colorarg[2] != alphaarg[2])
1513 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1514
1515 /* Special DOTPRODUCT behaviour (see wine tests) */
1516 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1517 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1518
1519 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1520 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1521 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1522 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1523
1524 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1525 dst.WriteMask = TGSI_WRITEMASK_W;
1526
1527 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1528 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1529 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1530 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1531 }
1532 }
1533
1534 if (key->specular)
1535 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);
1536
1537 if (key->alpha_test_emulation == PIPE_FUNC_NEVER) {
1538 ureg_KILL(ureg);
1539 } else if (key->alpha_test_emulation != PIPE_FUNC_ALWAYS) {
1540 unsigned cmp_op;
1541 struct ureg_src src[2];
1542 struct ureg_dst tmp = ps.rTmp;
1543 cmp_op = pipe_comp_to_tgsi_opposite(key->alpha_test_emulation);
1544 src[0] = ureg_scalar(ps.rCurSrc, TGSI_SWIZZLE_W); /* Read color alpha channel */
1545 src[1] = _WWWW(_CONST(22)); /* Read alpha ref */
1546 ureg_insn(ureg, cmp_op, &tmp, 1, src, 2, 0);
1547 ureg_KILL_IF(ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
1548 }
1549
1550 /* Fog.
1551 */
1552 if (key->fog_mode) {
1553 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1554 struct ureg_src vPos;
1555 if (device->screen->caps.fs_position_is_sysval) {
1556 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1557 } else {
1558 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1559 TGSI_INTERPOLATE_LINEAR);
1560 }
1561
1562 /* Source is either W or Z.
1563 * Z is when an orthogonal projection matrix is detected,
1564 * W (WFOG) else.
1565 */
1566 if (!key->fog_source)
1567 ureg_MOV(ureg, rFog, _ZZZZ(vPos));
1568 else
1569 /* Position's w is 1/w */
1570 ureg_RCP(ureg, rFog, _WWWW(vPos));
1571
1572 if (key->fog_mode == D3DFOG_EXP) {
1573 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1574 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1575 ureg_EX2(ureg, rFog, _X(rFog));
1576 } else
1577 if (key->fog_mode == D3DFOG_EXP2) {
1578 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1579 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1580 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1581 ureg_EX2(ureg, rFog, _X(rFog));
1582 } else
1583 if (key->fog_mode == D3DFOG_LINEAR) {
1584 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
1585 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1586 }
1587 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1588 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1589 } else
1590 if (key->fog) {
1591 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE);
1592 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1593 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1594 } else {
1595 ureg_MOV(ureg, oCol, ps.rCurSrc);
1596 }
1597
1598 ureg_END(ureg);
1599 nine_ureg_tgsi_dump(ureg, false);
1600 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1601 }
1602
1603 static struct NineVertexShader9 *
nine_ff_get_vs(struct NineDevice9 * device)1604 nine_ff_get_vs(struct NineDevice9 *device)
1605 {
1606 const struct nine_context *context = &device->context;
1607 struct NineVertexShader9 *vs;
1608 struct vs_build_ctx bld;
1609 struct nine_ff_vs_key key;
1610 unsigned s, i;
1611 bool has_indexes = false;
1612 bool has_weights = false;
1613 int8_t input_texture_coord[8];
1614
1615 assert(sizeof(key) <= sizeof(key.value32));
1616
1617 memset(&key, 0, sizeof(key));
1618 memset(&bld, 0, sizeof(bld));
1619 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1620
1621 bld.key = &key;
1622
1623 /* FIXME: this shouldn't be NULL, but it is on init */
1624 if (context->vdecl) {
1625 key.color0in_one = 1;
1626 key.color1in_zero = 1;
1627 for (i = 0; i < context->vdecl->nelems; i++) {
1628 uint16_t usage = context->vdecl->usage_map[i];
1629 if (usage == NINE_DECLUSAGE_POSITIONT)
1630 key.position_t = 1;
1631 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1632 key.color0in_one = 0;
1633 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1634 key.color1in_zero = 0;
1635 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1636 has_indexes = true;
1637 key.passthrough |= 1 << usage;
1638 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1639 has_weights = true;
1640 key.passthrough |= 1 << usage;
1641 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
1642 key.has_normal = 1;
1643 key.passthrough |= 1 << usage;
1644 } else if (usage == NINE_DECLUSAGE_PSIZE)
1645 key.vertexpointsize = 1;
1646 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1647 s = usage / NINE_DECLUSAGE_COUNT;
1648 if (s < 8)
1649 input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);
1650 else
1651 DBG("FF given texture coordinate >= 8. Ignoring\n");
1652 } else if (usage < NINE_DECLUSAGE_NONE)
1653 key.passthrough |= 1 << usage;
1654 }
1655 }
1656 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1657 * We do restrict to indices 0 */
1658 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1659 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1660 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1661 if (!key.position_t)
1662 key.passthrough = 0;
1663 key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];
1664
1665 key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active;
1666 key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
1667 if (key.position_t) {
1668 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1669 key.lighting = 0;
1670 }
1671 if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {
1672 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1673 key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1674 key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1675 key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1676 key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1677 }
1678 key.fog = !!context->rs[D3DRS_FOGENABLE];
1679 key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;
1680 if (key.fog_mode)
1681 key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];
1682
1683 key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];
1684 key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];
1685 key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];
1686 key.clipplane_emulate = device->driver_caps.emulate_ucp ? (context->rs[D3DRS_CLIPPLANEENABLE] & 0xff) : 0;
1687
1688 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1689 key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1690
1691 switch (context->rs[D3DRS_VERTEXBLEND]) {
1692 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1693 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1694 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1695 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1696 case D3DVBF_TWEENING: key.vertextween = 1; break;
1697 default:
1698 assert(!"invalid D3DVBF");
1699 break;
1700 }
1701 if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1702 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1703 }
1704
1705 for (s = 0; s < 8; ++s) {
1706 unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1707 unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
1708 unsigned dim;
1709
1710 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1711 gen = NINED3DTSS_TCI_PASSTHRU;
1712
1713 if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)
1714 gen = NINED3DTSS_TCI_DISABLE;
1715
1716 key.tc_gen |= gen << (s * 3);
1717 key.tc_idx |= idx << (s * 3);
1718 key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);
1719
1720 dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1721 if (dim > 4)
1722 dim = input_texture_coord[idx];
1723 if (dim == 1) /* NV behaviour */
1724 dim = 0;
1725 key.tc_dim_output |= dim << (s * 3);
1726 }
1727
1728 DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));
1729 vs = util_hash_table_get(device->ff.ht_vs, &key);
1730 if (vs)
1731 return vs;
1732 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1733
1734 nine_ff_prune_vs(device);
1735 if (vs) {
1736 unsigned n;
1737
1738 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1739
1740 _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs);
1741 device->ff.num_vs++;
1742
1743 vs->num_inputs = bld.num_inputs;
1744 for (n = 0; n < bld.num_inputs; ++n)
1745 vs->input_map[n].ndecl = bld.input[n];
1746
1747 vs->position_t = key.position_t;
1748 vs->point_size = key.vertexpointsize | key.pointscale | device->driver_caps.always_output_pointsize;
1749 }
1750 return vs;
1751 }
1752
/* Look up the matrix of transform state D3DTS_<n>. A variable named `context`
 * must be in scope at the point of use; FALSE is forwarded as the last
 * argument of nine_state_access_transform(). */
#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
/* Non-zero when the bit of transform state D3DTS_<n> is set in
 * (s)->ff.changed.transform. The mask is built from an unsigned 1 so that a
 * state mapping to bit 31 never shifts into the sign bit of an int. */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1u << ((D3DTS_##n) % 32)))
1755
1756 static struct NinePixelShader9 *
nine_ff_get_ps(struct NineDevice9 * device)1757 nine_ff_get_ps(struct NineDevice9 *device)
1758 {
1759 struct nine_context *context = &device->context;
1760 struct NinePixelShader9 *ps;
1761 struct nine_ff_ps_key key;
1762 unsigned s;
1763 uint8_t sampler_mask = 0;
1764
1765 assert(sizeof(key) <= sizeof(key.value32));
1766
1767 memset(&key, 0, sizeof(key));
1768 for (s = 0; s < 8; ++s) {
1769 key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
1770 key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
1771 const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1772 const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1773 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
1774 * ALPHAOP cannot be enabled if COLOROP is disabled.
1775 * Verified on Windows. */
1776 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1777 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1778 break;
1779 }
1780
1781 if (!context->texture[s].enabled &&
1782 ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
1783 used_c & 0x1) ||
1784 (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
1785 used_c & 0x2) ||
1786 (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
1787 used_c & 0x4))) {
1788 /* Tested on Windows: Invalid texture read disables the stage
1789 * and the subsequent ones, but only for colorop. For alpha,
1790 * it's as if the texture had alpha of 1.0, which is what
1791 * has our dummy texture in that case. Invalid color also
1792 * disabled the following alpha stages. */
1793 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1794 break;
1795 }
1796
1797 if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
1798 context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
1799 context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
1800 context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
1801 context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
1802 context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
1803 sampler_mask |= (1 << s);
1804
1805 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1806 if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;
1807 if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;
1808 if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;
1809 if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;
1810 if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;
1811 if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;
1812 if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;
1813 if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;
1814 if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;
1815 }
1816 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1817 if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;
1818 if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;
1819 if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;
1820 if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;
1821 if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;
1822 if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;
1823 }
1824 key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1825
1826 if (context->texture[s].enabled) {
1827 switch (context->texture[s].type) {
1828 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1829 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1830 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1831 default:
1832 assert(!"unexpected texture type");
1833 break;
1834 }
1835 } else {
1836 key.ts[s].textarget = 1;
1837 }
1838 }
1839
1840 /* Note: If colorop is D3DTOP_DISABLE for the first stage
1841 * (which implies alphaop is too), nothing particular happens,
1842 * that is, current is equal to diffuse (which is the case anyway,
1843 * because it is how it is initialized).
1844 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
1845 * because then if the resultarg is TEMP, then diffuse alpha is written
1846 * to it. */
1847 if (key.ts[0].colorop != D3DTOP_DISABLE &&
1848 key.ts[0].alphaop == D3DTOP_DISABLE &&
1849 key.ts[0].resultarg != 0) {
1850 key.ts[0].alphaop = D3DTOP_SELECTARG1;
1851 key.ts[0].alphaarg1 = D3DTA_DIFFUSE;
1852 }
1853 /* When no alpha stage writes to current, diffuse alpha is taken.
1854 * Since we initialize current to diffuse, we have the behaviour. */
1855
1856 /* Last stage always writes to Current */
1857 if (s >= 1)
1858 key.ts[s-1].resultarg = 0;
1859
1860 key.projected = nine_ff_get_projected_key_ff(context);
1861 key.specular = !!context->rs[D3DRS_SPECULARENABLE];
1862 key.flatshade = context->rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT;
1863
1864 for (; s < 8; ++s)
1865 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1866 if (context->rs[D3DRS_FOGENABLE])
1867 key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];
1868 key.fog = !!context->rs[D3DRS_FOGENABLE];
1869 if (key.fog_mode && key.fog)
1870 key.fog_source = !context->zfog;
1871 key.alpha_test_emulation = context->rs[NINED3DRS_EMULATED_ALPHATEST] & 0x7;
1872
1873 DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));
1874 ps = util_hash_table_get(device->ff.ht_ps, &key);
1875 if (ps)
1876 return ps;
1877 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1878
1879 nine_ff_prune_ps(device);
1880 if (ps) {
1881 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1882
1883 _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps);
1884 device->ff.num_ps++;
1885
1886 ps->rt_mask = 0x1;
1887 ps->sampler_mask = sampler_mask;
1888 }
1889 return ps;
1890 }
1891
1892 static void
nine_ff_load_vs_transforms(struct NineDevice9 * device)1893 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1894 {
1895 struct nine_context *context = &device->context;
1896 D3DMATRIX T;
1897 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1898 unsigned i;
1899
1900 /* TODO: make this nicer, and only upload the ones we need */
1901 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1902
1903 if (IS_D3DTS_DIRTY(context, WORLD) ||
1904 IS_D3DTS_DIRTY(context, VIEW) ||
1905 IS_D3DTS_DIRTY(context, PROJECTION)) {
1906 /* WVP, WV matrices */
1907 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1908 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1909
1910 /* normal matrix == transpose(inverse(WV)) */
1911 nine_d3d_matrix_inverse(&T, &M[1]);
1912 nine_d3d_matrix_transpose(&M[4], &T);
1913
1914 /* P matrix */
1915 M[2] = *GET_D3DTS(PROJECTION);
1916
1917 /* V and W matrix */
1918 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1919 M[40] = M[1];
1920 }
1921
1922 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1923 /* load other world matrices */
1924 for (i = 1; i <= 8; ++i) {
1925 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1926 }
1927 }
1928
1929 device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);
1930 }
1931
1932 static void
nine_ff_load_lights(struct NineDevice9 * device)1933 nine_ff_load_lights(struct NineDevice9 *device)
1934 {
1935 struct nine_context *context = &device->context;
1936 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1937 unsigned l;
1938
1939 if (context->changed.group & NINE_STATE_FF_MATERIAL) {
1940 const D3DMATERIAL9 *mtl = &context->ff.material;
1941
1942 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1943 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1944 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1945 dst[23].x = mtl->Power;
1946 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1947 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);
1948 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1949 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1950 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1951 }
1952
1953 if (!(context->changed.group & NINE_STATE_FF_LIGHTING) && !IS_D3DTS_DIRTY(context, VIEW))
1954 return;
1955
1956 for (l = 0; l < context->ff.num_lights_active; ++l) {
1957 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
1958
1959 dst[32 + l * 8].x = light->Type;
1960 dst[32 + l * 8].y = light->Attenuation0;
1961 dst[32 + l * 8].z = light->Attenuation1;
1962 dst[32 + l * 8].w = light->Attenuation2;
1963 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1964 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1965 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1966 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1967 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1968 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1969 dst[37 + l * 8].w = light->Falloff;
1970 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1971 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1972 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1973 dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
1974 }
1975 }
1976
1977 static void
nine_ff_load_point_and_fog_params(struct NineDevice9 * device)1978 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1979 {
1980 struct nine_context *context = &device->context;
1981 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1982
1983 if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))
1984 return;
1985 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
1986 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
1987 dst[26].z = CLAMP(asfloat(context->rs[D3DRS_POINTSIZE]),
1988 asfloat(context->rs[D3DRS_POINTSIZE_MIN]),
1989 asfloat(context->rs[D3DRS_POINTSIZE_MAX]));
1990 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);
1991 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);
1992 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);
1993 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);
1994 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
1995 if (isinf(dst[28].y))
1996 dst[28].y = 0.0f;
1997 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
1998 if (device->driver_caps.emulate_ucp)
1999 memcpy(&dst[196], &context->clip.ucp, sizeof(context->clip));
2000 }
2001
2002 static void
nine_ff_load_tex_matrices(struct NineDevice9 * device)2003 nine_ff_load_tex_matrices(struct NineDevice9 *device)
2004 {
2005 struct nine_context *context = &device->context;
2006 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
2007 unsigned s;
2008
2009 if (!(context->ff.changed.transform[0] & 0xff0000))
2010 return;
2011 for (s = 0; s < 8; ++s) {
2012 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
2013 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, false));
2014 }
2015 }
2016
2017 static void
nine_ff_load_ps_params(struct NineDevice9 * device)2018 nine_ff_load_ps_params(struct NineDevice9 *device)
2019 {
2020 struct nine_context *context = &device->context;
2021 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
2022 unsigned s;
2023
2024 if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))
2025 return;
2026
2027 for (s = 0; s < 8; ++s)
2028 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
2029
2030 for (s = 0; s < 8; ++s) {
2031 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
2032 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
2033 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
2034 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
2035 if (s & 1) {
2036 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2037 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2038 } else {
2039 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2040 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2041 }
2042 }
2043
2044 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);
2045 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);
2046 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);
2047 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
2048 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2049 dst[22].w = (float)context->rs[D3DRS_ALPHAREF] / 255.f;
2050 }
2051
2052 static void
nine_ff_load_viewport_info(struct NineDevice9 * device)2053 nine_ff_load_viewport_info(struct NineDevice9 *device)
2054 {
2055 D3DVIEWPORT9 *viewport = &device->context.viewport;
2056 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
2057 float diffZ = viewport->MaxZ - viewport->MinZ;
2058
2059 /* Note: the other functions avoids to fill the const again if nothing changed.
2060 * But we don't have much to fill, and adding code to allow that may be complex
2061 * so just fill it always */
2062 dst[100].x = 2.0f / (float)(viewport->Width);
2063 dst[100].y = 2.0f / (float)(viewport->Height);
2064 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2065 dst[100].w = (float)(viewport->Width);
2066 dst[101].x = (float)(viewport->X);
2067 dst[101].y = (float)(viewport->Y);
2068 dst[101].z = (float)(viewport->MinZ);
2069 }
2070
2071 void
nine_ff_update(struct NineDevice9 * device)2072 nine_ff_update(struct NineDevice9 *device)
2073 {
2074 struct nine_context *context = &device->context;
2075 struct pipe_constant_buffer cb;
2076
2077 DBG("vs=%p ps=%p\n", context->vs, context->ps);
2078
2079 /* NOTE: the only reference belongs to the hash table */
2080 if (!context->programmable_vs) {
2081 device->ff.vs = nine_ff_get_vs(device);
2082 context->changed.group |= NINE_STATE_VS;
2083 }
2084 if (!context->ps) {
2085 device->ff.ps = nine_ff_get_ps(device);
2086 context->changed.group |= NINE_STATE_PS;
2087 }
2088
2089 if (!context->programmable_vs) {
2090 nine_ff_load_vs_transforms(device);
2091 nine_ff_load_tex_matrices(device);
2092 nine_ff_load_lights(device);
2093 nine_ff_load_point_and_fog_params(device);
2094 nine_ff_load_viewport_info(device);
2095
2096 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
2097
2098 cb.buffer_offset = 0;
2099 cb.buffer = NULL;
2100 cb.user_buffer = device->ff.vs_const;
2101 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2102
2103 context->pipe_data.cb_vs_ff = cb;
2104 context->commit |= NINE_STATE_COMMIT_CONST_VS;
2105
2106 context->changed.group &= ~NINE_STATE_FF_VS;
2107 }
2108
2109 if (!context->ps) {
2110 nine_ff_load_ps_params(device);
2111
2112 cb.buffer_offset = 0;
2113 cb.buffer = NULL;
2114 cb.user_buffer = device->ff.ps_const;
2115 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2116
2117 context->pipe_data.cb_ps_ff = cb;
2118 context->commit |= NINE_STATE_COMMIT_CONST_PS;
2119
2120 context->changed.group &= ~NINE_STATE_FF_PS;
2121 }
2122 }
2123
2124
2125 bool
nine_ff_init(struct NineDevice9 * device)2126 nine_ff_init(struct NineDevice9 *device)
2127 {
2128 device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash,
2129 nine_ff_vs_key_comp);
2130 device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash,
2131 nine_ff_ps_key_comp);
2132
2133 device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash,
2134 nine_ff_fvf_key_comp);
2135
2136 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2137 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2138
2139 return device->ff.ht_vs && device->ff.ht_ps &&
2140 device->ff.ht_fvf &&
2141 device->ff.vs_const && device->ff.ps_const;
2142 }
2143
nine_ff_ht_delete_cb(void * key,void * value,void * data)2144 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2145 {
2146 NineUnknown_Unbind(NineUnknown(value));
2147 return PIPE_OK;
2148 }
2149
2150 void
nine_ff_fini(struct NineDevice9 * device)2151 nine_ff_fini(struct NineDevice9 *device)
2152 {
2153 if (device->ff.ht_vs) {
2154 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2155 _mesa_hash_table_destroy(device->ff.ht_vs, NULL);
2156 }
2157 if (device->ff.ht_ps) {
2158 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2159 _mesa_hash_table_destroy(device->ff.ht_ps, NULL);
2160 }
2161 if (device->ff.ht_fvf) {
2162 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2163 _mesa_hash_table_destroy(device->ff.ht_fvf, NULL);
2164 }
2165 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2166 device->ff.ps = NULL;
2167
2168 FREE(device->ff.vs_const);
2169 FREE(device->ff.ps_const);
2170 }
2171
2172 static void
nine_ff_prune_vs(struct NineDevice9 * device)2173 nine_ff_prune_vs(struct NineDevice9 *device)
2174 {
2175 struct nine_context *context = &device->context;
2176
2177 if (device->ff.num_vs > 1024) {
2178 /* could destroy the bound one here, so unbind */
2179 context->pipe->bind_vs_state(context->pipe, NULL);
2180 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2181 _mesa_hash_table_clear(device->ff.ht_vs, NULL);
2182 device->ff.num_vs = 0;
2183 context->changed.group |= NINE_STATE_VS;
2184 }
2185 }
2186 static void
nine_ff_prune_ps(struct NineDevice9 * device)2187 nine_ff_prune_ps(struct NineDevice9 *device)
2188 {
2189 struct nine_context *context = &device->context;
2190
2191 if (device->ff.num_ps > 1024) {
2192 /* could destroy the bound one here, so unbind */
2193 context->pipe->bind_fs_state(context->pipe, NULL);
2194 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2195 _mesa_hash_table_clear(device->ff.ht_ps, NULL);
2196 device->ff.num_ps = 0;
2197 context->changed.group |= NINE_STATE_PS;
2198 }
2199 }
2200
2201 /* ========================================================================== */
2202
2203 /* Matrix multiplication:
2204 *
2205 * in memory: 0 1 2 3 (row major)
2206 * 4 5 6 7
2207 * 8 9 a b
2208 * c d e f
2209 *
2210 * cA cB cC cD
2211 * r0 = (r0 * cA) (r0 * cB) . .
2212 * r1 = (r1 * cA) (r1 * cB)
2213 * r2 = (r2 * cA) .
2214 * r3 = (r3 * cA) .
2215 *
2216 * r: (11) (12) (13) (14)
2217 * (21) (22) (23) (24)
2218 * (31) (32) (33) (34)
2219 * (41) (42) (43) (44)
2220 * l: (11 12 13 14)
2221 * (21 22 23 24)
2222 * (31 32 33 34)
2223 * (41 42 43 44)
2224 *
2225 * v: (x y z 1 )
2226 *
2227 * t.xyzw = MUL(v.xxxx, r[0]);
2228 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2229 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2230 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2231 *
2232 * v.x = DP4(v, c[0]);
2233 * v.y = DP4(v, c[1]);
2234 * v.z = DP4(v, c[2]);
2235 * v.w = DP4(v, c[3]) = 1
2236 */
2237
2238 /*
2239 static void
2240 nine_D3DMATRIX_print(const D3DMATRIX *M)
2241 {
2242 DBG("\n(%f %f %f %f)\n"
2243 "(%f %f %f %f)\n"
2244 "(%f %f %f %f)\n"
2245 "(%f %f %f %f)\n",
2246 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2247 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2248 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2249 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2250 }
2251 */
2252
2253 static inline float
nine_DP4_row_col(const D3DMATRIX * A,int r,const D3DMATRIX * B,int c)2254 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2255 {
2256 return A->m[r][0] * B->m[0][c] +
2257 A->m[r][1] * B->m[1][c] +
2258 A->m[r][2] * B->m[2][c] +
2259 A->m[r][3] * B->m[3][c];
2260 }
2261
2262 static inline float
nine_DP4_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2263 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2264 {
2265 return v->x * M->m[0][c] +
2266 v->y * M->m[1][c] +
2267 v->z * M->m[2][c] +
2268 1.0f * M->m[3][c];
2269 }
2270
2271 static inline float
nine_DP3_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2272 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2273 {
2274 return v->x * M->m[0][c] +
2275 v->y * M->m[1][c] +
2276 v->z * M->m[2][c];
2277 }
2278
2279 void
nine_d3d_matrix_matrix_mul(D3DMATRIX * D,const D3DMATRIX * L,const D3DMATRIX * R)2280 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2281 {
2282 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2283 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2284 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2285 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2286
2287 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2288 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2289 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2290 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2291
2292 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2293 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2294 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2295 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2296
2297 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2298 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2299 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2300 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2301 }
2302
2303 void
nine_d3d_vector4_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2304 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2305 {
2306 d->x = nine_DP4_vec_col(v, M, 0);
2307 d->y = nine_DP4_vec_col(v, M, 1);
2308 d->z = nine_DP4_vec_col(v, M, 2);
2309 }
2310
2311 void
nine_d3d_vector3_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2312 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2313 {
2314 d->x = nine_DP3_vec_col(v, M, 0);
2315 d->y = nine_DP3_vec_col(v, M, 1);
2316 d->z = nine_DP3_vec_col(v, M, 2);
2317 }
2318
2319 void
nine_d3d_matrix_transpose(D3DMATRIX * D,const D3DMATRIX * M)2320 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2321 {
2322 unsigned i, j;
2323 for (i = 0; i < 4; ++i)
2324 for (j = 0; j < 4; ++j)
2325 D->m[i][j] = M->m[j][i];
2326 }
2327
2328 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2329 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2330 if (t > 0.0f) pos += t; else neg += t; } while(0)
2331
2332 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2333 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2334 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2335 float
nine_d3d_matrix_det(const D3DMATRIX * M)2336 nine_d3d_matrix_det(const D3DMATRIX *M)
2337 {
2338 float pos = 0.0f;
2339 float neg = 0.0f;
2340
2341 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2342 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2343 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2344
2345 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2346 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2347 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2348
2349 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2350 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2351 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2352
2353 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2354 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2355 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2356
2357 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2358 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2359 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2360
2361 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2362 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2363 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2364
2365 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2366 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2367 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2368
2369 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2370 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2371 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2372
2373 return pos + neg;
2374 }
2375
2376 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2377 * I have no idea where this code came from.
2378 */
2379 void
nine_d3d_matrix_inverse(D3DMATRIX * D,const D3DMATRIX * M)2380 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2381 {
2382 int i, k;
2383 float det;
2384
2385 D->m[0][0] =
2386 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2387 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2388 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2389 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2390 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2391 M->m[1][3] * M->m[3][1] * M->m[2][2];
2392
2393 D->m[0][1] =
2394 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2395 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2396 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2397 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2398 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2399 M->m[0][3] * M->m[3][1] * M->m[2][2];
2400
2401 D->m[0][2] =
2402 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2403 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2404 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2405 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2406 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2407 M->m[0][3] * M->m[3][1] * M->m[1][2];
2408
2409 D->m[0][3] =
2410 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2411 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2412 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2413 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2414 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2415 M->m[0][3] * M->m[2][1] * M->m[1][2];
2416
2417 D->m[1][0] =
2418 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2419 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2420 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2421 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2422 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2423 M->m[1][3] * M->m[3][0] * M->m[2][2];
2424
2425 D->m[1][1] =
2426 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2427 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2428 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2429 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2430 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2431 M->m[0][3] * M->m[3][0] * M->m[2][2];
2432
2433 D->m[1][2] =
2434 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2435 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2436 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2437 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2438 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2439 M->m[0][3] * M->m[3][0] * M->m[1][2];
2440
2441 D->m[1][3] =
2442 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2443 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2444 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2445 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2446 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2447 M->m[0][3] * M->m[2][0] * M->m[1][2];
2448
2449 D->m[2][0] =
2450 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2451 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2452 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2453 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2454 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2455 M->m[1][3] * M->m[3][0] * M->m[2][1];
2456
2457 D->m[2][1] =
2458 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2459 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2460 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2461 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2462 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2463 M->m[0][3] * M->m[3][0] * M->m[2][1];
2464
2465 D->m[2][2] =
2466 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2467 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2468 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2469 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2470 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2471 M->m[0][3] * M->m[3][0] * M->m[1][1];
2472
2473 D->m[2][3] =
2474 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2475 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2476 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2477 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2478 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2479 M->m[0][3] * M->m[2][0] * M->m[1][1];
2480
2481 D->m[3][0] =
2482 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2483 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2484 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2485 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2486 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2487 M->m[1][2] * M->m[3][0] * M->m[2][1];
2488
2489 D->m[3][1] =
2490 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2491 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2492 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2493 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2494 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2495 M->m[0][2] * M->m[3][0] * M->m[2][1];
2496
2497 D->m[3][2] =
2498 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2499 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2500 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2501 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2502 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2503 M->m[0][2] * M->m[3][0] * M->m[1][1];
2504
2505 D->m[3][3] =
2506 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2507 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2508 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2509 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2510 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2511 M->m[0][2] * M->m[2][0] * M->m[1][1];
2512
2513 det =
2514 M->m[0][0] * D->m[0][0] +
2515 M->m[1][0] * D->m[0][1] +
2516 M->m[2][0] * D->m[0][2] +
2517 M->m[3][0] * D->m[0][3];
2518
2519 if (fabsf(det) < 1e-30) {/* non inversible */
2520 *D = *M; /* wine tests */
2521 return;
2522 }
2523
2524 det = 1.0 / det;
2525
2526 for (i = 0; i < 4; i++)
2527 for (k = 0; k < 4; k++)
2528 D->m[i][k] *= det;
2529
2530 #if MESA_DEBUG || !defined(NDEBUG)
2531 {
2532 D3DMATRIX I;
2533
2534 nine_d3d_matrix_matrix_mul(&I, D, M);
2535
2536 for (i = 0; i < 4; ++i)
2537 for (k = 0; k < 4; ++k)
2538 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2539 DBG("Matrix inversion check FAILED !\n");
2540 }
2541 #endif
2542 }
2543