1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/bitscan.h"
24 #include "util/u_box.h"
25 #include "util/u_hash_table.h"
26 #include "util/u_upload_mgr.h"
27
/* Debug channel selected for this translation unit. */
#define DBG_CHANNEL DBG_FF

/* Number of constant slots reserved for the fixed-function shaders.
 * 204 matches the VS constants layout documented further down in this file,
 * whose last entry is CONST[203].
 * NOTE(review): the PS constant layout is not visible here - confirm 24
 * against the pixel shader builder. */
#define NINE_FF_NUM_VS_CONST 204
#define NINE_FF_NUM_PS_CONST 24
32
/* Four-component float vector (x, y, z, w). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
37
/* Key describing one fixed-function vertex shader variant.
 *
 * The bitfields are laid out to fill six 32-bit words. value32[] and
 * value64[] alias the same storage; the hash function folds value32[] and
 * the comparison function memcmp()s value64[], so the padding bits take part
 * in both hashing and equality.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* --- word 0 --- */
            uint32_t position_t : 1;
            uint32_t lighting : 1;
            uint32_t darkness : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3;
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_zero : 1;
            uint32_t has_normal : 1;
            uint32_t fog : 1;
            uint32_t normalizenormals : 1;
            uint32_t ucp : 1;
            uint32_t pad1 : 4;
            /* --- word 1 --- */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;
            /* --- word 2 --- */
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;
            /* --- word 3 --- */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
            /* --- word 4 --- */
            uint32_t tc_idx : 24; /* 8 * 3 bits */
            uint32_t clipplane_emulate : 8;
            /* --- word 5 --- */
            uint32_t passthrough; /* tested as a mask of (1 << NINE_DECLUSAGE_x) */
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
78
/* Texture stage state, as packed into each ts[] entry:
 *
 * COLOROP       D3DTOP   5 bit
 * ALPHAOP       D3DTOP   5 bit
 * COLORARG0     D3DTA    3 bit
 * COLORARG1     D3DTA    3 bit
 * COLORARG2     D3DTA    3 bit
 * ALPHAARG0     D3DTA    3 bit
 * ALPHAARG1     D3DTA    3 bit
 * ALPHAARG2     D3DTA    3 bit
 * RESULTARG     D3DTA    1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET              2 bit (1D/2D/3D/CUBE)
 * padding                1 bit
 * ===========================
 *                       32 bit per stage
 *
 * The whole key occupies twelve 32-bit words. value32[] and value64[] alias
 * the same storage; the hash function folds value32[] and the comparison
 * function memcmp()s value64[], so padding takes part in both.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            /* words 0..7: one packed word per texture stage */
            struct {
                uint32_t colorop : 5;
                uint32_t alphaop : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad : 1;
                /* that's 32 bit exactly */
            } ts[8];
            /* word 8: global pixel pipeline switches */
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t fog_source : 1; /* 0: Z, 1: W */
            uint32_t specular : 1;
            uint32_t alpha_test_emulation : 3;
            uint32_t flatshade : 1;
            uint32_t pad1 : 7; /* 9 32-bit words with this */
            /* words 9..11: byte arrays plus trailing padding */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
129
nine_ff_vs_key_hash(const void * key)130 static uint32_t nine_ff_vs_key_hash(const void *key)
131 {
132 const struct nine_ff_vs_key *vs = key;
133 unsigned i;
134 uint32_t hash = vs->value32[0];
135 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
136 hash ^= vs->value32[i];
137 return hash;
138 }
nine_ff_vs_key_comp(const void * key1,const void * key2)139 static bool nine_ff_vs_key_comp(const void *key1, const void *key2)
140 {
141 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
142 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
143
144 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
145 }
nine_ff_ps_key_hash(const void * key)146 static uint32_t nine_ff_ps_key_hash(const void *key)
147 {
148 const struct nine_ff_ps_key *ps = key;
149 unsigned i;
150 uint32_t hash = ps->value32[0];
151 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
152 hash ^= ps->value32[i];
153 return hash;
154 }
nine_ff_ps_key_comp(const void * key1,const void * key2)155 static bool nine_ff_ps_key_comp(const void *key1, const void *key2)
156 {
157 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
158 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
159
160 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
161 }
nine_ff_fvf_key_hash(const void * key)162 static uint32_t nine_ff_fvf_key_hash(const void *key)
163 {
164 return *(DWORD *)key;
165 }
nine_ff_fvf_key_comp(const void * key1,const void * key2)166 static bool nine_ff_fvf_key_comp(const void *key1, const void *key2)
167 {
168 return *(DWORD *)key1 == *(DWORD *)key2;
169 }
170
171 static void nine_ff_prune_vs(struct NineDevice9 *);
172 static void nine_ff_prune_ps(struct NineDevice9 *);
173
nine_ureg_tgsi_dump(struct ureg_program * ureg,bool override)174 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, bool override)
175 {
176 if (debug_get_bool_option("NINE_FF_DUMP", false) || override) {
177 const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL);
178 tgsi_dump(toks, 0);
179 ureg_free_tokens(toks);
180 }
181 }
182
/* Read a single component of a destination register (struct ureg_dst):
 * the register is converted to a source and the component is replicated. */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same, for a register that already is a source (struct ureg_src). */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity: all four components, unswizzled. */
#define _XYZW(r) (r)

/* The macros below expand to code that uses a local variable named `ureg`. */

/* Constant i, addressed indirectly through AL.
 * AL should contain base address of lights table. */
#define LIGHT_CONST(i) ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Material constant i; the material block starts at CONST[19]. */
#define MATERIAL_CONST(i) ureg_DECL_constant(ureg, 19 + (i))

/* Plain constant n. */
#define _CONST(n) ureg_DECL_constant(ureg, n)
203
204 /* VS FF constants layout:
205 *
206 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
207 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
208 * CONST[ 8..11] D3DTS_PROJECTION
209 * CONST[12..15] D3DTS_VIEW^(-1)
210 * CONST[16..18] Normal matrix
211 *
212 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient
213 * CONST[20] MATERIAL.Diffuse
214 * CONST[21] MATERIAL.Ambient
215 * CONST[22] MATERIAL.Specular
216 * CONST[23].x___ MATERIAL.Power
217 * CONST[24] MATERIAL.Emissive
218 * CONST[25] RS.Ambient
219 *
220 * CONST[26].x___ RS.PointSizeMin
221 * CONST[26]._y__ RS.PointSizeMax
222 * CONST[26].__z_ RS.PointSize
223 * CONST[26].___w RS.PointScaleA
224 * CONST[27].x___ RS.PointScaleB
225 * CONST[27]._y__ RS.PointScaleC
226 *
227 * CONST[28].x___ RS.FogEnd
228 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
229 * CONST[28].__z_ RS.FogDensity
 *
231 * CONST[30].x___ TWEENFACTOR
232 *
233 * CONST[32].x___ LIGHT[0].Type
234 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
235 * CONST[33] LIGHT[0].Diffuse
236 * CONST[34] LIGHT[0].Specular
237 * CONST[35] LIGHT[0].Ambient
238 * CONST[36].xyz_ LIGHT[0].Position
239 * CONST[36].___w LIGHT[0].Range
240 * CONST[37].xyz_ LIGHT[0].Direction
241 * CONST[37].___w LIGHT[0].Falloff
242 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
243 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(LIGHT[0].Phi / 2))
245 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
246 * CONST[39].___w 1 if this is the last active light, 0 if not
247 * CONST[40] LIGHT[1]
248 * CONST[48] LIGHT[2]
249 * CONST[56] LIGHT[3]
250 * CONST[64] LIGHT[4]
251 * CONST[72] LIGHT[5]
252 * CONST[80] LIGHT[6]
253 * CONST[88] LIGHT[7]
254 * NOTE: no lighting code is generated if there are no active lights
255 *
256 * CONST[100].x___ Viewport 2/width
257 * CONST[100]._y__ Viewport 2/height
258 * CONST[100].__z_ Viewport 1/(zmax - zmin)
259 * CONST[100].___w Viewport width
260 * CONST[101].x___ Viewport x0
261 * CONST[101]._y__ Viewport y0
262 * CONST[101].__z_ Viewport z0
263 *
264 * CONST[128..131] D3DTS_TEXTURE0
265 * CONST[132..135] D3DTS_TEXTURE1
266 * CONST[136..139] D3DTS_TEXTURE2
267 * CONST[140..143] D3DTS_TEXTURE3
268 * CONST[144..147] D3DTS_TEXTURE4
269 * CONST[148..151] D3DTS_TEXTURE5
270 * CONST[152..155] D3DTS_TEXTURE6
271 * CONST[156..159] D3DTS_TEXTURE7
272 *
273 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
274 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
275 * ...
276 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
277 * CONST[196] UCP0
 * ...
279 * CONST[203] UCP7
280 */
281 struct vs_build_ctx
282 {
283 struct ureg_program *ureg;
284 const struct nine_ff_vs_key *key;
285
286 uint16_t input[PIPE_MAX_ATTRIBS];
287 unsigned num_inputs;
288
289 struct ureg_src aVtx;
290 struct ureg_src aNrm;
291 struct ureg_src aCol[2];
292 struct ureg_src aTex[8];
293 struct ureg_src aPsz;
294 struct ureg_src aInd;
295 struct ureg_src aWgt;
296
297 struct ureg_src aVtx1; /* tweening */
298 struct ureg_src aNrm1;
299
300 struct ureg_src mtlA;
301 struct ureg_src mtlD;
302 struct ureg_src mtlS;
303 struct ureg_src mtlE;
304 };
305
306 static inline unsigned
get_texcoord_sn(struct pipe_screen * screen)307 get_texcoord_sn(struct pipe_screen *screen)
308 {
309 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
310 return TGSI_SEMANTIC_TEXCOORD;
311 return TGSI_SEMANTIC_GENERIC;
312 }
313
314 static inline struct ureg_src
build_vs_add_input(struct vs_build_ctx * vs,uint16_t ndecl)315 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
316 {
317 const unsigned i = vs->num_inputs++;
318 assert(i < PIPE_MAX_ATTRIBS);
319 vs->input[i] = ndecl;
320 return ureg_DECL_vs_input(vs->ureg, i);
321 }
322
323 /* NOTE: dst may alias src */
324 static inline void
ureg_normalize3(struct ureg_program * ureg,struct ureg_dst dst,struct ureg_src src)325 ureg_normalize3(struct ureg_program *ureg,
326 struct ureg_dst dst, struct ureg_src src)
327 {
328 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
329 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
330
331 ureg_DP3(ureg, tmp_x, src, src);
332 ureg_RSQ(ureg, tmp_x, _X(tmp));
333 ureg_MUL(ureg, dst, src, _X(tmp));
334 ureg_release_temporary(ureg, tmp);
335 }
336
337 static void *
nine_ff_build_vs(struct NineDevice9 * device,struct vs_build_ctx * vs)338 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
339 {
340 const struct nine_ff_vs_key *key = vs->key;
341 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
342 struct ureg_dst oPos, oCol[2], oPsz, oFog;
343 struct ureg_dst AR;
344 unsigned i, c;
345 unsigned label[32], l = 0;
346 bool need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
347 bool has_aNrm;
348 bool need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
349 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
350
351 vs->ureg = ureg;
352
353 /* Check which inputs we should transform. */
354 for (i = 0; i < 8 * 3; i += 3) {
355 switch ((key->tc_gen >> i) & 0x7) {
356 case NINED3DTSS_TCI_CAMERASPACENORMAL:
357 need_aNrm = true;
358 break;
359 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
360 need_aVtx = true;
361 break;
362 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
363 need_aVtx = need_aNrm = true;
364 break;
365 case NINED3DTSS_TCI_SPHEREMAP:
366 need_aVtx = need_aNrm = true;
367 break;
368 default:
369 break;
370 }
371 }
372
373 has_aNrm = need_aNrm && key->has_normal;
374
375 /* Declare and record used inputs (needed for linkage with vertex format):
376 * (texture coordinates handled later)
377 */
378 vs->aVtx = build_vs_add_input(vs,
379 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
380
381 vs->aNrm = ureg_imm1f(ureg, 0.0f);
382 if (has_aNrm)
383 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
384
385 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
386 vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
387
388 if (key->lighting || key->darkness) {
389 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
390 key->mtl_ambient | key->mtl_emissive;
391 if ((mask & 0x1) && !key->color0in_one)
392 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
393 if ((mask & 0x2) && !key->color1in_zero)
394 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
395
396 vs->mtlD = MATERIAL_CONST(1);
397 vs->mtlA = MATERIAL_CONST(2);
398 vs->mtlS = MATERIAL_CONST(3);
399 vs->mtlE = MATERIAL_CONST(5);
400 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
401 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
402 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
403 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
404 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
405 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
406 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
407 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
408 } else {
409 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
410 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
411 }
412
413 if (key->vertexpointsize)
414 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
415
416 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
417 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
418 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
419 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
420 if (key->vertextween) {
421 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
422 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
423 }
424
425 /* Declare outputs:
426 */
427 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
428 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
429 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
430 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
431 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16);
432 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
433 }
434
435 if (key->vertexpointsize || key->pointscale || device->driver_caps.always_output_pointsize) {
436 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
437 TGSI_WRITEMASK_X, 0, 1);
438 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
439 }
440
441 if (key->lighting || key->vertexblend)
442 AR = ureg_DECL_address(ureg);
443
444 /* === Vertex transformation / vertex blending:
445 */
446
447 if (key->position_t) {
448 if (device->driver_caps.window_space_position_support) {
449 ureg_MOV(ureg, oPos, vs->aVtx);
450 } else {
451 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
452 /* vs->aVtx contains the coordinates buffer wise.
453 * later in the pipeline, clipping, viewport and division
454 * by w (rhw = 1/w) are going to be applied, so do the reverse
455 * of these transformations (except clipping) to have the good
456 * position at the end.*/
457 ureg_MOV(ureg, tmp, vs->aVtx);
458 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
459 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
460 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
461 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
462 /* Y needs to be reversed */
463 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
464 /* Replace w by 1 if it equals to 0 */
465 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W))),
466 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W), ureg_imm1f(ureg, 1.0f));
467 /* inverse rhw */
468 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
469 /* multiply X, Y, Z by w */
470 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
471 ureg_MOV(ureg, oPos, ureg_src(tmp));
472 ureg_release_temporary(ureg, tmp);
473 }
474 } else if (key->vertexblend) {
475 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
476 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
477 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
478 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
479 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
480 struct ureg_src cWM[4];
481
482 for (i = 160; i <= 195; ++i)
483 ureg_DECL_constant(ureg, i);
484
485 /* translate world matrix index to constant file index */
486 if (key->vertexblend_indexed) {
487 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
488 ureg_ARL(ureg, AR, ureg_src(tmp));
489 }
490
491 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
492 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
493 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
494
495 for (i = 0; i < key->vertexblend; ++i) {
496 for (c = 0; c < 4; ++c) {
497 cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);
498 if (key->vertexblend_indexed)
499 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
500 }
501
502 /* multiply by WORLD(index) */
503 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
504 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
505 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
506 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
507
508 if (has_aNrm) {
509 /* Note: the spec says the transpose of the inverse of the
510 * WorldView matrices should be used, but all tests show
511 * otherwise.
512 * Only case unknown: D3DVBF_0WEIGHTS */
513 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
514 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
515 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
516 }
517
518 if (i < (key->vertexblend - 1)) {
519 /* accumulate weighted position value */
520 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
521 if (has_aNrm)
522 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
523 /* subtract weighted position value for last value */
524 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
525 }
526 }
527
528 /* the last weighted position is always 1 - sum_of_previous_weights */
529 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
530 if (has_aNrm)
531 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
532
533 /* multiply by VIEW_PROJ */
534 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
535 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
536 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
537 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
538
539 if (need_aVtx)
540 vs->aVtx = ureg_src(aVtx_dst);
541
542 ureg_release_temporary(ureg, tmp);
543 ureg_release_temporary(ureg, tmp2);
544 ureg_release_temporary(ureg, sum_blendweights);
545 if (!need_aVtx)
546 ureg_release_temporary(ureg, aVtx_dst);
547
548 if (has_aNrm) {
549 if (key->normalizenormals)
550 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
551 vs->aNrm = ureg_src(aNrm_dst);
552 } else
553 ureg_release_temporary(ureg, aNrm_dst);
554 } else {
555 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
556
557 if (key->vertextween) {
558 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
559 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
560 vs->aVtx = ureg_src(aVtx_dst);
561 if (has_aNrm) {
562 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
563 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
564 vs->aNrm = ureg_src(aNrm_dst);
565 }
566 }
567
568 /* position = vertex * WORLD_VIEW_PROJ */
569 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
570 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
571 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
572 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
573 ureg_release_temporary(ureg, tmp);
574
575 if (need_aVtx) {
576 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
577 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
578 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
579 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
580 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
581 vs->aVtx = ureg_src(aVtx_dst);
582 }
583 if (has_aNrm) {
584 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
585 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
586 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
587 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
588 if (key->normalizenormals)
589 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
590 vs->aNrm = ureg_src(aNrm_dst);
591 }
592 }
593
594 /* === Process point size:
595 */
596 if (key->vertexpointsize || key->pointscale || device->driver_caps.always_output_pointsize) {
597 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
598 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
599 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
600 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
601 if (key->vertexpointsize) {
602 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
603 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
604 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
605 } else {
606 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
607 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
608 }
609
610 if (key->pointscale) {
611 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
612 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
613
614 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
615 ureg_RSQ(ureg, tmp_y, _X(tmp));
616 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
617 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
618 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
619 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
620 ureg_RSQ(ureg, tmp_x, _X(tmp));
621 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
622 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
623 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
624 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
625 }
626
627 ureg_MOV(ureg, oPsz, _Z(tmp));
628 ureg_release_temporary(ureg, tmp);
629 }
630
631 for (i = 0; i < 8; ++i) {
632 struct ureg_dst tmp, tmp_x, tmp2;
633 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
634 unsigned c, writemask;
635 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
636 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
637 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
638 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
639
640 /* No texture output of index s */
641 if (tci == NINED3DTSS_TCI_DISABLE)
642 continue;
643 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
644 tmp = ureg_DECL_temporary(ureg);
645 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
646 input_coord = ureg_DECL_temporary(ureg);
647 transformed = ureg_DECL_temporary(ureg);
648
649 /* Get the coordinate */
650 switch (tci) {
651 case NINED3DTSS_TCI_PASSTHRU:
652 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
653 * Else the idx is used only to determine wrapping mode. */
654 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
655 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
656 break;
657 case NINED3DTSS_TCI_CAMERASPACENORMAL:
658 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
659 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
660 dim_input = 4;
661 break;
662 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
663 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
664 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
665 dim_input = 4;
666 break;
667 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
668 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
669 aVtx_normed = ureg_DECL_temporary(ureg);
670 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
671 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
672 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
673 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
674 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
675 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
676 ureg_release_temporary(ureg, aVtx_normed);
677 dim_input = 4;
678 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
679 break;
680 case NINED3DTSS_TCI_SPHEREMAP:
681 /* Implement the formula of GL_SPHERE_MAP */
682 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
683 aVtx_normed = ureg_DECL_temporary(ureg);
684 tmp2 = ureg_DECL_temporary(ureg);
685 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
686 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
687 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
688 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
689 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
690 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
691 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
692 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
693 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
694 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
695 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
696 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
697 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
698 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
699 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
700 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
701 ureg_release_temporary(ureg, aVtx_normed);
702 ureg_release_temporary(ureg, tmp2);
703 dim_input = 4;
704 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
705 break;
706 default:
707 assert(0);
708 break;
709 }
710
711 /* Apply the transformation */
712 /* dim_output == 0 => do not transform the components.
713 * XYZRHW also disables transformation */
714 if (!dim_output || key->position_t) {
715 ureg_release_temporary(ureg, transformed);
716 transformed = input_coord;
717 writemask = TGSI_WRITEMASK_XYZW;
718 } else {
719 for (c = 0; c < dim_output; c++) {
720 t = ureg_writemask(transformed, 1 << c);
721 switch (dim_input) {
722 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
723 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
724 break;
725 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
726 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
727 break;
728 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
729 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
730 break;
731 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
732 default:
733 assert(0);
734 }
735 }
736 writemask = (1 << dim_output) - 1;
737 ureg_release_temporary(ureg, input_coord);
738 }
739
740 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
741 ureg_release_temporary(ureg, transformed);
742 ureg_release_temporary(ureg, tmp);
743 }
744
745 /* === Lighting:
746 *
747 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
748 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
749 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
750 *
751 * vec3 normal = normalize(in.Normal * NormalMatrix);
752 * vec3 hitDir = light.direction;
753 * float atten = 1.0;
754 *
755 * if (light.type != DIRECTIONAL)
756 * {
757 * vec3 hitVec = light.position - eyeVertex;
758 * float d = length(hitVec);
759 * hitDir = hitVec / d;
760 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
761 * }
762 *
763 * if (light.type == SPOTLIGHT)
764 * {
765 * float rho = dp3(-hitVec, light.direction);
766 * if (rho < cos(light.phi / 2))
767 * atten = 0;
768 * if (rho < cos(light.theta / 2))
769 * atten *= pow(some_func(rho), light.falloff);
770 * }
771 *
772 * float nDotHit = dp3_sat(normal, hitVec);
773 * float powFact = 0.0;
774 *
775 * if (nDotHit > 0.0)
776 * {
777 * vec3 midVec = normalize(hitDir + eye);
778 * float nDotMid = dp3_sat(normal, midVec);
779 * pFact = pow(nDotMid, material.power);
780 * }
781 *
782 * ambient += light.ambient * atten;
783 * diffuse += light.diffuse * atten * nDotHit;
784 * specular += light.specular * atten * powFact;
785 */
786 if (key->lighting) {
787 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
788 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
789 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
790 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
791 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
792 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
793 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
794
795 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
796
797 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
798
799 /* Light.*.Alpha is not used. */
800 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
801 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
802 struct ureg_dst rS = ureg_DECL_temporary(ureg);
803
804 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
805
806 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
807 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
808 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
809 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
810 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
811 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
812 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
813 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
814 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
815 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
816 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
817 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
818 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
819 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
820 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
821
822 const unsigned loop_label = l++;
823
824 /* Declare all light constants to allow indirect adressing */
825 for (i = 32; i < 96; i++)
826 ureg_DECL_constant(ureg, i);
827
828 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
829 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
830 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
831 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
832
833 /* loop management */
834 ureg_BGNLOOP(ureg, &label[loop_label]);
835 ureg_ARL(ureg, AL, _W(rCtr));
836
837 /* if (not DIRECTIONAL light): */
838 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
839 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
840 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
841 ureg_IF(ureg, _X(tmp), &label[l++]);
842 {
843 /* hitDir = light.position - eyeVtx
844 * d = length(hitDir)
845 */
846 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
847 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
848 ureg_RSQ(ureg, tmp_y, _X(tmp));
849 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
850
851 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
852 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
853 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
854 ureg_RCP(ureg, rAtt, _W(rAtt));
855 /* cut-off if distance exceeds Light.Range */
856 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
857 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
858 }
859 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
860 ureg_ENDIF(ureg);
861
862 /* normalize hitDir */
863 ureg_normalize3(ureg, rHit, ureg_src(rHit));
864
865 /* if (SPOT light) */
866 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
867 ureg_IF(ureg, _X(tmp), &label[l++]);
868 {
869 /* rho = dp3(-hitDir, light.spotDir)
870 *
871 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
872 * spotAtt = 1
873 * else
874 * if (rho <= light.cphi2)
875 * spotAtt = 0
876 * else
877 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
878 */
879 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
880 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
881 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
882 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
883 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
884 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
885 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
886 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
887 }
888 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
889 ureg_ENDIF(ureg);
890
891 /* directional factors, let's not use LIT because of clarity */
892
893 if (has_aNrm) {
894 if (key->localviewer) {
895 ureg_normalize3(ureg, rMid, vs->aVtx);
896 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
897 } else {
898 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
899 }
900 ureg_normalize3(ureg, rMid, ureg_src(rMid));
901 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
902 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
903 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
904 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
905 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
906 * No tests were made for backfacing, so add the two conditions */
907 ureg_IF(ureg, _Z(tmp), &label[l++]);
908 {
909 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
910 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
911 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
912 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
913 }
914 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
915 ureg_ENDIF(ureg);
916
917 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
918 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
919 }
920
921 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
922
923 /* break if this was the last light */
924 ureg_IF(ureg, cLLast, &label[l++]);
925 ureg_BRK(ureg);
926 ureg_ENDIF(ureg);
927 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
928
929 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
930 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
931 ureg_ENDLOOP(ureg, &label[loop_label]);
932
933 /* Apply to material:
934 *
935 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
936 * material.ambient * ambient +
937 * material.diffuse * diffuse +
938 * oCol[1] = material.specular * specular;
939 */
940 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
941 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));
942 else {
943 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
944 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
945 }
946
947 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));
948 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
949 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
950 ureg_release_temporary(ureg, rAtt);
951 ureg_release_temporary(ureg, rHit);
952 ureg_release_temporary(ureg, rMid);
953 ureg_release_temporary(ureg, rCtr);
954 ureg_release_temporary(ureg, rD);
955 ureg_release_temporary(ureg, rA);
956 ureg_release_temporary(ureg, rS);
957 ureg_release_temporary(ureg, rAtt);
958 ureg_release_temporary(ureg, tmp);
959 } else
960 /* COLOR */
961 if (key->darkness) {
962 if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
963 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));
964 else
965 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
966 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
967 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
968 } else {
969 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
970 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
971 }
972
973 /* === Process fog.
974 *
975 * exp(x) = ex2(log2(e) * x)
976 */
977 if (key->fog_mode) {
978 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
979 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
980 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
981 if (key->fog_range) {
982 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
983 ureg_RSQ(ureg, tmp_z, _X(tmp));
984 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
985 } else {
986 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
987 }
988
989 if (key->fog_mode == D3DFOG_EXP) {
990 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
991 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
992 ureg_EX2(ureg, tmp_x, _X(tmp));
993 } else
994 if (key->fog_mode == D3DFOG_EXP2) {
995 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
996 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
997 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
998 ureg_EX2(ureg, tmp_x, _X(tmp));
999 } else
1000 if (key->fog_mode == D3DFOG_LINEAR) {
1001 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
1002 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
1003 }
1004 ureg_MOV(ureg, oFog, _X(tmp));
1005 ureg_release_temporary(ureg, tmp);
1006 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
1007 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
1008 }
1009
1010 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
1011 struct ureg_src input;
1012 struct ureg_dst output;
1013 input = vs->aWgt;
1014 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1015 ureg_MOV(ureg, output, input);
1016 }
1017 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1018 struct ureg_src input;
1019 struct ureg_dst output;
1020 input = vs->aInd;
1021 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1022 ureg_MOV(ureg, output, input);
1023 }
1024 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1025 struct ureg_src input;
1026 struct ureg_dst output;
1027 input = vs->aNrm;
1028 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1029 ureg_MOV(ureg, output, input);
1030 }
1031 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1032 struct ureg_src input;
1033 struct ureg_dst output;
1034 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1035 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1036 ureg_MOV(ureg, output, input);
1037 }
1038 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1039 struct ureg_src input;
1040 struct ureg_dst output;
1041 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1042 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23);
1043 ureg_MOV(ureg, output, input);
1044 }
1045 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1046 struct ureg_src input;
1047 struct ureg_dst output;
1048 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1049 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1050 output = oFog;
1051 ureg_MOV(ureg, output, input);
1052 }
1053 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1054 (void) 0; /* TODO: replace z of position output ? */
1055 }
1056
1057 /* ucp for ff applies on world coordinates.
1058 * aVtx is in worldview coordinates. */
1059 if (key->ucp) {
1060 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1061 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1062 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));
1063 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1064 if (!key->clipplane_emulate) {
1065 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1066 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1067 } else {
1068 struct ureg_dst clipdist[2] = {ureg_dst_undef(), ureg_dst_undef()};
1069 int num_clipdist = ffs(key->clipplane_emulate);
1070 ureg_ADD(ureg, tmp, _CONST(15), ureg_src(tmp));
1071 clipdist[0] = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_CLIPDIST, 0,
1072 ((1 << num_clipdist) - 1) & 0xf, 0, 1);
1073 if (num_clipdist >= 5)
1074 clipdist[1] = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_CLIPDIST, 1,
1075 ((1 << (num_clipdist - 4)) - 1) & 0xf, 0, 1);
1076 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED, num_clipdist);
1077 for (i = 0; i < num_clipdist; i++) {
1078 assert(!ureg_dst_is_undef(clipdist[i>>2]));
1079 if (!(key->clipplane_emulate & (1 << i)))
1080 ureg_MOV(ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)), ureg_imm1f(ureg, 0.f));
1081 else
1082 ureg_DP4(ureg, ureg_writemask(clipdist[i>>2], 1 << (i & 0x2)),
1083 ureg_src(tmp), _CONST(196+i));
1084 }
1085 }
1086 ureg_release_temporary(ureg, tmp);
1087 }
1088
1089 if (key->position_t && device->driver_caps.window_space_position_support)
1090 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);
1091
1092 ureg_END(ureg);
1093 nine_ureg_tgsi_dump(ureg, false);
1094 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1095 }
1096
1097 /* PS FF constants layout:
1098 *
1099 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1100 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1101 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1102 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1103 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1104 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1106 *
1107 * CONST[20] D3DRS_TEXTUREFACTOR
1108 * CONST[21] D3DRS_FOGCOLOR
1109 * CONST[22].x___ RS.FogEnd
1110 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1111 * CONST[22].__z_ RS.FogDensity
1112 * CONST[22].___w Alpha ref
1113 */
1114 struct ps_build_ctx
1115 {
1116 struct ureg_program *ureg;
1117 unsigned color_interpolate_flag;
1118
1119 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1120 struct ureg_src vT[8]; /* TEXCOORD[i] */
1121 struct ureg_dst rCur; /* D3DTA_CURRENT */
1122 struct ureg_dst rMod;
1123 struct ureg_src rCurSrc;
1124 struct ureg_dst rTmp; /* D3DTA_TEMP */
1125 struct ureg_src rTmpSrc;
1126 struct ureg_dst rTex;
1127 struct ureg_src rTexSrc;
1128 struct ureg_src cBEM[8];
1129 struct ureg_src s[8];
1130
1131 struct {
1132 unsigned index;
1133 unsigned index_pre_mod;
1134 } stage;
1135 };
1136
1137 static struct ureg_src
ps_get_ts_arg(struct ps_build_ctx * ps,unsigned ta)1138 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1139 {
1140 struct ureg_src reg;
1141
1142 switch (ta & D3DTA_SELECTMASK) {
1143 case D3DTA_CONSTANT:
1144 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1145 break;
1146 case D3DTA_CURRENT:
1147 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1148 break;
1149 case D3DTA_DIFFUSE:
1150 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, ps->color_interpolate_flag);
1151 break;
1152 case D3DTA_SPECULAR:
1153 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, ps->color_interpolate_flag);
1154 break;
1155 case D3DTA_TEMP:
1156 reg = ps->rTmpSrc;
1157 break;
1158 case D3DTA_TEXTURE:
1159 reg = ps->rTexSrc;
1160 break;
1161 case D3DTA_TFACTOR:
1162 reg = ureg_DECL_constant(ps->ureg, 20);
1163 break;
1164 default:
1165 assert(0);
1166 reg = ureg_src_undef();
1167 break;
1168 }
1169 if (ta & D3DTA_COMPLEMENT) {
1170 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1171 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
1172 reg = ureg_src(dst);
1173 }
1174 if (ta & D3DTA_ALPHAREPLICATE)
1175 reg = _WWWW(reg);
1176 return reg;
1177 }
1178
1179 static struct ureg_dst
ps_get_ts_dst(struct ps_build_ctx * ps,unsigned ta)1180 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1181 {
1182 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1183
1184 switch (ta & D3DTA_SELECTMASK) {
1185 case D3DTA_CURRENT:
1186 return ps->rCur;
1187 case D3DTA_TEMP:
1188 return ps->rTmp;
1189 default:
1190 assert(0);
1191 return ureg_dst_undef();
1192 }
1193 }
1194
/* Return a bitmask of the arguments that texture operation @top reads:
 * bit 0 = arg0, bit 1 = arg1, bit 2 = arg2. Operations not listed
 * explicitly read arg1 and arg2.
 */
static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
{
    enum { USES_ARG0 = 0x1, USES_ARG1 = 0x2, USES_ARG2 = 0x4 };

    if (top == D3DTOP_DISABLE)
        return 0x0;

    if (top == D3DTOP_SELECTARG1 || top == D3DTOP_PREMODULATE)
        return USES_ARG1;

    if (top == D3DTOP_SELECTARG2)
        return USES_ARG2;

    if (top == D3DTOP_MULTIPLYADD || top == D3DTOP_LERP)
        return USES_ARG0 | USES_ARG1 | USES_ARG2;

    return USES_ARG1 | USES_ARG2;
}
1212
1213 static inline bool
is_MOV_no_op(struct ureg_dst dst,struct ureg_src src)1214 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1215 {
1216 return !dst.WriteMask ||
1217 (dst.File == src.File &&
1218 dst.Index == src.Index &&
1219 !dst.Indirect &&
1220 !dst.Saturate &&
1221 !src.Indirect &&
1222 !src.Negate &&
1223 !src.Absolute &&
1224 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1225 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1226 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1227 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1228
1229 }
1230
1231 static void
ps_do_ts_op(struct ps_build_ctx * ps,unsigned top,struct ureg_dst dst,struct ureg_src * arg)1232 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1233 {
1234 struct ureg_program *ureg = ps->ureg;
1235 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1236 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1237 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1238
1239 tmp.WriteMask = dst.WriteMask;
1240
1241 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1242 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1243 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1244 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1245 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1246 top != D3DTOP_LERP)
1247 dst = ureg_saturate(dst);
1248
1249 switch (top) {
1250 case D3DTOP_SELECTARG1:
1251 if (!is_MOV_no_op(dst, arg[1]))
1252 ureg_MOV(ureg, dst, arg[1]);
1253 break;
1254 case D3DTOP_SELECTARG2:
1255 if (!is_MOV_no_op(dst, arg[2]))
1256 ureg_MOV(ureg, dst, arg[2]);
1257 break;
1258 case D3DTOP_MODULATE:
1259 ureg_MUL(ureg, dst, arg[1], arg[2]);
1260 break;
1261 case D3DTOP_MODULATE2X:
1262 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1263 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1264 break;
1265 case D3DTOP_MODULATE4X:
1266 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1267 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1268 break;
1269 case D3DTOP_ADD:
1270 ureg_ADD(ureg, dst, arg[1], arg[2]);
1271 break;
1272 case D3DTOP_ADDSIGNED:
1273 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1274 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
1275 break;
1276 case D3DTOP_ADDSIGNED2X:
1277 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1278 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1279 break;
1280 case D3DTOP_SUBTRACT:
1281 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
1282 break;
1283 case D3DTOP_ADDSMOOTH:
1284 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1285 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1286 break;
1287 case D3DTOP_BLENDDIFFUSEALPHA:
1288 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1289 break;
1290 case D3DTOP_BLENDTEXTUREALPHA:
1291 /* XXX: alpha taken from previous stage, texture or result ? */
1292 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1293 break;
1294 case D3DTOP_BLENDFACTORALPHA:
1295 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1296 break;
1297 case D3DTOP_BLENDTEXTUREALPHAPM:
1298 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
1299 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1300 break;
1301 case D3DTOP_BLENDCURRENTALPHA:
1302 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1303 break;
1304 case D3DTOP_PREMODULATE:
1305 ureg_MOV(ureg, dst, arg[1]);
1306 ps->stage.index_pre_mod = ps->stage.index + 1;
1307 break;
1308 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1309 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1310 break;
1311 case D3DTOP_MODULATECOLOR_ADDALPHA:
1312 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1313 break;
1314 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1315 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
1316 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1317 break;
1318 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1319 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1320 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1321 break;
1322 case D3DTOP_BUMPENVMAP:
1323 break;
1324 case D3DTOP_BUMPENVMAPLUMINANCE:
1325 break;
1326 case D3DTOP_DOTPRODUCT3:
1327 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1328 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1329 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1330 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1331 break;
1332 case D3DTOP_MULTIPLYADD:
1333 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1334 break;
1335 case D3DTOP_LERP:
1336 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1337 break;
1338 case D3DTOP_DISABLE:
1339 /* no-op ? */
1340 break;
1341 default:
1342 assert(!"invalid D3DTOP");
1343 break;
1344 }
1345 ureg_release_temporary(ureg, tmp);
1346 ureg_release_temporary(ureg, tmp2);
1347 }
1348
1349 static void *
nine_ff_build_ps(struct NineDevice9 * device,struct nine_ff_ps_key * key)1350 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1351 {
1352 struct ps_build_ctx ps;
1353 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1354 struct ureg_dst oCol;
1355 unsigned s;
1356 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1357
1358 memset(&ps, 0, sizeof(ps));
1359 ps.ureg = ureg;
1360 ps.color_interpolate_flag = key->flatshade ? TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
1361 ps.stage.index_pre_mod = -1;
1362
1363 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, ps.color_interpolate_flag);
1364
1365 ps.rCur = ureg_DECL_temporary(ureg);
1366 ps.rTmp = ureg_DECL_temporary(ureg);
1367 ps.rTex = ureg_DECL_temporary(ureg);
1368 ps.rCurSrc = ureg_src(ps.rCur);
1369 ps.rTmpSrc = ureg_src(ps.rTmp);
1370 ps.rTexSrc = ureg_src(ps.rTex);
1371
1372 /* Initial values */
1373 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1374 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1375 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1376
1377 for (s = 0; s < 8; ++s) {
1378 ps.s[s] = ureg_src_undef();
1379
1380 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1381 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1382 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1383 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1384 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1385
1386 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1387 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1388 key->ts[s].colorarg2 == D3DTA_TEXTURE ||
1389 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1390 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1391 ps.s[s] = ureg_DECL_sampler(ureg, s);
1392 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1393 }
1394 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1395 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1396 ps.s[s] = ureg_DECL_sampler(ureg, s);
1397 }
1398
1399 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1400 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1401 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1402 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1403 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1404
1405 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1406 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1407 key->ts[s].alphaarg2 == D3DTA_TEXTURE ||
1408 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1409 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1410 ps.s[s] = ureg_DECL_sampler(ureg, s);
1411 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1412 }
1413 }
1414 }
1415 if (key->specular)
1416 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, ps.color_interpolate_flag);
1417
1418 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1419
1420 /* Run stages.
1421 */
1422 for (s = 0; s < 8; ++s) {
1423 unsigned colorarg[3];
1424 unsigned alphaarg[3];
1425 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1426 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1427 struct ureg_dst dst;
1428 struct ureg_src arg[3];
1429
1430 if (key->ts[s].colorop == D3DTOP_DISABLE) {
1431 assert (key->ts[s].alphaop == D3DTOP_DISABLE);
1432 continue;
1433 }
1434 ps.stage.index = s;
1435
1436 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1437 nine_D3DTOP_to_str(key->ts[s].colorop),
1438 nine_D3DTOP_to_str(key->ts[s].alphaop));
1439
1440 if (!ureg_src_is_undef(ps.s[s])) {
1441 unsigned target;
1442 struct ureg_src texture_coord = ps.vT[s];
1443 struct ureg_dst delta;
1444 switch (key->ts[s].textarget) {
1445 case 0: target = TGSI_TEXTURE_1D; break;
1446 case 1: target = TGSI_TEXTURE_2D; break;
1447 case 2: target = TGSI_TEXTURE_3D; break;
1448 case 3: target = TGSI_TEXTURE_CUBE; break;
1449 /* this is a 2 bit bitfield, do I really need a default case ? */
1450 }
1451
1452 /* Modify coordinates */
1453 if (s >= 1 &&
1454 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1455 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1456 delta = ureg_DECL_temporary(ureg);
1457 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1458 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1459 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1460 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1461 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1462 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1463 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1464 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1465 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1466 /* Prepare luminance multiplier
1467 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1468 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1469 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1470 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1471
1472 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1473 }
1474 }
1475 if (key->projected & (3 << (s *2))) {
1476 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1477 if (dim == 4)
1478 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1479 else {
1480 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1481 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1482 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1483 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1484 ureg_release_temporary(ureg, tmp);
1485 }
1486 } else {
1487 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1488 }
1489 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1490 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1491 }
1492
1493 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1494 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1495 continue;
1496
1497 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1498
1499 if (ps.stage.index_pre_mod == ps.stage.index) {
1500 ps.rMod = ureg_DECL_temporary(ureg);
1501 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1502 }
1503
1504 colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1505 colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1506 colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1507 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1508 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1509 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1510
1511 if (key->ts[s].colorop != key->ts[s].alphaop ||
1512 colorarg[0] != alphaarg[0] ||
1513 colorarg[1] != alphaarg[1] ||
1514 colorarg[2] != alphaarg[2])
1515 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1516
1517 /* Special DOTPRODUCT behaviour (see wine tests) */
1518 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1519 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1520
1521 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1522 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1523 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1524 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1525
1526 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1527 dst.WriteMask = TGSI_WRITEMASK_W;
1528
1529 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1530 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1531 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1532 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1533 }
1534 }
1535
1536 if (key->specular)
1537 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);
1538
1539 if (key->alpha_test_emulation == PIPE_FUNC_NEVER) {
1540 ureg_KILL(ureg);
1541 } else if (key->alpha_test_emulation != PIPE_FUNC_ALWAYS) {
1542 unsigned cmp_op;
1543 struct ureg_src src[2];
1544 struct ureg_dst tmp = ps.rTmp;
1545 cmp_op = pipe_comp_to_tgsi_opposite(key->alpha_test_emulation);
1546 src[0] = ureg_scalar(ps.rCurSrc, TGSI_SWIZZLE_W); /* Read color alpha channel */
1547 src[1] = _WWWW(_CONST(22)); /* Read alpha ref */
1548 ureg_insn(ureg, cmp_op, &tmp, 1, src, 2, 0);
1549 ureg_KILL_IF(ureg, ureg_negate(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X))); /* if opposite test passes, discard */
1550 }
1551
1552 /* Fog.
1553 */
1554 if (key->fog_mode) {
1555 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1556 struct ureg_src vPos;
1557 if (device->screen->get_param(device->screen,
1558 PIPE_CAP_FS_POSITION_IS_SYSVAL)) {
1559 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1560 } else {
1561 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1562 TGSI_INTERPOLATE_LINEAR);
1563 }
1564
1565 /* Source is either W or Z.
1566 * Z is when an orthogonal projection matrix is detected,
1567 * W (WFOG) else.
1568 */
1569 if (!key->fog_source)
1570 ureg_MOV(ureg, rFog, _ZZZZ(vPos));
1571 else
1572 /* Position's w is 1/w */
1573 ureg_RCP(ureg, rFog, _WWWW(vPos));
1574
1575 if (key->fog_mode == D3DFOG_EXP) {
1576 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1577 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1578 ureg_EX2(ureg, rFog, _X(rFog));
1579 } else
1580 if (key->fog_mode == D3DFOG_EXP2) {
1581 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1582 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1583 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1584 ureg_EX2(ureg, rFog, _X(rFog));
1585 } else
1586 if (key->fog_mode == D3DFOG_LINEAR) {
1587 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
1588 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1589 }
1590 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1591 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1592 } else
1593 if (key->fog) {
1594 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE);
1595 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1596 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1597 } else {
1598 ureg_MOV(ureg, oCol, ps.rCurSrc);
1599 }
1600
1601 ureg_END(ureg);
1602 nine_ureg_tgsi_dump(ureg, false);
1603 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1604 }
1605
/* Return the fixed-function vertex shader matching the current device state.
 *
 * A nine_ff_vs_key is assembled from the bound vertex declaration, the render
 * states and the texture stage states, then looked up in device->ff.ht_vs.
 * On a miss a shader is built with nine_ff_build_vs(), registered in the hash
 * table under a copy of the key and returned.
 *
 * Returns the cached or newly created shader. The code checks vs for NULL
 * after NineVertexShader9_new(), so NULL is returned when creation did not
 * produce a shader.
 */
static struct NineVertexShader9 *
nine_ff_get_vs(struct NineDevice9 *device)
{
    const struct nine_context *context = &device->context;
    struct NineVertexShader9 *vs;
    struct vs_build_ctx bld;
    struct nine_ff_vs_key key;
    unsigned s, i;
    bool has_indexes = false;
    bool has_weights = false;
    /* Dimension of each TEXCOORD input found in the vertex declaration;
     * 0 means that texcoord index is not present. */
    int8_t input_texture_coord[8];

    assert(sizeof(key) <= sizeof(key.value32));

    /* Start from all-zero state: only fields set below are non-zero. */
    memset(&key, 0, sizeof(key));
    memset(&bld, 0, sizeof(bld));
    memset(&input_texture_coord, 0, sizeof(input_texture_coord));

    bld.key = &key;

    /* FIXME: this shouldn't be NULL, but it is on init */
    if (context->vdecl) {
        /* Assume COLOR0/COLOR1 are missing until the declaration proves
         * otherwise. */
        key.color0in_one = 1;
        key.color1in_zero = 1;
        for (i = 0; i < context->vdecl->nelems; i++) {
            uint16_t usage = context->vdecl->usage_map[i];
            if (usage == NINE_DECLUSAGE_POSITIONT)
                key.position_t = 1;
            else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
                key.color0in_one = 0;
            else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
                key.color1in_zero = 0;
            else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
                has_indexes = true;
                key.passthrough |= 1 << usage;
            } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
                has_weights = true;
                key.passthrough |= 1 << usage;
            } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
                key.has_normal = 1;
                key.passthrough |= 1 << usage;
            } else if (usage == NINE_DECLUSAGE_PSIZE)
                key.vertexpointsize = 1;
            else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
                /* usage encodes (usage type, index); recover the index. */
                s = usage / NINE_DECLUSAGE_COUNT;
                if (s < 8)
                    input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);
                else
                    DBG("FF given texture coordinate >= 8. Ignoring\n");
            } else if (usage < NINE_DECLUSAGE_NONE)
                key.passthrough |= 1 << usage;
        }
    }
    /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
     * We do restrict to indices 0 */
    key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
                         (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
                         (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
    /* Passthrough is only kept for pre-transformed (POSITIONT) vertices. */
    if (!key.position_t)
        key.passthrough = 0;
    key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];

    /* lighting: enabled with at least one active light;
     * darkness: enabled with no active light. Both are dropped for
     * pre-transformed vertices. */
    key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active;
    key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
    if (key.position_t) {
        key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
        key.lighting = 0;
    }
    if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {
        /* Only allow a material source whose vertex color is present:
         * bit 0 = color1 (COLOR0 input), bit 1 = color2 (COLOR1 input). */
        uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
        key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
        key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
        key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
        key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
    }
    key.fog = !!context->rs[D3DRS_FOGENABLE];
    /* Vertex fog mode is ignored for pre-transformed vertices. */
    key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;
    if (key.fog_mode)
        key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];

    key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];
    key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];
    key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];
    /* When the driver needs emulated user clip planes, the low 8 bits of the
     * enable mask become part of the key. */
    key.clipplane_emulate = device->driver_caps.emulate_ucp ? (context->rs[D3DRS_CLIPPLANEENABLE] & 0xff) : 0;

    if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
        key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;

        /* key.vertexblend is written as (weights + 1) for 1..3 weights; with
         * 0 weights it is 1 only when indexed blending is active. */
        switch (context->rs[D3DRS_VERTEXBLEND]) {
        case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
        case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
        case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
        case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
        case D3DVBF_TWEENING: key.vertextween = 1; break;
        default:
            assert(!"invalid D3DVBF");
            break;
        }
        /* Weighted blending without a BLENDWEIGHT input is disabled. */
        if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
            key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
    }

    for (s = 0; s < 8; ++s) {
        /* D3DTSS_TEXCOORDINDEX: the bits above 16 select the generation
         * mode (stored here +1), the low 3 bits the input texcoord index. */
        unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
        unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
        unsigned dim;

        /* Pre-transformed vertices cannot use generated coordinates. */
        if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
            gen = NINED3DTSS_TCI_PASSTHRU;

        /* Passthrough of a texcoord the declaration does not provide. */
        if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)
            gen = NINED3DTSS_TCI_DISABLE;

        /* Pack per-stage values: 3 bits each for gen/idx/output dim,
         * 2 bits for (input dim - 1). */
        key.tc_gen |= gen << (s * 3);
        key.tc_idx |= idx << (s * 3);
        key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);

        dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
        /* Values above 4 fall back to the input dimension. */
        if (dim > 4)
            dim = input_texture_coord[idx];
        if (dim == 1) /* NV behaviour */
            dim = 0;
        key.tc_dim_output |= dim << (s * 3);
    }

    DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));
    vs = util_hash_table_get(device->ff.ht_vs, &key);
    if (vs)
        return vs;
    NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));

    /* Prune before inserting so the new shader is not discarded. */
    nine_ff_prune_vs(device);
    if (vs) {
        unsigned n;

        /* The hash table key must outlive this call, so it points at the
         * copy stored inside the shader object. */
        memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));

        _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs);
        device->ff.num_vs++;

        vs->num_inputs = bld.num_inputs;
        for (n = 0; n < bld.num_inputs; ++n)
            vs->input_map[n].ndecl = bld.input[n];

        vs->position_t = key.position_t;
        vs->point_size = key.vertexpointsize | key.pointscale | device->driver_caps.always_output_pointsize;
    }
    return vs;
}
1755
/* Look up transform D3DTS_<n> in the fixed-function state. Expands to code
 * that uses a local variable named `context`, so callers must have one.
 * NOTE(review): the FALSE argument presumably means "do not allocate" --
 * confirm against nine_state_access_transform(). */
#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
/* Non-zero when the dirty bit of transform D3DTS_<n> is set in
 * (s)->ff.changed.transform, a bitmask stored as 32-bit words. */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1758
/* Return the fixed-function pixel shader matching the current device state.
 *
 * A nine_ff_ps_key is assembled from the texture stage states, the bound
 * textures and a few render states, then looked up in device->ff.ht_ps.
 * On a miss a shader is built with nine_ff_build_ps(), registered in the hash
 * table under a copy of the key and returned.
 *
 * Returns the cached or newly created shader. The code checks ps for NULL
 * after NinePixelShader9_new(), so NULL is returned when creation did not
 * produce a shader.
 */
static struct NinePixelShader9 *
nine_ff_get_ps(struct NineDevice9 *device)
{
    struct nine_context *context = &device->context;
    struct NinePixelShader9 *ps;
    struct nine_ff_ps_key key;
    unsigned s;
    /* One bit per stage that references D3DTA_TEXTURE in any argument. */
    uint8_t sampler_mask = 0;

    assert(sizeof(key) <= sizeof(key.value32));

    memset(&key, 0, sizeof(key));
    /* Walk the stages until the first disabled one; `s` is reused after the
     * loop as the index of that stage (8 if none is disabled). */
    for (s = 0; s < 8; ++s) {
        key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
        key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
        /* Bitmask of the arguments each op reads: bit 0 = arg0,
         * bit 1 = arg1, bit 2 = arg2 (as used by the tests below). */
        const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
        const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
        /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
         * ALPHAOP cannot be enabled if COLOROP is disabled.
         * Verified on Windows. */
        if (key.ts[s].colorop == D3DTOP_DISABLE) {
            key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
            break;
        }

        if (!context->texture[s].enabled &&
            ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
              used_c & 0x1) ||
             (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
              used_c & 0x2) ||
             (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
              used_c & 0x4))) {
            /* Tested on Windows: Invalid texture read disables the stage
             * and the subsequent ones, but only for colorop. For alpha,
             * it's as if the texture had alpha of 1.0, which is what
             * has our dummy texture in that case. Invalid color also
             * disabled the following alpha stages. */
            key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
            break;
        }

        /* Note: unlike the check above, this ignores used_c/used_a. */
        if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
            context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
            context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
            context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
            context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
            context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
            sampler_mask |= (1 << s);

        /* Only arguments the op actually reads enter the key. The low 3
         * bits of each argument go into the per-stage field; bits 4 and 5
         * are collected into per-argument bitmasks indexed by stage. */
        if (key.ts[s].colorop != D3DTOP_DISABLE) {
            if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;
            if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;
            if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;
            if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;
            if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;
            if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;
            if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;
            if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;
            if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;
        }
        /* For alpha arguments only bit 4 is recorded. */
        if (key.ts[s].alphaop != D3DTOP_DISABLE) {
            if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;
            if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;
            if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;
            if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;
            if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;
            if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;
        }
        /* 1 when the stage writes to the TEMP register, 0 otherwise. */
        key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;

        /* textarget: 1 = 2D texture, 2 = volume texture, 3 = cube texture.
         * A stage without an enabled texture is keyed as 2D. */
        if (context->texture[s].enabled) {
            switch (context->texture[s].type) {
            case D3DRTYPE_TEXTURE:       key.ts[s].textarget = 1; break;
            case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
            case D3DRTYPE_CUBETEXTURE:   key.ts[s].textarget = 3; break;
            default:
                assert(!"unexpected texture type");
                break;
            }
        } else {
            key.ts[s].textarget = 1;
        }
    }

    /* Note: If colorop is D3DTOP_DISABLE for the first stage
     * (which implies alphaop is too), nothing particular happens,
     * that is, current is equal to diffuse (which is the case anyway,
     * because it is how it is initialized).
     * Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
     * because then if the resultarg is TEMP, then diffuse alpha is written
     * to it. */
    if (key.ts[0].colorop != D3DTOP_DISABLE &&
        key.ts[0].alphaop == D3DTOP_DISABLE &&
        key.ts[0].resultarg != 0) {
        key.ts[0].alphaop = D3DTOP_SELECTARG1;
        key.ts[0].alphaarg1 = D3DTA_DIFFUSE;
    }
    /* When no alpha stage writes to current, diffuse alpha is taken.
     * Since we initialize current to diffuse, we have the behaviour. */

    /* Last stage always writes to Current */
    if (s >= 1)
        key.ts[s-1].resultarg = 0;

    key.projected = nine_ff_get_projected_key_ff(context);
    key.specular = !!context->rs[D3DRS_SPECULARENABLE];
    key.flatshade = context->rs[D3DRS_SHADEMODE] == D3DSHADE_FLAT;

    /* Mark every stage from the first disabled one onwards as disabled so
     * that equivalent states produce identical keys. */
    for (; s < 8; ++s)
        key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
    if (context->rs[D3DRS_FOGENABLE])
        key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];
    key.fog = !!context->rs[D3DRS_FOGENABLE];
    if (key.fog_mode && key.fog)
        key.fog_source = !context->zfog;
    key.alpha_test_emulation = context->rs[NINED3DRS_EMULATED_ALPHATEST] & 0x7;

    DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));
    ps = util_hash_table_get(device->ff.ht_ps, &key);
    if (ps)
        return ps;
    NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));

    /* Prune before inserting so the new shader is not discarded. */
    nine_ff_prune_ps(device);
    if (ps) {
        /* The hash table key must outlive this call, so it points at the
         * copy stored inside the shader object. */
        memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));

        _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps);
        device->ff.num_ps++;

        ps->rt_mask = 0x1;
        ps->sampler_mask = sampler_mask;
    }
    return ps;
}
1894
1895 static void
nine_ff_load_vs_transforms(struct NineDevice9 * device)1896 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1897 {
1898 struct nine_context *context = &device->context;
1899 D3DMATRIX T;
1900 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1901 unsigned i;
1902
1903 /* TODO: make this nicer, and only upload the ones we need */
1904 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1905
1906 if (IS_D3DTS_DIRTY(context, WORLD) ||
1907 IS_D3DTS_DIRTY(context, VIEW) ||
1908 IS_D3DTS_DIRTY(context, PROJECTION)) {
1909 /* WVP, WV matrices */
1910 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1911 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1912
1913 /* normal matrix == transpose(inverse(WV)) */
1914 nine_d3d_matrix_inverse(&T, &M[1]);
1915 nine_d3d_matrix_transpose(&M[4], &T);
1916
1917 /* P matrix */
1918 M[2] = *GET_D3DTS(PROJECTION);
1919
1920 /* V and W matrix */
1921 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1922 M[40] = M[1];
1923 }
1924
1925 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1926 /* load other world matrices */
1927 for (i = 1; i <= 8; ++i) {
1928 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1929 }
1930 }
1931
1932 device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);
1933 }
1934
1935 static void
nine_ff_load_lights(struct NineDevice9 * device)1936 nine_ff_load_lights(struct NineDevice9 *device)
1937 {
1938 struct nine_context *context = &device->context;
1939 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1940 unsigned l;
1941
1942 if (context->changed.group & NINE_STATE_FF_MATERIAL) {
1943 const D3DMATERIAL9 *mtl = &context->ff.material;
1944
1945 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1946 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1947 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1948 dst[23].x = mtl->Power;
1949 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1950 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);
1951 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1952 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1953 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1954 }
1955
1956 if (!(context->changed.group & NINE_STATE_FF_LIGHTING))
1957 return;
1958
1959 for (l = 0; l < context->ff.num_lights_active; ++l) {
1960 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
1961
1962 dst[32 + l * 8].x = light->Type;
1963 dst[32 + l * 8].y = light->Attenuation0;
1964 dst[32 + l * 8].z = light->Attenuation1;
1965 dst[32 + l * 8].w = light->Attenuation2;
1966 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1967 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1968 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1969 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1970 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1971 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1972 dst[37 + l * 8].w = light->Falloff;
1973 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1974 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1975 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1976 dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
1977 }
1978 }
1979
1980 static void
nine_ff_load_point_and_fog_params(struct NineDevice9 * device)1981 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1982 {
1983 struct nine_context *context = &device->context;
1984 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1985
1986 if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))
1987 return;
1988 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
1989 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
1990 dst[26].z = CLAMP(asfloat(context->rs[D3DRS_POINTSIZE]),
1991 asfloat(context->rs[D3DRS_POINTSIZE_MIN]),
1992 asfloat(context->rs[D3DRS_POINTSIZE_MAX]));
1993 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);
1994 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);
1995 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);
1996 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);
1997 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
1998 if (isinf(dst[28].y))
1999 dst[28].y = 0.0f;
2000 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2001 if (device->driver_caps.emulate_ucp)
2002 memcpy(&dst[196], &context->clip.ucp, sizeof(context->clip));
2003 }
2004
2005 static void
nine_ff_load_tex_matrices(struct NineDevice9 * device)2006 nine_ff_load_tex_matrices(struct NineDevice9 *device)
2007 {
2008 struct nine_context *context = &device->context;
2009 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
2010 unsigned s;
2011
2012 if (!(context->ff.changed.transform[0] & 0xff0000))
2013 return;
2014 for (s = 0; s < 8; ++s) {
2015 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
2016 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, false));
2017 }
2018 }
2019
2020 static void
nine_ff_load_ps_params(struct NineDevice9 * device)2021 nine_ff_load_ps_params(struct NineDevice9 *device)
2022 {
2023 struct nine_context *context = &device->context;
2024 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
2025 unsigned s;
2026
2027 if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))
2028 return;
2029
2030 for (s = 0; s < 8; ++s)
2031 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
2032
2033 for (s = 0; s < 8; ++s) {
2034 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
2035 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
2036 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
2037 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
2038 if (s & 1) {
2039 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2040 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2041 } else {
2042 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2043 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2044 }
2045 }
2046
2047 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);
2048 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);
2049 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);
2050 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
2051 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2052 dst[22].w = (float)context->rs[D3DRS_ALPHAREF] / 255.f;
2053 }
2054
2055 static void
nine_ff_load_viewport_info(struct NineDevice9 * device)2056 nine_ff_load_viewport_info(struct NineDevice9 *device)
2057 {
2058 D3DVIEWPORT9 *viewport = &device->context.viewport;
2059 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
2060 float diffZ = viewport->MaxZ - viewport->MinZ;
2061
2062 /* Note: the other functions avoids to fill the const again if nothing changed.
2063 * But we don't have much to fill, and adding code to allow that may be complex
2064 * so just fill it always */
2065 dst[100].x = 2.0f / (float)(viewport->Width);
2066 dst[100].y = 2.0f / (float)(viewport->Height);
2067 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2068 dst[100].w = (float)(viewport->Width);
2069 dst[101].x = (float)(viewport->X);
2070 dst[101].y = (float)(viewport->Y);
2071 dst[101].z = (float)(viewport->MinZ);
2072 }
2073
2074 void
nine_ff_update(struct NineDevice9 * device)2075 nine_ff_update(struct NineDevice9 *device)
2076 {
2077 struct nine_context *context = &device->context;
2078 struct pipe_constant_buffer cb;
2079
2080 DBG("vs=%p ps=%p\n", context->vs, context->ps);
2081
2082 /* NOTE: the only reference belongs to the hash table */
2083 if (!context->programmable_vs) {
2084 device->ff.vs = nine_ff_get_vs(device);
2085 context->changed.group |= NINE_STATE_VS;
2086 }
2087 if (!context->ps) {
2088 device->ff.ps = nine_ff_get_ps(device);
2089 context->changed.group |= NINE_STATE_PS;
2090 }
2091
2092 if (!context->programmable_vs) {
2093 nine_ff_load_vs_transforms(device);
2094 nine_ff_load_tex_matrices(device);
2095 nine_ff_load_lights(device);
2096 nine_ff_load_point_and_fog_params(device);
2097 nine_ff_load_viewport_info(device);
2098
2099 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
2100
2101 cb.buffer_offset = 0;
2102 cb.buffer = NULL;
2103 cb.user_buffer = device->ff.vs_const;
2104 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2105
2106 context->pipe_data.cb_vs_ff = cb;
2107 context->commit |= NINE_STATE_COMMIT_CONST_VS;
2108
2109 context->changed.group &= ~NINE_STATE_FF_VS;
2110 }
2111
2112 if (!context->ps) {
2113 nine_ff_load_ps_params(device);
2114
2115 cb.buffer_offset = 0;
2116 cb.buffer = NULL;
2117 cb.user_buffer = device->ff.ps_const;
2118 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2119
2120 context->pipe_data.cb_ps_ff = cb;
2121 context->commit |= NINE_STATE_COMMIT_CONST_PS;
2122
2123 context->changed.group &= ~NINE_STATE_FF_PS;
2124 }
2125 }
2126
2127
2128 bool
nine_ff_init(struct NineDevice9 * device)2129 nine_ff_init(struct NineDevice9 *device)
2130 {
2131 device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash,
2132 nine_ff_vs_key_comp);
2133 device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash,
2134 nine_ff_ps_key_comp);
2135
2136 device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash,
2137 nine_ff_fvf_key_comp);
2138
2139 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2140 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2141
2142 return device->ff.ht_vs && device->ff.ht_ps &&
2143 device->ff.ht_fvf &&
2144 device->ff.vs_const && device->ff.ps_const;
2145 }
2146
nine_ff_ht_delete_cb(void * key,void * value,void * data)2147 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2148 {
2149 NineUnknown_Unbind(NineUnknown(value));
2150 return PIPE_OK;
2151 }
2152
2153 void
nine_ff_fini(struct NineDevice9 * device)2154 nine_ff_fini(struct NineDevice9 *device)
2155 {
2156 if (device->ff.ht_vs) {
2157 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2158 _mesa_hash_table_destroy(device->ff.ht_vs, NULL);
2159 }
2160 if (device->ff.ht_ps) {
2161 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2162 _mesa_hash_table_destroy(device->ff.ht_ps, NULL);
2163 }
2164 if (device->ff.ht_fvf) {
2165 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2166 _mesa_hash_table_destroy(device->ff.ht_fvf, NULL);
2167 }
2168 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2169 device->ff.ps = NULL;
2170
2171 FREE(device->ff.vs_const);
2172 FREE(device->ff.ps_const);
2173 }
2174
2175 static void
nine_ff_prune_vs(struct NineDevice9 * device)2176 nine_ff_prune_vs(struct NineDevice9 *device)
2177 {
2178 struct nine_context *context = &device->context;
2179
2180 if (device->ff.num_vs > 1024) {
2181 /* could destroy the bound one here, so unbind */
2182 context->pipe->bind_vs_state(context->pipe, NULL);
2183 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2184 _mesa_hash_table_clear(device->ff.ht_vs, NULL);
2185 device->ff.num_vs = 0;
2186 context->changed.group |= NINE_STATE_VS;
2187 }
2188 }
2189 static void
nine_ff_prune_ps(struct NineDevice9 * device)2190 nine_ff_prune_ps(struct NineDevice9 *device)
2191 {
2192 struct nine_context *context = &device->context;
2193
2194 if (device->ff.num_ps > 1024) {
2195 /* could destroy the bound one here, so unbind */
2196 context->pipe->bind_fs_state(context->pipe, NULL);
2197 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2198 _mesa_hash_table_clear(device->ff.ht_ps, NULL);
2199 device->ff.num_ps = 0;
2200 context->changed.group |= NINE_STATE_PS;
2201 }
2202 }
2203
2204 /* ========================================================================== */
2205
2206 /* Matrix multiplication:
2207 *
2208 * in memory: 0 1 2 3 (row major)
2209 * 4 5 6 7
2210 * 8 9 a b
2211 * c d e f
2212 *
2213 * cA cB cC cD
2214 * r0 = (r0 * cA) (r0 * cB) . .
2215 * r1 = (r1 * cA) (r1 * cB)
2216 * r2 = (r2 * cA) .
2217 * r3 = (r3 * cA) .
2218 *
2219 * r: (11) (12) (13) (14)
2220 * (21) (22) (23) (24)
2221 * (31) (32) (33) (34)
2222 * (41) (42) (43) (44)
2223 * l: (11 12 13 14)
2224 * (21 22 23 24)
2225 * (31 32 33 34)
2226 * (41 42 43 44)
2227 *
2228 * v: (x y z 1 )
2229 *
2230 * t.xyzw = MUL(v.xxxx, r[0]);
2231 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2232 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2233 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2234 *
2235 * v.x = DP4(v, c[0]);
2236 * v.y = DP4(v, c[1]);
2237 * v.z = DP4(v, c[2]);
2238 * v.w = DP4(v, c[3]) = 1
2239 */
2240
2241 /*
2242 static void
2243 nine_D3DMATRIX_print(const D3DMATRIX *M)
2244 {
2245 DBG("\n(%f %f %f %f)\n"
2246 "(%f %f %f %f)\n"
2247 "(%f %f %f %f)\n"
2248 "(%f %f %f %f)\n",
2249 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2250 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2251 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2252 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2253 }
2254 */
2255
2256 static inline float
nine_DP4_row_col(const D3DMATRIX * A,int r,const D3DMATRIX * B,int c)2257 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2258 {
2259 return A->m[r][0] * B->m[0][c] +
2260 A->m[r][1] * B->m[1][c] +
2261 A->m[r][2] * B->m[2][c] +
2262 A->m[r][3] * B->m[3][c];
2263 }
2264
2265 static inline float
nine_DP4_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2266 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2267 {
2268 return v->x * M->m[0][c] +
2269 v->y * M->m[1][c] +
2270 v->z * M->m[2][c] +
2271 1.0f * M->m[3][c];
2272 }
2273
2274 static inline float
nine_DP3_vec_col(const D3DVECTOR * v,const D3DMATRIX * M,int c)2275 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2276 {
2277 return v->x * M->m[0][c] +
2278 v->y * M->m[1][c] +
2279 v->z * M->m[2][c];
2280 }
2281
2282 void
nine_d3d_matrix_matrix_mul(D3DMATRIX * D,const D3DMATRIX * L,const D3DMATRIX * R)2283 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2284 {
2285 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2286 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2287 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2288 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2289
2290 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2291 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2292 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2293 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2294
2295 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2296 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2297 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2298 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2299
2300 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2301 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2302 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2303 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2304 }
2305
2306 void
nine_d3d_vector4_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2307 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2308 {
2309 d->x = nine_DP4_vec_col(v, M, 0);
2310 d->y = nine_DP4_vec_col(v, M, 1);
2311 d->z = nine_DP4_vec_col(v, M, 2);
2312 }
2313
2314 void
nine_d3d_vector3_matrix_mul(D3DVECTOR * d,const D3DVECTOR * v,const D3DMATRIX * M)2315 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2316 {
2317 d->x = nine_DP3_vec_col(v, M, 0);
2318 d->y = nine_DP3_vec_col(v, M, 1);
2319 d->z = nine_DP3_vec_col(v, M, 2);
2320 }
2321
2322 void
nine_d3d_matrix_transpose(D3DMATRIX * D,const D3DMATRIX * M)2323 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2324 {
2325 unsigned i, j;
2326 for (i = 0; i < 4; ++i)
2327 for (j = 0; j < 4; ++j)
2328 D->m[i][j] = M->m[j][i];
2329 }
2330
/* Helpers for nine_d3d_matrix_det(). Each evaluates one permutation product
 * M->_1i * M->_2j * M->_3k * M->_4l and accumulates it with a positive
 * (ADD) or negative (SUB) sign. Positive contributions go to `pos`, negative
 * ones to `neg`, so each accumulator only ever sums values of one sign.
 * They rely on locals named M, pos and neg being in scope. */
#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
    float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
    if (t > 0.0f) pos += t; else neg += t; } while(0)

#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
    float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
    if (t > 0.0f) neg -= t; else pos -= t; } while(0)
/* Return the determinant of the 4x4 matrix M.
 *
 * The determinant is expanded as the signed sum of all 24 permutation
 * products: the 12 even permutations are added, the 12 odd ones subtracted.
 * Positive and negative contributions are summed separately and combined
 * only at the end.
 */
float
nine_d3d_matrix_det(const D3DMATRIX *M)
{
    float pos = 0.0f;
    float neg = 0.0f;

    /* Even permutations of the column indices. */
    _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
    _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
    _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);

    _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
    _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
    _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);

    _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
    _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
    _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);

    _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
    _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
    _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);

    /* Odd permutations of the column indices. */
    _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
    _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
    _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);

    _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
    _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
    _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);

    _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
    _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
    _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);

    _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
    _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
    _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);

    return pos + neg;
}
2378
/* XXX: Probably better to just use src/mesa/math/m_matrix.c because
 * I have no idea where this code came from.
 */
/* Store the inverse of M in D.
 *
 * The 16 cofactor expressions (the adjugate of M) are written into D first.
 * The determinant is then formed from the first column of M and the first
 * row of D, and every element of D is scaled by 1 / det.
 *
 * If |det| < 1e-30 the matrix is treated as non-invertible and D receives a
 * plain copy of M instead.
 *
 * D must not alias M: D is written while M is still being read.
 *
 * In debug builds the product D * M is compared against the identity and a
 * message is logged for every element that is off by more than 1e-3.
 */
void
nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
{
    int i, k;
    float det;

    D->m[0][0] =
        M->m[1][1] * M->m[2][2] * M->m[3][3] -
        M->m[1][1] * M->m[3][2] * M->m[2][3] -
        M->m[1][2] * M->m[2][1] * M->m[3][3] +
        M->m[1][2] * M->m[3][1] * M->m[2][3] +
        M->m[1][3] * M->m[2][1] * M->m[3][2] -
        M->m[1][3] * M->m[3][1] * M->m[2][2];

    D->m[0][1] =
       -M->m[0][1] * M->m[2][2] * M->m[3][3] +
        M->m[0][1] * M->m[3][2] * M->m[2][3] +
        M->m[0][2] * M->m[2][1] * M->m[3][3] -
        M->m[0][2] * M->m[3][1] * M->m[2][3] -
        M->m[0][3] * M->m[2][1] * M->m[3][2] +
        M->m[0][3] * M->m[3][1] * M->m[2][2];

    D->m[0][2] =
        M->m[0][1] * M->m[1][2] * M->m[3][3] -
        M->m[0][1] * M->m[3][2] * M->m[1][3] -
        M->m[0][2] * M->m[1][1] * M->m[3][3] +
        M->m[0][2] * M->m[3][1] * M->m[1][3] +
        M->m[0][3] * M->m[1][1] * M->m[3][2] -
        M->m[0][3] * M->m[3][1] * M->m[1][2];

    D->m[0][3] =
       -M->m[0][1] * M->m[1][2] * M->m[2][3] +
        M->m[0][1] * M->m[2][2] * M->m[1][3] +
        M->m[0][2] * M->m[1][1] * M->m[2][3] -
        M->m[0][2] * M->m[2][1] * M->m[1][3] -
        M->m[0][3] * M->m[1][1] * M->m[2][2] +
        M->m[0][3] * M->m[2][1] * M->m[1][2];

    D->m[1][0] =
       -M->m[1][0] * M->m[2][2] * M->m[3][3] +
        M->m[1][0] * M->m[3][2] * M->m[2][3] +
        M->m[1][2] * M->m[2][0] * M->m[3][3] -
        M->m[1][2] * M->m[3][0] * M->m[2][3] -
        M->m[1][3] * M->m[2][0] * M->m[3][2] +
        M->m[1][3] * M->m[3][0] * M->m[2][2];

    D->m[1][1] =
        M->m[0][0] * M->m[2][2] * M->m[3][3] -
        M->m[0][0] * M->m[3][2] * M->m[2][3] -
        M->m[0][2] * M->m[2][0] * M->m[3][3] +
        M->m[0][2] * M->m[3][0] * M->m[2][3] +
        M->m[0][3] * M->m[2][0] * M->m[3][2] -
        M->m[0][3] * M->m[3][0] * M->m[2][2];

    D->m[1][2] =
       -M->m[0][0] * M->m[1][2] * M->m[3][3] +
        M->m[0][0] * M->m[3][2] * M->m[1][3] +
        M->m[0][2] * M->m[1][0] * M->m[3][3] -
        M->m[0][2] * M->m[3][0] * M->m[1][3] -
        M->m[0][3] * M->m[1][0] * M->m[3][2] +
        M->m[0][3] * M->m[3][0] * M->m[1][2];

    D->m[1][3] =
        M->m[0][0] * M->m[1][2] * M->m[2][3] -
        M->m[0][0] * M->m[2][2] * M->m[1][3] -
        M->m[0][2] * M->m[1][0] * M->m[2][3] +
        M->m[0][2] * M->m[2][0] * M->m[1][3] +
        M->m[0][3] * M->m[1][0] * M->m[2][2] -
        M->m[0][3] * M->m[2][0] * M->m[1][2];

    D->m[2][0] =
        M->m[1][0] * M->m[2][1] * M->m[3][3] -
        M->m[1][0] * M->m[3][1] * M->m[2][3] -
        M->m[1][1] * M->m[2][0] * M->m[3][3] +
        M->m[1][1] * M->m[3][0] * M->m[2][3] +
        M->m[1][3] * M->m[2][0] * M->m[3][1] -
        M->m[1][3] * M->m[3][0] * M->m[2][1];

    D->m[2][1] =
       -M->m[0][0] * M->m[2][1] * M->m[3][3] +
        M->m[0][0] * M->m[3][1] * M->m[2][3] +
        M->m[0][1] * M->m[2][0] * M->m[3][3] -
        M->m[0][1] * M->m[3][0] * M->m[2][3] -
        M->m[0][3] * M->m[2][0] * M->m[3][1] +
        M->m[0][3] * M->m[3][0] * M->m[2][1];

    D->m[2][2] =
        M->m[0][0] * M->m[1][1] * M->m[3][3] -
        M->m[0][0] * M->m[3][1] * M->m[1][3] -
        M->m[0][1] * M->m[1][0] * M->m[3][3] +
        M->m[0][1] * M->m[3][0] * M->m[1][3] +
        M->m[0][3] * M->m[1][0] * M->m[3][1] -
        M->m[0][3] * M->m[3][0] * M->m[1][1];

    D->m[2][3] =
       -M->m[0][0] * M->m[1][1] * M->m[2][3] +
        M->m[0][0] * M->m[2][1] * M->m[1][3] +
        M->m[0][1] * M->m[1][0] * M->m[2][3] -
        M->m[0][1] * M->m[2][0] * M->m[1][3] -
        M->m[0][3] * M->m[1][0] * M->m[2][1] +
        M->m[0][3] * M->m[2][0] * M->m[1][1];

    D->m[3][0] =
       -M->m[1][0] * M->m[2][1] * M->m[3][2] +
        M->m[1][0] * M->m[3][1] * M->m[2][2] +
        M->m[1][1] * M->m[2][0] * M->m[3][2] -
        M->m[1][1] * M->m[3][0] * M->m[2][2] -
        M->m[1][2] * M->m[2][0] * M->m[3][1] +
        M->m[1][2] * M->m[3][0] * M->m[2][1];

    D->m[3][1] =
        M->m[0][0] * M->m[2][1] * M->m[3][2] -
        M->m[0][0] * M->m[3][1] * M->m[2][2] -
        M->m[0][1] * M->m[2][0] * M->m[3][2] +
        M->m[0][1] * M->m[3][0] * M->m[2][2] +
        M->m[0][2] * M->m[2][0] * M->m[3][1] -
        M->m[0][2] * M->m[3][0] * M->m[2][1];

    D->m[3][2] =
       -M->m[0][0] * M->m[1][1] * M->m[3][2] +
        M->m[0][0] * M->m[3][1] * M->m[1][2] +
        M->m[0][1] * M->m[1][0] * M->m[3][2] -
        M->m[0][1] * M->m[3][0] * M->m[1][2] -
        M->m[0][2] * M->m[1][0] * M->m[3][1] +
        M->m[0][2] * M->m[3][0] * M->m[1][1];

    D->m[3][3] =
        M->m[0][0] * M->m[1][1] * M->m[2][2] -
        M->m[0][0] * M->m[2][1] * M->m[1][2] -
        M->m[0][1] * M->m[1][0] * M->m[2][2] +
        M->m[0][1] * M->m[2][0] * M->m[1][2] +
        M->m[0][2] * M->m[1][0] * M->m[2][1] -
        M->m[0][2] * M->m[2][0] * M->m[1][1];

    /* Determinant: first column of M against the first row of D, which
     * holds the matching cofactors at this point. */
    det =
        M->m[0][0] * D->m[0][0] +
        M->m[1][0] * D->m[0][1] +
        M->m[2][0] * D->m[0][2] +
        M->m[3][0] * D->m[0][3];

    if (fabsf(det) < 1e-30) {/* non-invertible */
        *D = *M; /* wine tests */
        return;
    }

    det = 1.0 / det;

    /* Scale the adjugate by 1 / det to obtain the inverse. */
    for (i = 0; i < 4; i++)
    for (k = 0; k < 4; k++)
        D->m[i][k] *= det;

#if defined(DEBUG) || !defined(NDEBUG)
    {
        D3DMATRIX I;

        /* Sanity check: D * M should be close to the identity. */
        nine_d3d_matrix_matrix_mul(&I, D, M);

        for (i = 0; i < 4; ++i)
        for (k = 0; k < 4; ++k)
            if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
                DBG("Matrix inversion check FAILED !\n");
    }
#endif
}
2546