1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "util/u_memory.h"
30
31 #include "svga_tgsi_emit.h"
32
33
34 /**
35 * Translate TGSI semantic info into SVGA3d semantic info.
36 * This is called for VS outputs and PS inputs only.
37 */
38 static boolean
translate_vs_ps_semantic(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned * usage,unsigned * idx)39 translate_vs_ps_semantic(struct svga_shader_emitter *emit,
40 struct tgsi_declaration_semantic semantic,
41 unsigned *usage,
42 unsigned *idx)
43 {
44 switch (semantic.Name) {
45 case TGSI_SEMANTIC_POSITION:
46 *idx = semantic.Index;
47 *usage = SVGA3D_DECLUSAGE_POSITION;
48 break;
49 case TGSI_SEMANTIC_COLOR:
50 *idx = semantic.Index;
51 *usage = SVGA3D_DECLUSAGE_COLOR;
52 break;
53 case TGSI_SEMANTIC_BCOLOR:
54 *idx = semantic.Index + 2; /* sharing with COLOR */
55 *usage = SVGA3D_DECLUSAGE_COLOR;
56 break;
57 case TGSI_SEMANTIC_FOG:
58 *idx = 0;
59 assert(semantic.Index == 0);
60 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
61 break;
62 case TGSI_SEMANTIC_PSIZE:
63 *idx = semantic.Index;
64 *usage = SVGA3D_DECLUSAGE_PSIZE;
65 break;
66 case TGSI_SEMANTIC_GENERIC:
67 *idx = svga_remap_generic_index(emit->key.generic_remap_table,
68 semantic.Index);
69 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
70 break;
71 case TGSI_SEMANTIC_NORMAL:
72 *idx = semantic.Index;
73 *usage = SVGA3D_DECLUSAGE_NORMAL;
74 break;
75 case TGSI_SEMANTIC_CLIPDIST:
76 case TGSI_SEMANTIC_CLIPVERTEX:
77 /* XXX at this time we don't support clip distance or clip vertices */
78 debug_warn_once("unsupported clip distance/vertex attribute\n");
79 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
80 *idx = 0;
81 return TRUE;
82 default:
83 assert(0);
84 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
85 *idx = 0;
86 return FALSE;
87 }
88
89 return TRUE;
90 }
91
92
93 /**
94 * Emit a PS input (or VS depth/fog output) register declaration.
95 * For example, if usage = SVGA3D_DECLUSAGE_TEXCOORD, reg.num = 1, and
96 * index = 3, we'll emit "dcl_texcoord3 v1".
97 */
98 static boolean
emit_decl(struct svga_shader_emitter * emit,SVGA3dShaderDestToken reg,unsigned usage,unsigned index)99 emit_decl(struct svga_shader_emitter *emit,
100 SVGA3dShaderDestToken reg,
101 unsigned usage,
102 unsigned index)
103 {
104 SVGA3DOpDclArgs dcl;
105 SVGA3dShaderInstToken opcode;
106
107 /* check values against bitfield sizes */
108 assert(index < 16);
109 assert(usage <= SVGA3D_DECLUSAGE_MAX);
110
111 opcode = inst_token(SVGA3DOP_DCL);
112 dcl.values[0] = 0;
113 dcl.values[1] = 0;
114
115 dcl.dst = reg;
116 dcl.usage = usage;
117 dcl.index = index;
118 dcl.values[0] |= 1<<31;
119
120 return (emit_instruction(emit, opcode) &&
121 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
122 }
123
124
125 /**
126 * Emit declaration for PS front/back-face input register.
127 */
128 static boolean
emit_vface_decl(struct svga_shader_emitter * emit)129 emit_vface_decl(struct svga_shader_emitter *emit)
130 {
131 if (!emit->emitted_vface) {
132 SVGA3dShaderDestToken reg =
133 dst_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
134
135 if (!emit_decl(emit, reg, 0, 0))
136 return FALSE;
137
138 emit->emitted_vface = TRUE;
139 }
140 return TRUE;
141 }
142
143
144 /**
145 * Emit PS input register to pass depth/fog coordinates.
146 * Note that this always goes into texcoord[0].
147 */
148 static boolean
ps30_input_emit_depth_fog(struct svga_shader_emitter * emit,struct src_register * out)149 ps30_input_emit_depth_fog(struct svga_shader_emitter *emit,
150 struct src_register *out)
151 {
152 struct src_register reg;
153
154 if (emit->emitted_depth_fog) {
155 *out = emit->ps_depth_fog;
156 return TRUE;
157 }
158
159 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
160 return FALSE;
161
162 reg = src_register(SVGA3DREG_INPUT,
163 emit->ps30_input_count++);
164
165 *out = emit->ps_depth_fog = reg;
166
167 emit->emitted_depth_fog = TRUE;
168
169 return emit_decl(emit, dst(reg), SVGA3D_DECLUSAGE_TEXCOORD, 0);
170 }
171
172
173 /**
174 * Process a PS input declaration.
175 * We'll emit a declaration like "dcl_texcoord1 v2"
176 */
177 static boolean
ps30_input(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)178 ps30_input(struct svga_shader_emitter *emit,
179 struct tgsi_declaration_semantic semantic,
180 unsigned idx)
181 {
182 unsigned usage, index;
183 SVGA3dShaderDestToken reg;
184
185 if (semantic.Name == TGSI_SEMANTIC_POSITION) {
186
187 emit->ps_true_pos = src_register(SVGA3DREG_MISCTYPE,
188 SVGA3DMISCREG_POSITION);
189 emit->ps_true_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
190 TGSI_SWIZZLE_Y,
191 TGSI_SWIZZLE_Y,
192 TGSI_SWIZZLE_Y);
193 reg = writemask(dst(emit->ps_true_pos),
194 TGSI_WRITEMASK_XY);
195 emit->ps_reads_pos = TRUE;
196
197 if (emit->info.reads_z) {
198 emit->ps_temp_pos = dst_register(SVGA3DREG_TEMP,
199 emit->nr_hw_temp);
200
201 emit->input_map[idx] = src_register(SVGA3DREG_TEMP,
202 emit->nr_hw_temp);
203 emit->nr_hw_temp++;
204
205 if (!ps30_input_emit_depth_fog(emit, &emit->ps_depth_pos))
206 return FALSE;
207
208 emit->ps_depth_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,
209 TGSI_SWIZZLE_Z,
210 TGSI_SWIZZLE_Z,
211 TGSI_SWIZZLE_W);
212 }
213 else {
214 emit->input_map[idx] = emit->ps_true_pos;
215 }
216
217 return emit_decl(emit, reg, 0, 0);
218 }
219 else if (emit->key.fs.light_twoside &&
220 (semantic.Name == TGSI_SEMANTIC_COLOR)) {
221
222 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
223 return FALSE;
224
225 emit->internal_color_idx[emit->internal_color_count] = idx;
226 emit->input_map[idx] =
227 src_register(SVGA3DREG_INPUT, emit->ps30_input_count);
228 emit->ps30_input_count++;
229 emit->internal_color_count++;
230
231 reg = dst(emit->input_map[idx]);
232
233 if (!emit_decl(emit, reg, usage, index))
234 return FALSE;
235
236 semantic.Name = TGSI_SEMANTIC_BCOLOR;
237 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
238 return FALSE;
239
240 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
241 return FALSE;
242
243 reg = dst_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
244
245 if (!emit_decl(emit, reg, usage, index))
246 return FALSE;
247
248 if (!emit_vface_decl(emit))
249 return FALSE;
250
251 return TRUE;
252 }
253 else if (semantic.Name == TGSI_SEMANTIC_FACE) {
254 if (!emit_vface_decl(emit))
255 return FALSE;
256 emit->emit_frontface = TRUE;
257 emit->internal_frontface_idx = idx;
258 return TRUE;
259 }
260 else if (semantic.Name == TGSI_SEMANTIC_FOG) {
261
262 assert(semantic.Index == 0);
263
264 if (!ps30_input_emit_depth_fog(emit, &emit->input_map[idx]))
265 return FALSE;
266
267 emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
268 TGSI_SWIZZLE_X,
269 TGSI_SWIZZLE_X,
270 TGSI_SWIZZLE_X);
271 return TRUE;
272 }
273 else {
274
275 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
276 return FALSE;
277
278 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
279 return FALSE;
280
281 emit->input_map[idx] =
282 src_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
283
284 reg = dst(emit->input_map[idx]);
285
286 if (!emit_decl(emit, reg, usage, index))
287 return FALSE;
288
289 if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
290 emit->key.sprite_origin_lower_left &&
291 index >= 1 &&
292 emit->key.sprite_coord_enable & (1 << semantic.Index)) {
293 /* This is a sprite texture coord with lower-left origin.
294 * We need to invert the texture T coordinate since the SVGA3D
295 * device only supports an upper-left origin.
296 */
297 unsigned unit = index - 1;
298
299 emit->inverted_texcoords |= (1 << unit);
300
301 /* save original texcoord reg */
302 emit->ps_true_texcoord[unit] = emit->input_map[idx];
303
304 /* this temp register will be the results of the MAD instruction */
305 emit->ps_inverted_texcoord[unit] =
306 src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
307 emit->nr_hw_temp++;
308
309 emit->ps_inverted_texcoord_input[unit] = idx;
310
311 /* replace input_map entry with the temp register */
312 emit->input_map[idx] = emit->ps_inverted_texcoord[unit];
313 }
314
315 return TRUE;
316 }
317
318 }
319
320
321 /**
322 * Process a PS output declaration.
323 * Note that we don't actually emit a SVGA3DOpDcl for PS outputs.
324 * \idx register index, such as OUT[2] (not semantic index)
325 */
326 static boolean
ps30_output(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)327 ps30_output(struct svga_shader_emitter *emit,
328 struct tgsi_declaration_semantic semantic,
329 unsigned idx)
330 {
331 switch (semantic.Name) {
332 case TGSI_SEMANTIC_COLOR:
333 if (emit->unit == PIPE_SHADER_FRAGMENT) {
334 if (emit->key.fs.white_fragments) {
335 /* Used for XOR logicop mode */
336 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
337 emit->nr_hw_temp++);
338 emit->temp_color_output[idx] = emit->output_map[idx];
339 emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT,
340 semantic.Index);
341 }
342 else if (emit->key.fs.write_color0_to_n_cbufs) {
343 /* We'll write color output [0] to all render targets.
344 * Prepare all the output registers here, but only when the
345 * semantic.Index == 0 so we don't do this more than once.
346 */
347 if (semantic.Index == 0) {
348 unsigned i;
349 for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
350 emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
351 emit->nr_hw_temp++);
352 emit->temp_color_output[i] = emit->output_map[idx+i];
353 emit->true_color_output[i] = dst_register(SVGA3DREG_COLOROUT,
354 i);
355 }
356 }
357 }
358 else {
359 emit->output_map[idx] =
360 dst_register(SVGA3DREG_COLOROUT, semantic.Index);
361 }
362 }
363 else {
364 emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT,
365 semantic.Index);
366 }
367 break;
368 case TGSI_SEMANTIC_POSITION:
369 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
370 emit->nr_hw_temp++);
371 emit->temp_pos = emit->output_map[idx];
372 emit->true_pos = dst_register(SVGA3DREG_DEPTHOUT,
373 semantic.Index);
374 break;
375 default:
376 assert(0);
377 /* A wild stab in the dark. */
378 emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT, 0);
379 break;
380 }
381
382 return TRUE;
383 }
384
385
386 /**
387 * Declare a VS input register.
388 * We still make up the input semantics the same as in 2.0
389 */
390 static boolean
vs30_input(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)391 vs30_input(struct svga_shader_emitter *emit,
392 struct tgsi_declaration_semantic semantic,
393 unsigned idx)
394 {
395 SVGA3DOpDclArgs dcl;
396 SVGA3dShaderInstToken opcode;
397 unsigned usage, index;
398
399 opcode = inst_token(SVGA3DOP_DCL);
400 dcl.values[0] = 0;
401 dcl.values[1] = 0;
402
403 emit->input_map[idx] = src_register(SVGA3DREG_INPUT, idx);
404 dcl.dst = dst_register(SVGA3DREG_INPUT, idx);
405
406 assert(dcl.dst.reserved0);
407
408 svga_generate_vdecl_semantics(idx, &usage, &index);
409
410 dcl.usage = usage;
411 dcl.index = index;
412 dcl.values[0] |= 1<<31;
413
414 return (emit_instruction(emit, opcode) &&
415 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
416 }
417
418
419 /**
420 * Declare VS output for holding depth/fog.
421 */
422 static boolean
vs30_output_emit_depth_fog(struct svga_shader_emitter * emit,SVGA3dShaderDestToken * out)423 vs30_output_emit_depth_fog(struct svga_shader_emitter *emit,
424 SVGA3dShaderDestToken *out)
425 {
426 SVGA3dShaderDestToken reg;
427
428 if (emit->emitted_depth_fog) {
429 *out = emit->vs_depth_fog;
430 return TRUE;
431 }
432
433 reg = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
434
435 *out = emit->vs_depth_fog = reg;
436
437 emit->emitted_depth_fog = TRUE;
438
439 return emit_decl(emit, reg, SVGA3D_DECLUSAGE_TEXCOORD, 0);
440 }
441
442
443 /**
444 * Declare a VS output.
445 * VS3.0 outputs have proper declarations and semantic info for
446 * matching against PS inputs.
447 */
448 static boolean
vs30_output(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)449 vs30_output(struct svga_shader_emitter *emit,
450 struct tgsi_declaration_semantic semantic,
451 unsigned idx)
452 {
453 SVGA3DOpDclArgs dcl;
454 SVGA3dShaderInstToken opcode;
455 unsigned usage, index;
456
457 opcode = inst_token(SVGA3DOP_DCL);
458 dcl.values[0] = 0;
459 dcl.values[1] = 0;
460
461 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
462 return FALSE;
463
464 if (emit->vs30_output_count >= SVGA3D_OUTPUTREG_MAX)
465 return FALSE;
466
467 dcl.dst = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
468 dcl.usage = usage;
469 dcl.index = index;
470 dcl.values[0] |= 1<<31;
471
472 if (semantic.Name == TGSI_SEMANTIC_POSITION) {
473 assert(idx == 0);
474 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
475 emit->nr_hw_temp++);
476 emit->temp_pos = emit->output_map[idx];
477 emit->true_pos = dcl.dst;
478
479 /* Grab an extra output for the depth output */
480 if (!vs30_output_emit_depth_fog(emit, &emit->depth_pos))
481 return FALSE;
482
483 }
484 else if (semantic.Name == TGSI_SEMANTIC_PSIZE) {
485 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
486 emit->nr_hw_temp++);
487 emit->temp_psiz = emit->output_map[idx];
488
489 /* This has the effect of not declaring psiz (below) and not
490 * emitting the final MOV to true_psiz in the postamble.
491 */
492 if (!emit->key.vs.allow_psiz)
493 return TRUE;
494
495 emit->true_psiz = dcl.dst;
496 }
497 else if (semantic.Name == TGSI_SEMANTIC_FOG) {
498 /*
499 * Fog is shared with depth.
500 * So we need to decrement out_count since emit_depth_fog will increment it.
501 */
502 emit->vs30_output_count--;
503
504 if (!vs30_output_emit_depth_fog(emit, &emit->output_map[idx]))
505 return FALSE;
506
507 return TRUE;
508 }
509 else {
510 emit->output_map[idx] = dcl.dst;
511 }
512
513 return (emit_instruction(emit, opcode) &&
514 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
515 }
516
517
518 /** Translate PIPE_TEXTURE_x to SVGA3DSAMP_x */
519 static ubyte
svga_tgsi_sampler_type(const struct svga_shader_emitter * emit,int idx)520 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
521 {
522 switch (emit->sampler_target[idx]) {
523 case TGSI_TEXTURE_1D:
524 return SVGA3DSAMP_2D;
525 case TGSI_TEXTURE_2D:
526 case TGSI_TEXTURE_RECT:
527 return SVGA3DSAMP_2D;
528 case TGSI_TEXTURE_SHADOW2D:
529 return SVGA3DSAMP_2D_SHADOW;
530 case TGSI_TEXTURE_3D:
531 return SVGA3DSAMP_VOLUME;
532 case TGSI_TEXTURE_CUBE:
533 return SVGA3DSAMP_CUBE;
534 }
535
536 return SVGA3DSAMP_UNKNOWN;
537 }
538
539
540 static boolean
ps30_sampler(struct svga_shader_emitter * emit,unsigned idx)541 ps30_sampler(struct svga_shader_emitter *emit,
542 unsigned idx)
543 {
544 SVGA3DOpDclArgs dcl;
545 SVGA3dShaderInstToken opcode;
546
547 opcode = inst_token(SVGA3DOP_DCL);
548 dcl.values[0] = 0;
549 dcl.values[1] = 0;
550
551 dcl.dst = dst_register(SVGA3DREG_SAMPLER, idx);
552 dcl.type = svga_tgsi_sampler_type(emit, idx);
553 dcl.values[0] |= 1<<31;
554
555 return (emit_instruction(emit, opcode) &&
556 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
557 }
558
559
560 boolean
svga_shader_emit_samplers_decl(struct svga_shader_emitter * emit)561 svga_shader_emit_samplers_decl(struct svga_shader_emitter *emit)
562 {
563 unsigned i;
564
565 for (i = 0; i < emit->num_samplers; i++) {
566 if (!ps30_sampler(emit, i))
567 return FALSE;
568 }
569 return TRUE;
570 }
571
572
573 boolean
svga_translate_decl_sm30(struct svga_shader_emitter * emit,const struct tgsi_full_declaration * decl)574 svga_translate_decl_sm30(struct svga_shader_emitter *emit,
575 const struct tgsi_full_declaration *decl)
576 {
577 unsigned first = decl->Range.First;
578 unsigned last = decl->Range.Last;
579 unsigned idx;
580
581 for (idx = first; idx <= last; idx++) {
582 boolean ok = TRUE;
583
584 switch (decl->Declaration.File) {
585 case TGSI_FILE_SAMPLER:
586 assert (emit->unit == PIPE_SHADER_FRAGMENT);
587 /* just keep track of the number of samplers here.
588 * Will emit the declaration in the helpers function.
589 */
590 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
591 break;
592
593 case TGSI_FILE_INPUT:
594 if (emit->unit == PIPE_SHADER_VERTEX)
595 ok = vs30_input(emit, decl->Semantic, idx);
596 else
597 ok = ps30_input(emit, decl->Semantic, idx);
598 break;
599
600 case TGSI_FILE_OUTPUT:
601 if (emit->unit == PIPE_SHADER_VERTEX)
602 ok = vs30_output(emit, decl->Semantic, idx);
603 else
604 ok = ps30_output(emit, decl->Semantic, idx);
605 break;
606
607 case TGSI_FILE_SAMPLER_VIEW:
608 {
609 unsigned unit = decl->Range.First;
610 assert(decl->Range.First == decl->Range.Last);
611 emit->sampler_target[unit] = decl->SamplerView.Resource;
612 }
613 break;
614
615 default:
616 /* don't need to declare other vars */
617 ok = TRUE;
618 }
619
620 if (!ok)
621 return FALSE;
622 }
623
624 return TRUE;
625 }
626