1 /*
2 * Copyright (c) 2008-2024 Broadcom. All Rights Reserved.
3 * The term “Broadcom” refers to Broadcom Inc.
4 * and/or its subsidiaries.
5 * SPDX-License-Identifier: MIT
6 */
7
8
9 #include "pipe/p_shader_tokens.h"
10 #include "tgsi/tgsi_parse.h"
11 #include "util/u_memory.h"
12
13 #include "svga_tgsi_emit.h"
14
15
16 /**
17 * Translate TGSI semantic info into SVGA3d semantic info.
18 * This is called for VS outputs and PS inputs only.
19 */
20 static bool
translate_vs_ps_semantic(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned * usage,unsigned * idx)21 translate_vs_ps_semantic(struct svga_shader_emitter *emit,
22 struct tgsi_declaration_semantic semantic,
23 unsigned *usage,
24 unsigned *idx)
25 {
26 switch (semantic.Name) {
27 case TGSI_SEMANTIC_POSITION:
28 *idx = semantic.Index;
29 *usage = SVGA3D_DECLUSAGE_POSITION;
30 break;
31 case TGSI_SEMANTIC_COLOR:
32 *idx = semantic.Index;
33 *usage = SVGA3D_DECLUSAGE_COLOR;
34 break;
35 case TGSI_SEMANTIC_BCOLOR:
36 *idx = semantic.Index + 2; /* sharing with COLOR */
37 *usage = SVGA3D_DECLUSAGE_COLOR;
38 break;
39 case TGSI_SEMANTIC_FOG:
40 *idx = 0;
41 assert(semantic.Index == 0);
42 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
43 break;
44 case TGSI_SEMANTIC_PSIZE:
45 *idx = semantic.Index;
46 *usage = SVGA3D_DECLUSAGE_PSIZE;
47 break;
48 case TGSI_SEMANTIC_GENERIC:
49 *idx = svga_remap_generic_index(emit->key.generic_remap_table,
50 semantic.Index);
51 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
52 break;
53 case TGSI_SEMANTIC_NORMAL:
54 *idx = semantic.Index;
55 *usage = SVGA3D_DECLUSAGE_NORMAL;
56 break;
57 case TGSI_SEMANTIC_CLIPDIST:
58 case TGSI_SEMANTIC_CLIPVERTEX:
59 /* XXX at this time we don't support clip distance or clip vertices */
60 debug_warn_once("unsupported clip distance/vertex attribute\n");
61 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
62 *idx = 0;
63 return true;
64 default:
65 assert(0);
66 *usage = SVGA3D_DECLUSAGE_TEXCOORD;
67 *idx = 0;
68 return false;
69 }
70
71 return true;
72 }
73
74
75 /**
76 * Emit a PS input (or VS depth/fog output) register declaration.
77 * For example, if usage = SVGA3D_DECLUSAGE_TEXCOORD, reg.num = 1, and
78 * index = 3, we'll emit "dcl_texcoord3 v1".
79 */
80 static bool
emit_decl(struct svga_shader_emitter * emit,SVGA3dShaderDestToken reg,unsigned usage,unsigned index)81 emit_decl(struct svga_shader_emitter *emit,
82 SVGA3dShaderDestToken reg,
83 unsigned usage,
84 unsigned index)
85 {
86 SVGA3DOpDclArgs dcl;
87 SVGA3dShaderInstToken opcode;
88
89 /* check values against bitfield sizes */
90 assert(index < 16);
91 assert(usage <= SVGA3D_DECLUSAGE_MAX);
92
93 opcode = inst_token(SVGA3DOP_DCL);
94 dcl.values[0] = 0;
95 dcl.values[1] = 0;
96
97 dcl.dst = reg;
98 dcl.usage = usage;
99 dcl.index = index;
100 dcl.values[0] |= 1<<31;
101
102 return (emit_instruction(emit, opcode) &&
103 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
104 }
105
106
107 /**
108 * Emit declaration for PS front/back-face input register.
109 */
110 static bool
emit_vface_decl(struct svga_shader_emitter * emit)111 emit_vface_decl(struct svga_shader_emitter *emit)
112 {
113 if (!emit->emitted_vface) {
114 SVGA3dShaderDestToken reg =
115 dst_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
116
117 if (!emit_decl(emit, reg, 0, 0))
118 return false;
119
120 emit->emitted_vface = true;
121 }
122 return true;
123 }
124
125
126 /**
127 * Emit PS input register to pass depth/fog coordinates.
128 * Note that this always goes into texcoord[0].
129 */
130 static bool
ps30_input_emit_depth_fog(struct svga_shader_emitter * emit,struct src_register * out)131 ps30_input_emit_depth_fog(struct svga_shader_emitter *emit,
132 struct src_register *out)
133 {
134 struct src_register reg;
135
136 if (emit->emitted_depth_fog) {
137 *out = emit->ps_depth_fog;
138 return true;
139 }
140
141 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
142 return false;
143
144 reg = src_register(SVGA3DREG_INPUT,
145 emit->ps30_input_count++);
146
147 *out = emit->ps_depth_fog = reg;
148
149 emit->emitted_depth_fog = true;
150
151 return emit_decl(emit, dst(reg), SVGA3D_DECLUSAGE_TEXCOORD, 0);
152 }
153
154
155 /**
156 * Process a PS input declaration.
157 * We'll emit a declaration like "dcl_texcoord1 v2"
158 */
159 static bool
ps30_input(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)160 ps30_input(struct svga_shader_emitter *emit,
161 struct tgsi_declaration_semantic semantic,
162 unsigned idx)
163 {
164 unsigned usage, index;
165 SVGA3dShaderDestToken reg;
166
167 if (semantic.Name == TGSI_SEMANTIC_POSITION) {
168
169 emit->ps_true_pos = src_register(SVGA3DREG_MISCTYPE,
170 SVGA3DMISCREG_POSITION);
171 emit->ps_true_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
172 TGSI_SWIZZLE_Y,
173 TGSI_SWIZZLE_Y,
174 TGSI_SWIZZLE_Y);
175 reg = writemask(dst(emit->ps_true_pos),
176 TGSI_WRITEMASK_XY);
177 emit->ps_reads_pos = true;
178
179 if (emit->info.reads_z) {
180 emit->ps_temp_pos = dst_register(SVGA3DREG_TEMP,
181 emit->nr_hw_temp);
182
183 emit->input_map[idx] = src_register(SVGA3DREG_TEMP,
184 emit->nr_hw_temp);
185 emit->nr_hw_temp++;
186
187 if (!ps30_input_emit_depth_fog(emit, &emit->ps_depth_pos))
188 return false;
189
190 emit->ps_depth_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,
191 TGSI_SWIZZLE_Z,
192 TGSI_SWIZZLE_Z,
193 TGSI_SWIZZLE_W);
194 }
195 else {
196 emit->input_map[idx] = emit->ps_true_pos;
197 }
198
199 return emit_decl(emit, reg, 0, 0);
200 }
201 else if (emit->key.fs.light_twoside &&
202 (semantic.Name == TGSI_SEMANTIC_COLOR)) {
203
204 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
205 return false;
206
207 emit->internal_color_idx[emit->internal_color_count] = idx;
208 emit->input_map[idx] =
209 src_register(SVGA3DREG_INPUT, emit->ps30_input_count);
210 emit->ps30_input_count++;
211 emit->internal_color_count++;
212
213 reg = dst(emit->input_map[idx]);
214
215 if (!emit_decl(emit, reg, usage, index))
216 return false;
217
218 semantic.Name = TGSI_SEMANTIC_BCOLOR;
219 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
220 return false;
221
222 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
223 return false;
224
225 reg = dst_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
226
227 if (!emit_decl(emit, reg, usage, index))
228 return false;
229
230 if (!emit_vface_decl(emit))
231 return false;
232
233 return true;
234 }
235 else if (semantic.Name == TGSI_SEMANTIC_FACE) {
236 if (!emit_vface_decl(emit))
237 return false;
238 emit->emit_frontface = true;
239 emit->internal_frontface_idx = idx;
240 return true;
241 }
242 else if (semantic.Name == TGSI_SEMANTIC_FOG) {
243
244 assert(semantic.Index == 0);
245
246 if (!ps30_input_emit_depth_fog(emit, &emit->input_map[idx]))
247 return false;
248
249 emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,
250 TGSI_SWIZZLE_X,
251 TGSI_SWIZZLE_X,
252 TGSI_SWIZZLE_X);
253 return true;
254 }
255 else {
256
257 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
258 return false;
259
260 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX)
261 return false;
262
263 emit->input_map[idx] =
264 src_register(SVGA3DREG_INPUT, emit->ps30_input_count++);
265
266 reg = dst(emit->input_map[idx]);
267
268 if (!emit_decl(emit, reg, usage, index))
269 return false;
270
271 if (semantic.Name == TGSI_SEMANTIC_GENERIC &&
272 emit->key.sprite_origin_lower_left &&
273 index >= 1 &&
274 emit->key.sprite_coord_enable & (1 << semantic.Index)) {
275 /* This is a sprite texture coord with lower-left origin.
276 * We need to invert the texture T coordinate since the SVGA3D
277 * device only supports an upper-left origin.
278 */
279 unsigned unit = index - 1;
280
281 emit->inverted_texcoords |= (1 << unit);
282
283 /* save original texcoord reg */
284 emit->ps_true_texcoord[unit] = emit->input_map[idx];
285
286 /* this temp register will be the results of the MAD instruction */
287 emit->ps_inverted_texcoord[unit] =
288 src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
289 emit->nr_hw_temp++;
290
291 emit->ps_inverted_texcoord_input[unit] = idx;
292
293 /* replace input_map entry with the temp register */
294 emit->input_map[idx] = emit->ps_inverted_texcoord[unit];
295 }
296
297 return true;
298 }
299
300 }
301
302
303 /**
304 * Process a PS output declaration.
305 * Note that we don't actually emit a SVGA3DOpDcl for PS outputs.
306 * \idx register index, such as OUT[2] (not semantic index)
307 */
308 static bool
ps30_output(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)309 ps30_output(struct svga_shader_emitter *emit,
310 struct tgsi_declaration_semantic semantic,
311 unsigned idx)
312 {
313 switch (semantic.Name) {
314 case TGSI_SEMANTIC_COLOR:
315 if (emit->unit == PIPE_SHADER_FRAGMENT) {
316 if (emit->key.fs.white_fragments) {
317 /* Used for XOR logicop mode */
318 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
319 emit->nr_hw_temp++);
320 emit->temp_color_output[idx] = emit->output_map[idx];
321 emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT,
322 semantic.Index);
323 }
324 else if (emit->key.fs.write_color0_to_n_cbufs) {
325 /* We'll write color output [0] to all render targets.
326 * Prepare all the output registers here, but only when the
327 * semantic.Index == 0 so we don't do this more than once.
328 */
329 if (semantic.Index == 0) {
330 unsigned i;
331 for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) {
332 emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP,
333 emit->nr_hw_temp++);
334 emit->temp_color_output[i] = emit->output_map[idx+i];
335 emit->true_color_output[i] = dst_register(SVGA3DREG_COLOROUT,
336 i);
337 }
338 }
339 }
340 else {
341 emit->output_map[idx] =
342 dst_register(SVGA3DREG_COLOROUT, semantic.Index);
343 }
344 }
345 else {
346 emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT,
347 semantic.Index);
348 }
349 break;
350 case TGSI_SEMANTIC_POSITION:
351 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
352 emit->nr_hw_temp++);
353 emit->temp_pos = emit->output_map[idx];
354 emit->true_pos = dst_register(SVGA3DREG_DEPTHOUT,
355 semantic.Index);
356 break;
357 default:
358 assert(0);
359 /* A wild stab in the dark. */
360 emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT, 0);
361 break;
362 }
363
364 return true;
365 }
366
367
368 /**
369 * Declare a VS input register.
370 * We still make up the input semantics the same as in 2.0
371 */
372 static bool
vs30_input(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)373 vs30_input(struct svga_shader_emitter *emit,
374 struct tgsi_declaration_semantic semantic,
375 unsigned idx)
376 {
377 SVGA3DOpDclArgs dcl;
378 SVGA3dShaderInstToken opcode;
379 unsigned usage, index;
380
381 opcode = inst_token(SVGA3DOP_DCL);
382 dcl.values[0] = 0;
383 dcl.values[1] = 0;
384
385 emit->input_map[idx] = src_register(SVGA3DREG_INPUT, idx);
386 dcl.dst = dst_register(SVGA3DREG_INPUT, idx);
387
388 assert(dcl.dst.reserved0);
389
390 svga_generate_vdecl_semantics(idx, &usage, &index);
391
392 dcl.usage = usage;
393 dcl.index = index;
394 dcl.values[0] |= 1<<31;
395
396 return (emit_instruction(emit, opcode) &&
397 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
398 }
399
400
401 /**
402 * Declare VS output for holding depth/fog.
403 */
404 static bool
vs30_output_emit_depth_fog(struct svga_shader_emitter * emit,SVGA3dShaderDestToken * out)405 vs30_output_emit_depth_fog(struct svga_shader_emitter *emit,
406 SVGA3dShaderDestToken *out)
407 {
408 SVGA3dShaderDestToken reg;
409
410 if (emit->emitted_depth_fog) {
411 *out = emit->vs_depth_fog;
412 return true;
413 }
414
415 reg = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
416
417 *out = emit->vs_depth_fog = reg;
418
419 emit->emitted_depth_fog = true;
420
421 return emit_decl(emit, reg, SVGA3D_DECLUSAGE_TEXCOORD, 0);
422 }
423
424
425 /**
426 * Declare a VS output.
427 * VS3.0 outputs have proper declarations and semantic info for
428 * matching against PS inputs.
429 */
430 static bool
vs30_output(struct svga_shader_emitter * emit,struct tgsi_declaration_semantic semantic,unsigned idx)431 vs30_output(struct svga_shader_emitter *emit,
432 struct tgsi_declaration_semantic semantic,
433 unsigned idx)
434 {
435 SVGA3DOpDclArgs dcl;
436 SVGA3dShaderInstToken opcode;
437 unsigned usage, index;
438
439 opcode = inst_token(SVGA3DOP_DCL);
440 dcl.values[0] = 0;
441 dcl.values[1] = 0;
442
443 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index))
444 return false;
445
446 if (emit->vs30_output_count >= SVGA3D_OUTPUTREG_MAX)
447 return false;
448
449 dcl.dst = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++);
450 dcl.usage = usage;
451 dcl.index = index;
452 dcl.values[0] |= 1<<31;
453
454 if (semantic.Name == TGSI_SEMANTIC_POSITION) {
455 assert(idx == 0);
456 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
457 emit->nr_hw_temp++);
458 emit->temp_pos = emit->output_map[idx];
459 emit->true_pos = dcl.dst;
460
461 /* Grab an extra output for the depth output */
462 if (!vs30_output_emit_depth_fog(emit, &emit->depth_pos))
463 return false;
464
465 }
466 else if (semantic.Name == TGSI_SEMANTIC_PSIZE) {
467 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP,
468 emit->nr_hw_temp++);
469 emit->temp_psiz = emit->output_map[idx];
470
471 /* This has the effect of not declaring psiz (below) and not
472 * emitting the final MOV to true_psiz in the postamble.
473 */
474 if (!emit->key.vs.allow_psiz)
475 return true;
476
477 emit->true_psiz = dcl.dst;
478 }
479 else if (semantic.Name == TGSI_SEMANTIC_FOG) {
480 /*
481 * Fog is shared with depth.
482 * So we need to decrement out_count since emit_depth_fog will increment it.
483 */
484 emit->vs30_output_count--;
485
486 if (!vs30_output_emit_depth_fog(emit, &emit->output_map[idx]))
487 return false;
488
489 return true;
490 }
491 else {
492 emit->output_map[idx] = dcl.dst;
493 }
494
495 return (emit_instruction(emit, opcode) &&
496 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
497 }
498
499
500 /** Translate PIPE_TEXTURE_x to SVGA3DSAMP_x */
501 static uint8_t
svga_tgsi_sampler_type(const struct svga_shader_emitter * emit,int idx)502 svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx)
503 {
504 switch (emit->sampler_target[idx]) {
505 case TGSI_TEXTURE_1D:
506 return SVGA3DSAMP_2D;
507 case TGSI_TEXTURE_2D:
508 case TGSI_TEXTURE_RECT:
509 return SVGA3DSAMP_2D;
510 case TGSI_TEXTURE_SHADOW2D:
511 return SVGA3DSAMP_2D_SHADOW;
512 case TGSI_TEXTURE_3D:
513 return SVGA3DSAMP_VOLUME;
514 case TGSI_TEXTURE_CUBE:
515 return SVGA3DSAMP_CUBE;
516 }
517
518 return SVGA3DSAMP_UNKNOWN;
519 }
520
521
522 static bool
ps30_sampler(struct svga_shader_emitter * emit,unsigned idx)523 ps30_sampler(struct svga_shader_emitter *emit,
524 unsigned idx)
525 {
526 SVGA3DOpDclArgs dcl;
527 SVGA3dShaderInstToken opcode;
528
529 opcode = inst_token(SVGA3DOP_DCL);
530 dcl.values[0] = 0;
531 dcl.values[1] = 0;
532
533 dcl.dst = dst_register(SVGA3DREG_SAMPLER, idx);
534 dcl.type = svga_tgsi_sampler_type(emit, idx);
535 dcl.values[0] |= 1<<31;
536
537 return (emit_instruction(emit, opcode) &&
538 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values)));
539 }
540
541
542 bool
svga_shader_emit_samplers_decl(struct svga_shader_emitter * emit)543 svga_shader_emit_samplers_decl(struct svga_shader_emitter *emit)
544 {
545 unsigned i;
546
547 for (i = 0; i < emit->num_samplers; i++) {
548 if (!ps30_sampler(emit, i))
549 return false;
550 }
551 return true;
552 }
553
554
555 bool
svga_translate_decl_sm30(struct svga_shader_emitter * emit,const struct tgsi_full_declaration * decl)556 svga_translate_decl_sm30(struct svga_shader_emitter *emit,
557 const struct tgsi_full_declaration *decl)
558 {
559 unsigned first = decl->Range.First;
560 unsigned last = decl->Range.Last;
561 unsigned idx;
562
563 for (idx = first; idx <= last; idx++) {
564 bool ok = true;
565
566 switch (decl->Declaration.File) {
567 case TGSI_FILE_SAMPLER:
568 assert (emit->unit == PIPE_SHADER_FRAGMENT);
569 /* just keep track of the number of samplers here.
570 * Will emit the declaration in the helpers function.
571 */
572 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
573 break;
574
575 case TGSI_FILE_INPUT:
576 if (emit->unit == PIPE_SHADER_VERTEX)
577 ok = vs30_input(emit, decl->Semantic, idx);
578 else
579 ok = ps30_input(emit, decl->Semantic, idx);
580 break;
581
582 case TGSI_FILE_OUTPUT:
583 if (emit->unit == PIPE_SHADER_VERTEX)
584 ok = vs30_output(emit, decl->Semantic, idx);
585 else
586 ok = ps30_output(emit, decl->Semantic, idx);
587 break;
588
589 case TGSI_FILE_SAMPLER_VIEW:
590 {
591 unsigned unit = decl->Range.First;
592 assert(decl->Range.First == decl->Range.Last);
593 emit->sampler_target[unit] = decl->SamplerView.Resource;
594 }
595 break;
596
597 default:
598 /* don't need to declare other vars */
599 ok = true;
600 }
601
602 if (!ok)
603 return false;
604 }
605
606 return true;
607 }
608