1 /**********************************************************
2 * Copyright 1998-2022 VMware, Inc. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **********************************************************/
25
26 /**
27 * @file svga_tgsi_vgpu10.c
28 *
29 * TGSI -> VGPU10 shader translation.
30 *
31 * \author Mingcheng Chen
32 * \author Brian Paul
33 */
34
35 #include "util/compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "tgsi/tgsi_scan.h"
42 #include "tgsi/tgsi_strings.h"
43 #include "tgsi/tgsi_two_side.h"
44 #include "tgsi/tgsi_aa_point.h"
45 #include "tgsi/tgsi_util.h"
46 #include "util/u_math.h"
47 #include "util/u_memory.h"
48 #include "util/u_bitmask.h"
49 #include "util/u_debug.h"
50 #include "util/u_pstipple.h"
51
52 #include "svga_context.h"
53 #include "svga_debug.h"
54 #include "svga_link.h"
55 #include "svga_shader.h"
56 #include "svga_tgsi.h"
57
58 #include "VGPU10ShaderTokens.h"
59
60
/* Sentinel register/index value meaning "not assigned" (presumably; chosen
 * to be larger than any real register index -- verify against uses).
 */
#define INVALID_INDEX 99999
/* Max internal/scratch temp registers (NOTE(review): not used in this
 * chunk -- confirm against allocator code).
 */
#define MAX_INTERNAL_TEMPS 4
/* Max TGSI system values tracked (see system_value_indexes[]) */
#define MAX_SYSTEM_VALUES 4
/* Number of 4-component immediate slots (buffer element count / 4);
 * used to size immediates[][4].
 */
#define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */
67
/**
 * Clipping is complicated.  There's four different cases which we
 * handle during VS/GS shader translation.  The mode for a shader is
 * chosen by determine_clipping_mode().
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};
87

/**
 * Shader signature info: the SVGA3D signature header plus one entry per
 * input register, output register and (for tessellation stages) per-patch
 * constant register.
 */
struct svga_shader_signature
{
   SVGA3dDXShaderSignatureHeader header;
   SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
   SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
   SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
};
97
98 static inline void
set_shader_signature_entry(SVGA3dDXShaderSignatureEntry * e,unsigned index,SVGA3dDXSignatureSemanticName sgnName,unsigned mask,SVGA3dDXSignatureRegisterComponentType compType,SVGA3dDXSignatureMinPrecision minPrecision)99 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
100 unsigned index,
101 SVGA3dDXSignatureSemanticName sgnName,
102 unsigned mask,
103 SVGA3dDXSignatureRegisterComponentType compType,
104 SVGA3dDXSignatureMinPrecision minPrecision)
105 {
106 e->registerIndex = index;
107 e->semanticName = sgnName;
108 e->mask = mask;
109 e->componentType = compType;
110 e->minPrecision = minPrecision;
111 };
112
/**
 * Mapping from TGSI semantic name (used as the array index) to the
 * corresponding SVGA3dDXSignatureSemanticName.  Most TGSI semantics have
 * no SVGA signature equivalent and map to _UNDEFINED.  A few key entries
 * are spot-checked with assertions in map_tgsi_semantic_to_sgn_name();
 * keep this table in sync with the enum tgsi_semantic ordering.
 */
static const SVGA3dDXSignatureSemanticName
tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
   SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
   SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
};
161
162
163 /**
164 * Map tgsi semantic name to SVGA signature semantic name
165 */
166 static inline SVGA3dDXSignatureSemanticName
map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)167 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
168 {
169 assert(name < TGSI_SEMANTIC_COUNT);
170
171 /* Do a few asserts here to spot check the mapping */
172 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
173 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
174 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
175 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
176 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
177 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
178
179 return tgsi_semantic_to_sgn_name[name];
180 }
181
/**
 * Tri-state flag used when an instruction may need to be emitted a second
 * time (see the reemit_rawbuf_instruction field of the emitter).
 */
enum reemit_mode {
   REEMIT_FALSE = 0,        /* no re-emission needed */
   REEMIT_TRUE = 1,         /* instruction must be re-emitted */
   REEMIT_IN_PROGRESS = 2   /* re-emission is currently underway */
};
187
/**
 * Per-operand scratch info for raw (byte-addressed) buffer sources
 * (see the raw_buf_tmp[3] array in the emitter -- one per src operand).
 */
struct svga_raw_buf_tmp {
   bool indirect;             /* NOTE(review): presumably set when the
                               * element index is relative/indirect --
                               * confirm against the rawbuf emit code */
   unsigned buffer_index:8;   /* which constant buffer */
   unsigned element_index:8;  /* element offset within the buffer */
   unsigned element_rel:8;    /* relative-addressing register info */
};
194
/**
 * All the state used while translating one TGSI shader into VGPU10 tokens.
 * One of these is allocated per translation (see alloc_emitter() /
 * free_emitter()).
 */
struct svga_shader_emitter_v10
{
   /* The token output buffer */
   unsigned size;   /* buffer size in bytes (see expand()/reserve()) */
   char *buf;       /* buffer start; points at static err_buf after OOM */
   char *ptr;       /* current write position within buf */

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;     /**< PIPE_SHADER_x shader stage */
   unsigned version;  /**< Either 40, 41, 50 or 51 at this time */

   unsigned cur_tgsi_token;    /**< current tgsi token position */
   unsigned inst_start_token;
   bool discard_instruction;   /**< throw away current instruction? */
   bool reemit_instruction;    /**< reemit current instruction */
   bool reemit_tgsi_instruction;  /**< reemit current tgsi instruction */
   bool skip_instruction;      /**< skip current instruction */
   bool use_sampler_state_mapping; /* use sampler state mapping */
   enum reemit_mode reemit_rawbuf_instruction;

   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   double (*immediates_dbl)[2];   /* aliased view of immediates[] as doubles */
   unsigned num_immediates;       /**< Number of immediates emitted */
   unsigned common_immediate_pos[20];  /**< literals for common immediates */
   unsigned num_common_immediates;
   unsigned num_immediates_emitted;
   unsigned num_new_immediates;   /** pending immediates to be declared */
   unsigned immediates_block_start_token;
   unsigned immediates_block_next_token;

   unsigned num_outputs;   /**< include any extra outputs */
   /** The first extra output is reserved for
    * non-adjusted vertex position for
    * stream output purpose
    */

   /* Temporary Registers */
   unsigned num_shader_temps;    /**< num of temps used by original shader */
   unsigned internal_temp_count; /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
      bool initialized;
   } temp_map[VGPU10_MAX_TEMPS];  /**< arrayId, element */

   unsigned initialize_temp_index;

   /** Number of constants used by original shader for each constant buffer.
    * The size should probably always match with that of svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Raw constant buffers */
   unsigned raw_buf_srv_start_index;  /* starting srv index for raw buffers */
   unsigned raw_bufs;                 /* raw buffers bitmask */
   unsigned raw_buf_tmp_index;        /* starting temp index for raw buffers */
   unsigned raw_buf_cur_tmp_index;    /* current temp index for raw buffers */
   struct svga_raw_buf_tmp raw_buf_tmp[3];  /* temporaries for raw buf source */

   /* Samplers */
   unsigned num_samplers;
   bool sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
   uint8_t sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
   uint8_t sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */

   /* Images */
   unsigned num_images;
   unsigned image_mask;
   struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
   unsigned image_size_index;  /* starting index to cbuf for image size */

   /* Shader buffers */
   unsigned num_shader_bufs;
   unsigned raw_shaderbuf_srv_start_index;  /* starting srv index for raw shaderbuf */
   uint64_t raw_shaderbufs;                 /* raw shader buffers bitmask */

   /* HW atomic buffers */
   unsigned num_atomic_bufs;
   unsigned atomic_bufs_mask;
   unsigned max_atomic_counter_index;
   VGPU10_OPCODE_TYPE cur_atomic_opcode;    /* current atomic opcode */

   bool uav_declared;  /* True if uav is declared */

   /* Index Range declaration */
   struct {
      unsigned start_index;
      unsigned count;
      bool required;
      unsigned operandType;
      unsigned size;
      unsigned dim;
   } index_range;

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   uint8_t output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   uint8_t system_value_indexes[MAX_SYSTEM_VALUES];

   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
      unsigned prescale_scale_index, prescale_trans_index;
      unsigned num_prescale;      /* number of prescale factor in const buf */
      unsigned viewport_index;
      unsigned need_prescale:1;
      unsigned have_prescale:1;
   } vposition;

   /* Shader limits */
   unsigned max_vs_inputs;
   unsigned max_vs_outputs;
   unsigned max_gs_inputs;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      unsigned vertex_id_bias_index;
      unsigned vertex_id_sys_index;
      unsigned vertex_id_tmp_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned num_color_outputs;
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index;  /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;    /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;
      unsigned pstipple_sampler_state_index;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */

      unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */

      unsigned sample_pos_sys_index;  /**< TGSI index of sample pos sys value */
      unsigned sample_pos_tmp_index;  /**< which temp reg has the sample pos */

      /** TGSI index of sample mask input sys value */
      unsigned sample_mask_in_sys_index;

      /* layer */
      unsigned layer_input_index;  /**< TGSI index of layer */
      unsigned layer_imm_index;    /**< immediate for default layer 0 */

      bool forceEarlyDepthStencil;  /**< true if Early Depth stencil test is enabled */
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type;  /**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology;  /**< VGPU10 primitive topology */
      unsigned input_size;        /**< size of input arrays */
      unsigned prim_id_index;     /**< primitive id register index */
      unsigned max_out_vertices;  /**< maximum number of output vertices */
      unsigned invocations;
      unsigned invocation_id_sys_index;

      unsigned viewport_index_out_index;
      unsigned viewport_index_tmp_index;
   } gs;

   /* For tessellation control shaders only */
   struct {
      unsigned vertices_per_patch_index;  /**< vertices_per_patch system value index */
      unsigned imm_index;                 /**< immediate for tcs */
      unsigned invocation_id_sys_index;   /**< invocation id */
      unsigned invocation_id_tmp_index;
      unsigned instruction_token_pos;     /* token pos for the first instruction */
      unsigned control_point_input_index; /* control point input register index */
      unsigned control_point_addr_index;  /* control point input address register */
      unsigned control_point_out_index;   /* control point output register index */
      unsigned control_point_tmp_index;   /* control point temporary register */
      unsigned control_point_out_count;   /* control point output count */
      bool control_point_phase;           /* true if in control point phase */
      bool fork_phase_add_signature;      /* true if needs to add signature in fork phase */
      unsigned patch_generic_out_count;   /* per-patch generic output count */
      unsigned patch_generic_out_index;   /* per-patch generic output register index*/
      unsigned patch_generic_tmp_index;   /* per-patch generic temporary register index*/
      unsigned prim_id_index;             /* primitive id */
      struct {
         unsigned out_index;   /* real tessinner output register */
         unsigned temp_index;  /* tessinner temp register */
         unsigned tgsi_index;  /* tgsi tessinner output register */
      } inner;
      struct {
         unsigned out_index;   /* real tessouter output register */
         unsigned temp_index;  /* tessouter temp register */
         unsigned tgsi_index;  /* tgsi tessouter output register */
      } outer;
   } tcs;

   /* For tessellation evaluation shaders only */
   struct {
      enum mesa_prim prim_mode;
      enum pipe_tess_spacing spacing;
      bool vertices_order_cw;
      bool point_mode;
      unsigned tesscoord_sys_index;
      unsigned swizzle_max;
      unsigned prim_id_index;  /* primitive id */
      struct {
         unsigned in_index;    /* real tessinner input register */
         unsigned temp_index;  /* tessinner temp register */
         unsigned tgsi_index;  /* tgsi tessinner input register */
      } inner;
      struct {
         unsigned in_index;    /* real tessouter input register */
         unsigned temp_index;  /* tessouter temp register */
         unsigned tgsi_index;  /* tgsi tessouter input register */
      } outer;
   } tes;

   /* For compute shaders only */
   struct {
      unsigned block_width;   /* thread group size in x dimension */
      unsigned block_height;  /* thread group size in y dimension */
      unsigned block_depth;   /* thread group size in z dimension */
      unsigned thread_id_index;  /* thread id tgsi index */
      unsigned block_id_index;   /* block id tgsi index */
      bool shared_memory_declared;  /* set if shared memory is declared */
      struct {
         unsigned tgsi_index;  /* grid size tgsi index */
         unsigned imm_index;   /* grid size imm index */
      } grid_size;
   } cs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode;
   unsigned clip_dist_out_index;  /**< clip distance output register index */
   unsigned clip_dist_tmp_index;  /**< clip distance temporary register */
   unsigned clip_dist_so_index;   /**< clip distance shadow copy */

   /** Index of temporary holding the clipvertex coordinate */
   unsigned clip_vertex_out_index;  /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index;  /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   bool constant_color_output;

   bool uses_flat_interp;

   unsigned reserved_token;  /* index to the reserved token */
   bool uses_precise_qualifier;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /** Which texture units are doing shadow comparison in the shader code */
   unsigned shadow_compare_units;

   /* VS/TCS/TES/GS/FS Linkage info */
   struct shader_linkage linkage;
   struct tgsi_shader_info *prevShaderInfo;

   /* Shader signature */
   struct svga_shader_signature signature;

   bool register_overflow;   /**< Set if we exceed a VGPU10 register limit */

   /* For util_debug_message */
   struct util_debug_callback svga_debug_callback;

   /* current loop depth in shader */
   unsigned current_loop_depth;
};
492
493
494 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
495 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
496 static bool emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
497 static bool emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
498 static bool emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
499 static bool emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
500 static bool emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
501 static bool emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
502 static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
503 static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
504 static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
505 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
506
507 static bool
508 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
509
510 static bool
511 emit_vertex(struct svga_shader_emitter_v10 *emit,
512 const struct tgsi_full_instruction *inst);
513
514 static bool
515 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
516 unsigned inst_number,
517 const struct tgsi_full_instruction *inst);
518
519 static void
520 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
521 VGPU10_OPCODE_TYPE opcodeType,
522 VGPU10_OPERAND_TYPE operandType,
523 VGPU10_OPERAND_INDEX_DIMENSION dim,
524 unsigned index, unsigned size,
525 VGPU10_SYSTEM_NAME name,
526 VGPU10_OPERAND_NUM_COMPONENTS numComp,
527 VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
528 unsigned usageMask,
529 VGPU10_INTERPOLATION_MODE interpMode,
530 bool addSignature,
531 SVGA3dDXSignatureSemanticName sgnName);
532
533 static bool
534 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
535 unsigned inst_number,
536 const struct tgsi_full_instruction *inst);
537
538 static void
539 create_temp_array(struct svga_shader_emitter_v10 *emit,
540 unsigned arrayID, unsigned first, unsigned count,
541 unsigned startIndex);
542
543 static char err_buf[128];
544
545 static bool
expand(struct svga_shader_emitter_v10 * emit)546 expand(struct svga_shader_emitter_v10 *emit)
547 {
548 char *new_buf;
549 unsigned newsize = emit->size * 2;
550
551 if (emit->buf != err_buf)
552 new_buf = REALLOC(emit->buf, emit->size, newsize);
553 else
554 new_buf = NULL;
555
556 if (!new_buf) {
557 emit->ptr = err_buf;
558 emit->buf = err_buf;
559 emit->size = sizeof(err_buf);
560 return false;
561 }
562
563 emit->size = newsize;
564 emit->ptr = new_buf + (emit->ptr - emit->buf);
565 emit->buf = new_buf;
566 return true;
567 }
568
569 /**
570 * Create and initialize a new svga_shader_emitter_v10 object.
571 */
572 static struct svga_shader_emitter_v10 *
alloc_emitter(void)573 alloc_emitter(void)
574 {
575 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
576
577 if (!emit)
578 return NULL;
579
580 /* to initialize the output buffer */
581 emit->size = 512;
582 if (!expand(emit)) {
583 FREE(emit);
584 return NULL;
585 }
586 return emit;
587 }
588
589 /**
590 * Free an svga_shader_emitter_v10 object.
591 */
592 static void
free_emitter(struct svga_shader_emitter_v10 * emit)593 free_emitter(struct svga_shader_emitter_v10 *emit)
594 {
595 assert(emit);
596 FREE(emit->buf); /* will be NULL if translation succeeded */
597 FREE(emit);
598 }
599
600 static inline bool
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)601 reserve(struct svga_shader_emitter_v10 *emit,
602 unsigned nr_dwords)
603 {
604 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
605 if (!expand(emit))
606 return false;
607 }
608
609 return true;
610 }
611
612 static bool
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)613 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
614 {
615 if (!reserve(emit, 1))
616 return false;
617
618 *(uint32 *)emit->ptr = dword;
619 emit->ptr += sizeof dword;
620 return true;
621 }
622
623 static bool
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)624 emit_dwords(struct svga_shader_emitter_v10 *emit,
625 const uint32 *dwords,
626 unsigned nr)
627 {
628 if (!reserve(emit, nr))
629 return false;
630
631 memcpy(emit->ptr, dwords, nr * sizeof *dwords);
632 emit->ptr += nr * sizeof *dwords;
633 return true;
634 }
635
636 /** Return the number of tokens in the emitter's buffer */
637 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)638 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
639 {
640 return (emit->ptr - emit->buf) / sizeof(unsigned);
641 }
642
643
/**
 * Check for register overflow. If we overflow we'll set an
 * error flag. This function can be called for register declarations
 * or use as src/dst instruction operands.
 * \param operandType register type. One of VGPU10_OPERAND_TYPE_x
      or VGPU10_OPCODE_DCL_x
 * \param index the register index
 */
static void
check_register_index(struct svga_shader_emitter_v10 *emit,
                     unsigned operandType, unsigned index)
{
   /* Remember the prior state so we only log on the first overflow */
   bool overflow_before = emit->register_overflow;

   switch (operandType) {
   case VGPU10_OPERAND_TYPE_TEMP:
   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
   case VGPU10_OPCODE_DCL_TEMPS:
      if (index >= VGPU10_MAX_TEMPS) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_INPUT:
   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
   case VGPU10_OPCODE_DCL_INPUT:
   case VGPU10_OPCODE_DCL_INPUT_SGV:
   case VGPU10_OPCODE_DCL_INPUT_SIV:
   case VGPU10_OPCODE_DCL_INPUT_PS:
   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
      /* Input limits depend on the shader stage being translated */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= emit->max_vs_inputs) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= emit->max_gs_inputs) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_INPUTS) ||
          (emit->unit == PIPE_SHADER_TESS_CTRL &&
           index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
          (emit->unit == PIPE_SHADER_TESS_EVAL &&
           index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
      /* Note: we are skipping two output indices in tcs for
       * tessinner/outer levels. Implementation will not exceed
       * number of output count but it allows index to go beyond
       * VGPU11_MAX_HS_OUTPUTS.
       * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
       */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= emit->max_vs_outputs) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_TESS_CTRL &&
           index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
          (emit->unit == PIPE_SHADER_TESS_EVAL &&
           index >= VGPU11_MAX_DS_OUTPUTS)) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = true;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
   case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
   case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
   case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
   case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
   case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
   case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
   case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
      /* these system-value operands carry no meaningful index to check */
      /* nothing */
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}
753
754
755 /**
756 * Examine misc state to determine the clipping mode.
757 */
758 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)759 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
760 {
761 /* num_written_clipdistance in the shader info for tessellation
762 * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
763 * is not defined for this shader. So we go through all the output declarations
764 * to set the num_written_clipdistance. This is just to determine the
765 * clipping mode.
766 */
767 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
768 unsigned i;
769 for (i = 0; i < emit->info.num_outputs; i++) {
770 if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
771 emit->info.num_written_clipdistance =
772 4 * (emit->info.output_semantic_index[i] + 1);
773 }
774 }
775 }
776
777 if (emit->info.num_written_clipdistance > 0) {
778 emit->clip_mode = CLIP_DISTANCE;
779 }
780 else if (emit->info.writes_clipvertex) {
781 emit->clip_mode = CLIP_VERTEX;
782 }
783 else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
784 /*
785 * Only the last shader in the vertex processing stage needs to
786 * handle the legacy clip mode.
787 */
788 emit->clip_mode = CLIP_LEGACY;
789 }
790 else {
791 emit->clip_mode = CLIP_NONE;
792 }
793 }
794
795
796 /**
797 * For clip distance register declarations and clip distance register
798 * writes we need to mask the declaration usage or instruction writemask
799 * (respectively) against the set of the really-enabled clipping planes.
800 *
801 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
802 * has a VS that writes to all 8 clip distance registers, but the plane enable
803 * flags are a subset of that.
804 *
805 * This function is used to apply the plane enable flags to the register
806 * declaration or instruction writemask.
807 *
808 * \param writemask the declaration usage mask or instruction writemask
809 * \param clip_reg_index which clip plane register is being declared/written.
810 * The legal values are 0 and 1 (two clip planes per
811 * register, for a total of 8 clip planes)
812 */
813 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)814 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
815 unsigned writemask, unsigned clip_reg_index)
816 {
817 unsigned shift;
818
819 assert(clip_reg_index < 2);
820
821 /* four clip planes per clip register: */
822 shift = clip_reg_index * 4;
823 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
824
825 return writemask;
826 }
827
828
829 /**
830 * Translate gallium shader type into VGPU10 type.
831 */
832 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)833 translate_shader_type(unsigned type)
834 {
835 switch (type) {
836 case PIPE_SHADER_VERTEX:
837 return VGPU10_VERTEX_SHADER;
838 case PIPE_SHADER_GEOMETRY:
839 return VGPU10_GEOMETRY_SHADER;
840 case PIPE_SHADER_FRAGMENT:
841 return VGPU10_PIXEL_SHADER;
842 case PIPE_SHADER_TESS_CTRL:
843 return VGPU10_HULL_SHADER;
844 case PIPE_SHADER_TESS_EVAL:
845 return VGPU10_DOMAIN_SHADER;
846 case PIPE_SHADER_COMPUTE:
847 return VGPU10_COMPUTE_SHADER;
848 default:
849 assert(!"Unexpected shader type");
850 return VGPU10_VERTEX_SHADER;
851 }
852 }
853
854
855 /**
856 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
857 * Note: we only need to translate the opcodes for "simple" instructions,
858 * as seen below. All other opcodes are handled/translated specially.
859 */
860 static VGPU10_OPCODE_TYPE
translate_opcode(enum tgsi_opcode opcode)861 translate_opcode(enum tgsi_opcode opcode)
862 {
863 switch (opcode) {
864 case TGSI_OPCODE_MOV:
865 return VGPU10_OPCODE_MOV;
866 case TGSI_OPCODE_MUL:
867 return VGPU10_OPCODE_MUL;
868 case TGSI_OPCODE_ADD:
869 return VGPU10_OPCODE_ADD;
870 case TGSI_OPCODE_DP3:
871 return VGPU10_OPCODE_DP3;
872 case TGSI_OPCODE_DP4:
873 return VGPU10_OPCODE_DP4;
874 case TGSI_OPCODE_MIN:
875 return VGPU10_OPCODE_MIN;
876 case TGSI_OPCODE_MAX:
877 return VGPU10_OPCODE_MAX;
878 case TGSI_OPCODE_MAD:
879 return VGPU10_OPCODE_MAD;
880 case TGSI_OPCODE_SQRT:
881 return VGPU10_OPCODE_SQRT;
882 case TGSI_OPCODE_FRC:
883 return VGPU10_OPCODE_FRC;
884 case TGSI_OPCODE_FLR:
885 return VGPU10_OPCODE_ROUND_NI;
886 case TGSI_OPCODE_FSEQ:
887 return VGPU10_OPCODE_EQ;
888 case TGSI_OPCODE_FSGE:
889 return VGPU10_OPCODE_GE;
890 case TGSI_OPCODE_FSNE:
891 return VGPU10_OPCODE_NE;
892 case TGSI_OPCODE_DDX:
893 return VGPU10_OPCODE_DERIV_RTX;
894 case TGSI_OPCODE_DDY:
895 return VGPU10_OPCODE_DERIV_RTY;
896 case TGSI_OPCODE_RET:
897 return VGPU10_OPCODE_RET;
898 case TGSI_OPCODE_DIV:
899 return VGPU10_OPCODE_DIV;
900 case TGSI_OPCODE_IDIV:
901 return VGPU10_OPCODE_VMWARE;
902 case TGSI_OPCODE_DP2:
903 return VGPU10_OPCODE_DP2;
904 case TGSI_OPCODE_BRK:
905 return VGPU10_OPCODE_BREAK;
906 case TGSI_OPCODE_IF:
907 return VGPU10_OPCODE_IF;
908 case TGSI_OPCODE_ELSE:
909 return VGPU10_OPCODE_ELSE;
910 case TGSI_OPCODE_ENDIF:
911 return VGPU10_OPCODE_ENDIF;
912 case TGSI_OPCODE_CEIL:
913 return VGPU10_OPCODE_ROUND_PI;
914 case TGSI_OPCODE_I2F:
915 return VGPU10_OPCODE_ITOF;
916 case TGSI_OPCODE_NOT:
917 return VGPU10_OPCODE_NOT;
918 case TGSI_OPCODE_TRUNC:
919 return VGPU10_OPCODE_ROUND_Z;
920 case TGSI_OPCODE_SHL:
921 return VGPU10_OPCODE_ISHL;
922 case TGSI_OPCODE_AND:
923 return VGPU10_OPCODE_AND;
924 case TGSI_OPCODE_OR:
925 return VGPU10_OPCODE_OR;
926 case TGSI_OPCODE_XOR:
927 return VGPU10_OPCODE_XOR;
928 case TGSI_OPCODE_CONT:
929 return VGPU10_OPCODE_CONTINUE;
930 case TGSI_OPCODE_EMIT:
931 return VGPU10_OPCODE_EMIT;
932 case TGSI_OPCODE_ENDPRIM:
933 return VGPU10_OPCODE_CUT;
934 case TGSI_OPCODE_BGNLOOP:
935 return VGPU10_OPCODE_LOOP;
936 case TGSI_OPCODE_ENDLOOP:
937 return VGPU10_OPCODE_ENDLOOP;
938 case TGSI_OPCODE_ENDSUB:
939 return VGPU10_OPCODE_RET;
940 case TGSI_OPCODE_NOP:
941 return VGPU10_OPCODE_NOP;
942 case TGSI_OPCODE_END:
943 return VGPU10_OPCODE_RET;
944 case TGSI_OPCODE_F2I:
945 return VGPU10_OPCODE_FTOI;
946 case TGSI_OPCODE_IMAX:
947 return VGPU10_OPCODE_IMAX;
948 case TGSI_OPCODE_IMIN:
949 return VGPU10_OPCODE_IMIN;
950 case TGSI_OPCODE_UDIV:
951 case TGSI_OPCODE_UMOD:
952 case TGSI_OPCODE_MOD:
953 return VGPU10_OPCODE_UDIV;
954 case TGSI_OPCODE_IMUL_HI:
955 return VGPU10_OPCODE_IMUL;
956 case TGSI_OPCODE_INEG:
957 return VGPU10_OPCODE_INEG;
958 case TGSI_OPCODE_ISHR:
959 return VGPU10_OPCODE_ISHR;
960 case TGSI_OPCODE_ISGE:
961 return VGPU10_OPCODE_IGE;
962 case TGSI_OPCODE_ISLT:
963 return VGPU10_OPCODE_ILT;
964 case TGSI_OPCODE_F2U:
965 return VGPU10_OPCODE_FTOU;
966 case TGSI_OPCODE_UADD:
967 return VGPU10_OPCODE_IADD;
968 case TGSI_OPCODE_U2F:
969 return VGPU10_OPCODE_UTOF;
970 case TGSI_OPCODE_UCMP:
971 return VGPU10_OPCODE_MOVC;
972 case TGSI_OPCODE_UMAD:
973 return VGPU10_OPCODE_UMAD;
974 case TGSI_OPCODE_UMAX:
975 return VGPU10_OPCODE_UMAX;
976 case TGSI_OPCODE_UMIN:
977 return VGPU10_OPCODE_UMIN;
978 case TGSI_OPCODE_UMUL:
979 case TGSI_OPCODE_UMUL_HI:
980 return VGPU10_OPCODE_UMUL;
981 case TGSI_OPCODE_USEQ:
982 return VGPU10_OPCODE_IEQ;
983 case TGSI_OPCODE_USGE:
984 return VGPU10_OPCODE_UGE;
985 case TGSI_OPCODE_USHR:
986 return VGPU10_OPCODE_USHR;
987 case TGSI_OPCODE_USLT:
988 return VGPU10_OPCODE_ULT;
989 case TGSI_OPCODE_USNE:
990 return VGPU10_OPCODE_INE;
991 case TGSI_OPCODE_SWITCH:
992 return VGPU10_OPCODE_SWITCH;
993 case TGSI_OPCODE_CASE:
994 return VGPU10_OPCODE_CASE;
995 case TGSI_OPCODE_DEFAULT:
996 return VGPU10_OPCODE_DEFAULT;
997 case TGSI_OPCODE_ENDSWITCH:
998 return VGPU10_OPCODE_ENDSWITCH;
999 case TGSI_OPCODE_FSLT:
1000 return VGPU10_OPCODE_LT;
1001 case TGSI_OPCODE_ROUND:
1002 return VGPU10_OPCODE_ROUND_NE;
1003 /* Begin SM5 opcodes */
1004 case TGSI_OPCODE_F2D:
1005 return VGPU10_OPCODE_FTOD;
1006 case TGSI_OPCODE_D2F:
1007 return VGPU10_OPCODE_DTOF;
1008 case TGSI_OPCODE_DMUL:
1009 return VGPU10_OPCODE_DMUL;
1010 case TGSI_OPCODE_DADD:
1011 return VGPU10_OPCODE_DADD;
1012 case TGSI_OPCODE_DMAX:
1013 return VGPU10_OPCODE_DMAX;
1014 case TGSI_OPCODE_DMIN:
1015 return VGPU10_OPCODE_DMIN;
1016 case TGSI_OPCODE_DSEQ:
1017 return VGPU10_OPCODE_DEQ;
1018 case TGSI_OPCODE_DSGE:
1019 return VGPU10_OPCODE_DGE;
1020 case TGSI_OPCODE_DSLT:
1021 return VGPU10_OPCODE_DLT;
1022 case TGSI_OPCODE_DSNE:
1023 return VGPU10_OPCODE_DNE;
1024 case TGSI_OPCODE_IBFE:
1025 return VGPU10_OPCODE_IBFE;
1026 case TGSI_OPCODE_UBFE:
1027 return VGPU10_OPCODE_UBFE;
1028 case TGSI_OPCODE_BFI:
1029 return VGPU10_OPCODE_BFI;
1030 case TGSI_OPCODE_BREV:
1031 return VGPU10_OPCODE_BFREV;
1032 case TGSI_OPCODE_POPC:
1033 return VGPU10_OPCODE_COUNTBITS;
1034 case TGSI_OPCODE_LSB:
1035 return VGPU10_OPCODE_FIRSTBIT_LO;
1036 case TGSI_OPCODE_IMSB:
1037 return VGPU10_OPCODE_FIRSTBIT_SHI;
1038 case TGSI_OPCODE_UMSB:
1039 return VGPU10_OPCODE_FIRSTBIT_HI;
1040 case TGSI_OPCODE_INTERP_CENTROID:
1041 return VGPU10_OPCODE_EVAL_CENTROID;
1042 case TGSI_OPCODE_INTERP_SAMPLE:
1043 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
1044 case TGSI_OPCODE_BARRIER:
1045 return VGPU10_OPCODE_SYNC;
1046 case TGSI_OPCODE_DFMA:
1047 return VGPU10_OPCODE_DFMA;
1048 case TGSI_OPCODE_FMA:
1049 return VGPU10_OPCODE_MAD;
1050
1051 /* DX11.1 Opcodes */
1052 case TGSI_OPCODE_DDIV:
1053 return VGPU10_OPCODE_DDIV;
1054 case TGSI_OPCODE_DRCP:
1055 return VGPU10_OPCODE_DRCP;
1056 case TGSI_OPCODE_D2I:
1057 return VGPU10_OPCODE_DTOI;
1058 case TGSI_OPCODE_D2U:
1059 return VGPU10_OPCODE_DTOU;
1060 case TGSI_OPCODE_I2D:
1061 return VGPU10_OPCODE_ITOD;
1062 case TGSI_OPCODE_U2D:
1063 return VGPU10_OPCODE_UTOD;
1064
1065 case TGSI_OPCODE_SAMPLE_POS:
1066 /* Note: we never actually get this opcode because there's no GLSL
1067 * function to query multisample resource sample positions. There's
1068 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
1069 * position of the current sample in the render target.
1070 */
1071 FALLTHROUGH;
1072 case TGSI_OPCODE_SAMPLE_INFO:
1073 /* NOTE: we never actually get this opcode because the GLSL compiler
1074 * implements the gl_NumSamples variable with a simple constant in the
1075 * constant buffer.
1076 */
1077 FALLTHROUGH;
1078 default:
1079 assert(!"Unexpected TGSI opcode in translate_opcode()");
1080 return VGPU10_OPCODE_NOP;
1081 }
1082 }
1083
1084
1085 /**
1086 * Translate a TGSI register file type into a VGPU10 operand type.
1087 * \param array is the TGSI_FILE_TEMPORARY register an array?
1088 */
1089 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,bool array)1090 translate_register_file(enum tgsi_file_type file, bool array)
1091 {
1092 switch (file) {
1093 case TGSI_FILE_CONSTANT:
1094 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1095 case TGSI_FILE_INPUT:
1096 return VGPU10_OPERAND_TYPE_INPUT;
1097 case TGSI_FILE_OUTPUT:
1098 return VGPU10_OPERAND_TYPE_OUTPUT;
1099 case TGSI_FILE_TEMPORARY:
1100 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1101 : VGPU10_OPERAND_TYPE_TEMP;
1102 case TGSI_FILE_IMMEDIATE:
1103 /* all immediates are 32-bit values at this time so
1104 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1105 */
1106 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1107 case TGSI_FILE_SAMPLER:
1108 return VGPU10_OPERAND_TYPE_SAMPLER;
1109 case TGSI_FILE_SYSTEM_VALUE:
1110 return VGPU10_OPERAND_TYPE_INPUT;
1111
1112 /* XXX TODO more cases to finish */
1113
1114 default:
1115 assert(!"Bad tgsi register file!");
1116 return VGPU10_OPERAND_TYPE_NULL;
1117 }
1118 }
1119
1120
1121 /**
1122 * Emit a null dst register
1123 */
1124 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)1125 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1126 {
1127 VGPU10OperandToken0 operand;
1128
1129 operand.value = 0;
1130 operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1131 operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1132
1133 emit_dword(emit, operand.value);
1134 }
1135
1136
1137 /**
1138 * If the given register is a temporary, return the array ID.
1139 * Else return zero.
1140 */
1141 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1142 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1143 enum tgsi_file_type file, unsigned index)
1144 {
1145 if (file == TGSI_FILE_TEMPORARY) {
1146 return emit->temp_map[index].arrayId;
1147 }
1148 else {
1149 return 0;
1150 }
1151 }
1152
1153
1154 /**
1155 * If the given register is a temporary, convert the index from a TGSI
1156 * TEMPORARY index to a VGPU10 temp index.
1157 */
1158 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1159 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1160 enum tgsi_file_type file, unsigned index)
1161 {
1162 if (file == TGSI_FILE_TEMPORARY) {
1163 return emit->temp_map[index].index;
1164 }
1165 else {
1166 return index;
1167 }
1168 }
1169
1170
1171 /**
1172 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1173 * Note: the operandType field must already be initialized.
1174 * \param file the register file being accessed
1175 * \param indirect using indirect addressing of the register file?
1176 * \param index2D if true, 2-D indexing is being used (const or temp registers)
1177 * \param indirect2D if true, 2-D indirect indexing being used (for const buf)
1178 */
1179 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,bool indirect,bool index2D,bool indirect2D)1180 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1181 VGPU10OperandToken0 operand0,
1182 enum tgsi_file_type file,
1183 bool indirect,
1184 bool index2D, bool indirect2D)
1185 {
1186 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1187 VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1188
1189 /*
1190 * Compute index dimensions
1191 */
1192 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1193 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1194 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1195 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1196 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1197 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1198 /* there's no swizzle for in-line immediates */
1199 indexDim = VGPU10_OPERAND_INDEX_0D;
1200 assert(operand0.selectionMode == 0);
1201 }
1202 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1203 indexDim = VGPU10_OPERAND_INDEX_0D;
1204 }
1205 else {
1206 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1207 }
1208
1209 /*
1210 * Compute index representation(s) (immediate vs relative).
1211 */
1212 if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1213 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1214 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1215
1216 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1217 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1218 }
1219 else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1220 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1221 : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1222
1223 index1Rep = 0;
1224 }
1225 else {
1226 index0Rep = 0;
1227 index1Rep = 0;
1228 }
1229
1230 operand0.indexDimension = indexDim;
1231 operand0.index0Representation = index0Rep;
1232 operand0.index1Representation = index1Rep;
1233
1234 return operand0;
1235 }
1236
1237
1238 /**
1239 * Emit the operand for expressing an address register for indirect indexing.
1240 * Note that the address register is really just a temp register.
1241 * \param addr_reg_index which address register to use
1242 */
1243 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)1244 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1245 unsigned addr_reg_index)
1246 {
1247 unsigned tmp_reg_index;
1248 VGPU10OperandToken0 operand0;
1249
1250 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1251
1252 tmp_reg_index = emit->address_reg_index[addr_reg_index];
1253
1254 /* operand0 is a simple temporary register, selecting one component */
1255 operand0.value = 0;
1256 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1257 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1258 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1259 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1260 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1261 operand0.swizzleX = 0;
1262 operand0.swizzleY = 1;
1263 operand0.swizzleZ = 2;
1264 operand0.swizzleW = 3;
1265
1266 emit_dword(emit, operand0.value);
1267 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1268 }
1269
1270
/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 *
 * In many cases the TGSI output register is substituted with another
 * register: e.g. the vertex position, clip-distance/clip-vertex and
 * some hull-shader outputs are redirected to temporaries so that later
 * post-processing code can read or modify them, and fragment depth /
 * sample-mask outputs are emitted as their dedicated VGPU10 operands.
 *
 * \param emit  the emitter context
 * \param reg   the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const bool indirect = reg->Register.Indirect;
   unsigned tempArrayId = get_temp_array_id(emit, file, index);
   bool index2d = reg->Register.Dimension || tempArrayId > 0;
   VGPU10OperandToken0 operand0;

   /* Any write to a temp marks it initialized (consumed by
    * need_temp_reg_initialization()).
    */
   if (file == TGSI_FILE_TEMPORARY) {
      emit->temp_map[index].initialized = true;
   }

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY ||
          emit->unit == PIPE_SHADER_TESS_EVAL) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_COLOR &&
                  emit->key.clamp_vertex_color) {

            /* set the saturate modifier of the instruction
             * to clamp the vertex color.
             */
            VGPU10OpcodeToken0 *token =
               (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
            token->saturate = true;
         }
         else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
                  emit->gs.viewport_index_out_index != INVALID_INDEX) {
            /* redirect viewport-index output to its temporary */
            file = TGSI_FILE_TEMPORARY;
            index = emit->gs.viewport_index_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register: 0-D, single component */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
            /* Fragment sample mask output: 0-D, single component */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read it
             * in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
      else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         if (index == emit->tcs.inner.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with temp.  We store it in a
             * temporary for now so it can be stored into the appropriate
             * registers in post_helper() in the patch constant phase.
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction = true;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.inner.temp_index;
            }
         }
         else if (index == emit->tcs.outer.tgsi_index) {
            /* replace OUTPUT[TESSLEVEL] with temp.  We store it in a
             * temporary for now so it can be stored into the appropriate
             * registers in post_helper().
             */
            if (emit->tcs.control_point_phase) {
               /* Discard writing into tessfactor in control point phase */
               emit->discard_instruction = true;
            }
            else {
               file = TGSI_FILE_TEMPORARY;
               index = emit->tcs.outer.temp_index;
            }
         }
         else if (index >= emit->tcs.patch_generic_out_index &&
                  index < (emit->tcs.patch_generic_out_index +
                           emit->tcs.patch_generic_out_count)) {
            if (emit->tcs.control_point_phase) {
               /* Discard writing into generic patch constant outputs in
                  control point phase */
               emit->discard_instruction = true;
            }
            else {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.patch_generic_tmp_index +
                          (index - emit->tcs.patch_generic_out_index);
                  /**
                   * Temporaries for patch constant data can be done
                   * as indexable temporaries.
                   */
                  tempArrayId = get_temp_array_id(emit, file, index);
                  index2d = tempArrayId > 0;

                  emit->reemit_instruction = false;
               }
               else {
                  /* If per-patch outputs are read in the shader, we
                   * reemit the instruction and store results in temporaries
                   * in the patch constant phase. */
                  if (emit->info.reads_perpatch_outputs) {
                     emit->reemit_instruction = true;
                  }
               }
            }
         }
         else if (reg->Register.Dimension) {
            /* Only control point outputs are declared 2D in tgsi */
            if (emit->tcs.control_point_phase) {
               if (emit->reemit_instruction) {
                  /* Store results of reemitted instruction in temporary register. */
                  index2d = false;
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.control_point_tmp_index +
                          (index - emit->tcs.control_point_out_index);
                  emit->reemit_instruction = false;
               }
               else {
                  /* The mapped control point outputs are 1-D */
                  index2d = false;
                  if (emit->info.reads_pervertex_outputs) {
                     /* If per-vertex outputs are read in the shader, we
                      * reemit the instruction and store results in temporaries
                      * in the control point phase. */
                     emit->reemit_instruction = true;
                  }
               }

               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                   emit->clip_dist_tmp_index != INVALID_INDEX) {
                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
                   * We store the clip distance in a temporary first, then
                   * we'll copy it to the shadow copy and to CLIPDIST with the
                   * enabled planes mask in emit_clip_distance_instructions().
                   */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_dist_tmp_index + sem_index;
               }
               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
                  /* replace the CLIPVERTEX output register with a temporary */
                  assert(emit->clip_mode == CLIP_VERTEX);
                  assert(sem_index == 0);
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_vertex_tmp_index;
               }
            }
            else {
               /* Discard writing into control point outputs in
                  patch constant phase */
               emit->discard_instruction = true;
            }
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, false);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}
1528
1529
1530 /**
1531 * Check if temporary register needs to be initialize when
1532 * shader is not using indirect addressing for temporary and uninitialized
1533 * temporary is not used in loop. In these two scenarios, we cannot
1534 * determine if temporary is initialized or not.
1535 */
1536 static bool
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1537 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1538 unsigned index)
1539 {
1540 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1541 && emit->current_loop_depth == 0) {
1542 if (!emit->temp_map[index].initialized &&
1543 emit->temp_map[index].index < emit->num_shader_temps) {
1544 return true;
1545 }
1546 }
1547
1548 return false;
1549 }
1550
1551
1552 /**
1553 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1554 * In quite a few cases, we do register substitution. For example, if
1555 * the TGSI register is the front/back-face register, we replace that with
1556 * a temp register containing a value we computed earlier.
1557 */
1558 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1559 emit_src_register(struct svga_shader_emitter_v10 *emit,
1560 const struct tgsi_full_src_register *reg)
1561 {
1562 enum tgsi_file_type file = reg->Register.File;
1563 unsigned index = reg->Register.Index;
1564 bool indirect = reg->Register.Indirect;
1565 unsigned tempArrayId = get_temp_array_id(emit, file, index);
1566 bool index2d = (reg->Register.Dimension ||
1567 tempArrayId > 0 ||
1568 file == TGSI_FILE_CONSTANT);
1569 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1570 bool indirect2d = reg->Dimension.Indirect;
1571 unsigned swizzleX = reg->Register.SwizzleX;
1572 unsigned swizzleY = reg->Register.SwizzleY;
1573 unsigned swizzleZ = reg->Register.SwizzleZ;
1574 unsigned swizzleW = reg->Register.SwizzleW;
1575 const bool absolute = reg->Register.Absolute;
1576 const bool negate = reg->Register.Negate;
1577 VGPU10OperandToken0 operand0;
1578 VGPU10OperandToken1 operand1;
1579
1580 operand0.value = operand1.value = 0;
1581
1582 if (emit->unit == PIPE_SHADER_FRAGMENT){
1583 if (file == TGSI_FILE_INPUT) {
1584 if (index == emit->fs.face_input_index) {
1585 /* Replace INPUT[FACE] with TEMP[FACE] */
1586 file = TGSI_FILE_TEMPORARY;
1587 index = emit->fs.face_tmp_index;
1588 }
1589 else if (index == emit->fs.fragcoord_input_index) {
1590 /* Replace INPUT[POSITION] with TEMP[POSITION] */
1591 file = TGSI_FILE_TEMPORARY;
1592 index = emit->fs.fragcoord_tmp_index;
1593 }
1594 else if (index == emit->fs.layer_input_index) {
1595 /* Replace INPUT[LAYER] with zero.x */
1596 file = TGSI_FILE_IMMEDIATE;
1597 index = emit->fs.layer_imm_index;
1598 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1599 }
1600 else {
1601 /* We remap fragment shader inputs to that FS input indexes
1602 * match up with VS/GS output indexes.
1603 */
1604 index = emit->linkage.input_map[index];
1605 }
1606 }
1607 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1608 if (index == emit->fs.sample_pos_sys_index) {
1609 assert(emit->version >= 41);
1610 /* Current sample position is in a temp register */
1611 file = TGSI_FILE_TEMPORARY;
1612 index = emit->fs.sample_pos_tmp_index;
1613 }
1614 else if (index == emit->fs.sample_mask_in_sys_index) {
1615 /* Emitted as vCoverage0.x */
1616 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1617 * elements where s is the maximum number of color samples supported
1618 * by the implementation.
1619 */
1620 operand0.value = 0;
1621 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1622 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1623 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1624 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1625 emit_dword(emit, operand0.value);
1626 return;
1627 }
1628 else {
1629 /* Map the TGSI system value to a VGPU10 input register */
1630 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1631 file = TGSI_FILE_INPUT;
1632 index = emit->system_value_indexes[index];
1633 }
1634 }
1635 }
1636 else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1637 if (file == TGSI_FILE_INPUT) {
1638 if (index == emit->gs.prim_id_index) {
1639 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1640 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1641 }
1642 index = emit->linkage.input_map[index];
1643 }
1644 else if (file == TGSI_FILE_SYSTEM_VALUE &&
1645 index == emit->gs.invocation_id_sys_index) {
1646 /* Emitted as vGSInstanceID0.x */
1647 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1648 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1649 index = 0;
1650 }
1651 }
1652 else if (emit->unit == PIPE_SHADER_VERTEX) {
1653 if (file == TGSI_FILE_INPUT) {
1654 /* if input is adjusted... */
1655 if ((emit->key.vs.adjust_attrib_w_1 |
1656 emit->key.vs.adjust_attrib_itof |
1657 emit->key.vs.adjust_attrib_utof |
1658 emit->key.vs.attrib_is_bgra |
1659 emit->key.vs.attrib_puint_to_snorm |
1660 emit->key.vs.attrib_puint_to_uscaled |
1661 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1662 file = TGSI_FILE_TEMPORARY;
1663 index = emit->vs.adjusted_input[index];
1664 }
1665 }
1666 else if (file == TGSI_FILE_SYSTEM_VALUE) {
1667 if (index == emit->vs.vertex_id_sys_index &&
1668 emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1669 file = TGSI_FILE_TEMPORARY;
1670 index = emit->vs.vertex_id_tmp_index;
1671 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1672 }
1673 else {
1674 /* Map the TGSI system value to a VGPU10 input register */
1675 assert(index < ARRAY_SIZE(emit->system_value_indexes));
1676 file = TGSI_FILE_INPUT;
1677 index = emit->system_value_indexes[index];
1678 }
1679 }
1680 }
1681 else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1682
1683 if (file == TGSI_FILE_SYSTEM_VALUE) {
1684 if (index == emit->tcs.vertices_per_patch_index) {
1685 /**
1686 * if source register is the system value for vertices_per_patch,
1687 * replace it with the immediate.
1688 */
1689 file = TGSI_FILE_IMMEDIATE;
1690 index = emit->tcs.imm_index;
1691 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1692 }
1693 else if (index == emit->tcs.invocation_id_sys_index) {
1694 if (emit->tcs.control_point_phase) {
1695 /**
1696 * Emitted as vOutputControlPointID.x
1697 */
1698 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1699 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1700 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1701 operand0.mask = 0;
1702 emit_dword(emit, operand0.value);
1703 return;
1704 }
1705 else {
1706 /* There is no control point ID input declaration in
1707 * the patch constant phase in hull shader.
1708 * Since for now we are emitting all instructions in
1709 * the patch constant phase, we are replacing the
1710 * control point ID reference with the immediate 0.
1711 */
1712 file = TGSI_FILE_IMMEDIATE;
1713 index = emit->tcs.imm_index;
1714 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1715 }
1716 }
1717 else if (index == emit->tcs.prim_id_index) {
1718 /**
1719 * Emitted as vPrim.x
1720 */
1721 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723 index = 0;
1724 }
1725 }
1726 else if (file == TGSI_FILE_INPUT) {
1727 index = emit->linkage.input_map[index];
1728 if (!emit->tcs.control_point_phase) {
1729 /* Emitted as vicp */
1730 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1731 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1732 assert(reg->Register.Dimension);
1733 }
1734 }
1735 else if (file == TGSI_FILE_OUTPUT) {
1736 if ((index >= emit->tcs.patch_generic_out_index &&
1737 index < (emit->tcs.patch_generic_out_index +
1738 emit->tcs.patch_generic_out_count)) ||
1739 index == emit->tcs.inner.tgsi_index ||
1740 index == emit->tcs.outer.tgsi_index) {
1741 if (emit->tcs.control_point_phase) {
1742 emit->discard_instruction = true;
1743 }
1744 else {
1745 /* Device doesn't allow reading from output so
1746 * use corresponding temporary register as source */
1747 file = TGSI_FILE_TEMPORARY;
1748 if (index == emit->tcs.inner.tgsi_index) {
1749 index = emit->tcs.inner.temp_index;
1750 }
1751 else if (index == emit->tcs.outer.tgsi_index) {
1752 index = emit->tcs.outer.temp_index;
1753 }
1754 else {
1755 index = emit->tcs.patch_generic_tmp_index +
1756 (index - emit->tcs.patch_generic_out_index);
1757 }
1758
1759 /**
1760 * Temporaries for patch constant data can be done
1761 * as indexable temporaries.
1762 */
1763 tempArrayId = get_temp_array_id(emit, file, index);
1764 index2d = tempArrayId > 0;
1765 index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1766 }
1767 }
1768 else if (index2d) {
1769 if (emit->tcs.control_point_phase) {
1770 /* Device doesn't allow reading from output so
1771 * use corresponding temporary register as source */
1772 file = TGSI_FILE_TEMPORARY;
1773 index2d = false;
1774 index = emit->tcs.control_point_tmp_index +
1775 (index - emit->tcs.control_point_out_index);
1776 }
1777 else {
1778 emit->discard_instruction = true;
1779 }
1780 }
1781 }
1782 }
1783 else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1784 if (file == TGSI_FILE_SYSTEM_VALUE) {
1785 if (index == emit->tes.tesscoord_sys_index) {
1786 /**
1787 * Emitted as vDomain
1788 */
1789 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1790 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1791 index = 0;
1792
1793 /* Make sure swizzles are of those components allowed according
1794 * to the tessellator domain.
1795 */
1796 swizzleX = MIN2(swizzleX, emit->tes.swizzle_max);
1797 swizzleY = MIN2(swizzleY, emit->tes.swizzle_max);
1798 swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max);
1799 swizzleW = MIN2(swizzleW, emit->tes.swizzle_max);
1800 }
1801 else if (index == emit->tes.inner.tgsi_index) {
1802 file = TGSI_FILE_TEMPORARY;
1803 index = emit->tes.inner.temp_index;
1804 }
1805 else if (index == emit->tes.outer.tgsi_index) {
1806 file = TGSI_FILE_TEMPORARY;
1807 index = emit->tes.outer.temp_index;
1808 }
1809 else if (index == emit->tes.prim_id_index) {
1810 /**
1811 * Emitted as vPrim.x
1812 */
1813 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1814 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1815 index = 0;
1816 }
1817
1818 }
1819 else if (file == TGSI_FILE_INPUT) {
1820 if (index2d) {
1821 /* 2D input is emitted as vcp (input control point). */
1822 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1823 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1824
1825 /* index specifies the element index and is remapped
1826 * to align with the tcs output index.
1827 */
1828 index = emit->linkage.input_map[index];
1829
1830 assert(index2 < emit->key.tes.vertices_per_patch);
1831 }
1832 else {
1833 if (index < emit->key.tes.tessfactor_index)
1834 /* index specifies the generic patch index.
1835 * Remapped to match up with the tcs output index.
1836 */
1837 index = emit->linkage.input_map[index];
1838
1839 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1840 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1841 }
1842 }
1843 }
1844 else if (emit->unit == PIPE_SHADER_COMPUTE) {
1845 if (file == TGSI_FILE_SYSTEM_VALUE) {
1846 if (index == emit->cs.thread_id_index) {
1847 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1848 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
1849 index = 0;
1850 } else if (index == emit->cs.block_id_index) {
1851 operand0.value = 0;
1852 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1853 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
1854 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1855 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1856 operand0.swizzleX = swizzleX;
1857 operand0.swizzleY = swizzleY;
1858 operand0.swizzleZ = swizzleZ;
1859 operand0.swizzleW = swizzleW;
1860 emit_dword(emit, operand0.value);
1861 return;
1862 } else if (index == emit->cs.grid_size.tgsi_index) {
1863 file = TGSI_FILE_IMMEDIATE;
1864 index = emit->cs.grid_size.imm_index;
1865 }
1866 }
1867 }
1868
1869 if (file == TGSI_FILE_ADDRESS) {
1870 index = emit->address_reg_index[index];
1871 file = TGSI_FILE_TEMPORARY;
1872 }
1873
1874 if (file == TGSI_FILE_CONSTANT) {
1875 /**
1876 * If this constant buffer is to be bound as srv raw buffer,
1877 * then we have to load the constant to a temp first before
1878 * it can be used as a source in the instruction.
1879 * This is accomplished in two passes. The first pass is to
1880 * identify if there is any constbuf to rawbuf translation.
1881 * If there isn't, emit the instruction as usual.
1882 * If there is, then we save the constant buffer reference info,
1883 * and then instead of emitting the instruction at the end
1884 * of the instruction, it will trigger a second pass of parsing
1885 * this instruction. Before it starts the parsing, it will
1886 * load the referenced raw buffer elements to temporaries.
1887 * Then it will emit the instruction that replaces the
1888 * constant buffer replaces with the corresponding temporaries.
1889 */
1890 if (emit->raw_bufs & (1 << index2)) {
1891 if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
1892 unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
1893
1894 emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
1895
1896 /* Save whether the element index is indirect indexing */
1897 emit->raw_buf_tmp[tmpIdx].indirect = indirect;
1898
1899 /* If it is indirect index, save the temporary
1900 * address index, otherwise, save the immediate index.
1901 */
1902 if (indirect) {
1903 emit->raw_buf_tmp[tmpIdx].element_index =
1904 emit->address_reg_index[reg->Indirect.Index];
1905 emit->raw_buf_tmp[tmpIdx].element_rel =
1906 reg->Register.Index;
1907 }
1908 else {
1909 emit->raw_buf_tmp[tmpIdx].element_index = index;
1910 emit->raw_buf_tmp[tmpIdx].element_rel = 0;
1911 }
1912
1913 emit->raw_buf_cur_tmp_index++;
1914 emit->reemit_rawbuf_instruction = REEMIT_TRUE;
1915 emit->discard_instruction = true;
1916 emit->reemit_tgsi_instruction = true;
1917 }
1918 else {
1919 /* In the reemitting process, replace the constant buffer
1920 * reference with temporary.
1921 */
1922 file = TGSI_FILE_TEMPORARY;
1923 index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
1924 index2d = false;
1925 indirect = false;
1926 emit->raw_buf_cur_tmp_index++;
1927 }
1928 }
1929 }
1930
1931 if (file == TGSI_FILE_TEMPORARY) {
1932 if (need_temp_reg_initialization(emit, index)) {
1933 emit->initialize_temp_index = index;
1934 emit->discard_instruction = true;
1935 }
1936 }
1937
1938 if (operand0.value == 0) {
1939 /* if operand0 was not set above for a special case, do the general
1940 * case now.
1941 */
1942 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1943 operand0.operandType = translate_register_file(file, tempArrayId > 0);
1944 }
1945 operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1946 index2d, indirect2d);
1947
1948 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1949 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1950 /* there's no swizzle for in-line immediates */
1951 if (swizzleX == swizzleY &&
1952 swizzleX == swizzleZ &&
1953 swizzleX == swizzleW) {
1954 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1955 }
1956 else {
1957 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1958 }
1959
1960 operand0.swizzleX = swizzleX;
1961 operand0.swizzleY = swizzleY;
1962 operand0.swizzleZ = swizzleZ;
1963 operand0.swizzleW = swizzleW;
1964
1965 if (absolute || negate) {
1966 operand0.extended = 1;
1967 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1968 if (absolute && !negate)
1969 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1970 if (!absolute && negate)
1971 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1972 if (absolute && negate)
1973 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1974 }
1975 }
1976
1977 check_register_index(emit, operand0.operandType, index);
1978
1979 /* Emit the operand tokens */
1980 emit_dword(emit, operand0.value);
1981 if (operand0.extended)
1982 emit_dword(emit, operand1.value);
1983
1984 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1985 /* Emit the four float/int in-line immediate values */
1986 unsigned *c;
1987 assert(index < ARRAY_SIZE(emit->immediates));
1988 assert(file == TGSI_FILE_IMMEDIATE);
1989 assert(swizzleX < 4);
1990 assert(swizzleY < 4);
1991 assert(swizzleZ < 4);
1992 assert(swizzleW < 4);
1993 c = (unsigned *) emit->immediates[index];
1994 emit_dword(emit, c[swizzleX]);
1995 emit_dword(emit, c[swizzleY]);
1996 emit_dword(emit, c[swizzleZ]);
1997 emit_dword(emit, c[swizzleW]);
1998 }
1999 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
2000 /* Emit the register index(es) */
2001 if (index2d) {
2002 emit_dword(emit, index2);
2003
2004 if (indirect2d) {
2005 emit_indirect_register(emit, reg->DimIndirect.Index);
2006 }
2007 }
2008
2009 emit_dword(emit, remap_temp_index(emit, file, index));
2010
2011 if (indirect) {
2012 assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
2013 emit_indirect_register(emit, reg->Indirect.Index);
2014 }
2015 }
2016 }
2017
2018
/**
 * Emit a resource operand (for use with a SAMPLE instruction).
 *
 * Emits two dwords: the operand token (a 1D-indexed resource register
 * with an identity XYZW swizzle) followed by the resource index.
 */
static void
emit_resource_register(struct svga_shader_emitter_v10 *emit,
                       unsigned resource_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   /* Identity swizzle: pass resource components through unchanged */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
   emit_dword(emit, resource_number);
}
2045
2046
/**
 * Emit a sampler operand (for use with a SAMPLE instruction).
 *
 * \param unit  TGSI sampler unit; translated to the device sampler slot
 *              via the shader key.
 */
static void
emit_sampler_register(struct svga_shader_emitter_v10 *emit,
                      unsigned unit)
{
   VGPU10OperandToken0 operand0;
   unsigned sampler_number;

   sampler_number = emit->key.tex[unit].sampler_index;

   /* With sampler-state mapping enabled, the shadow-compare variant of
    * this unit's sampler state occupies the next sampler slot.
    */
   if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
      sampler_number++;

   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;

   emit_dword(emit, operand0.value);
   emit_dword(emit, sampler_number);
}
2073
2074
/**
 * Emit an operand which reads the IS_FRONT_FACING register.
 *
 * The register is an ordinary shader input (located via the linkage
 * input map); the .xxxx swizzle replicates the front-facing flag,
 * which lives in the X component, to all four components.
 */
static void
emit_face_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand0;
   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* Broadcast the X component (the face flag) to all components */
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_X;
   operand0.swizzleZ = VGPU10_COMPONENT_X;
   operand0.swizzleW = VGPU10_COMPONENT_X;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}
2100
2101
/**
 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
 * instruction.
 *
 * Only a single operand token is emitted; the rasterizer register has
 * no index (there is exactly one), so indexDimension is 0D and no
 * index dword follows.
 */
static void
emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand0;

   /* init */
   operand0.value = 0;

   /* No register index for rasterizer index (there's only one) */
   operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
}
2126
2127
/**
 * Emit tokens for the "stream" register used by the
 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
 *
 * \param index  the stream number, emitted as a 1D register index.
 */
static void
emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
{
   VGPU10OperandToken0 operand0;

   /* init */
   operand0.value = 0;

   /* The stream register is 1D-indexed by the stream number.
    * (The previous comment about "rasterizer index" here was a
    * copy/paste error from emit_rasterizer_register().)
    */
   operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}
2148
2149
/**
 * Emit the token for a VGPU10 opcode, with precise parameter.
 * \param vgpu10_opcode  one of the VGPU10_OPCODE_x values
 * \param saturate  clamp result to [0,1]?
 * \param precise   request exact (non-reordered) arithmetic; only
 *                  honored on SM5 devices.
 */
static void
emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
                    unsigned vgpu10_opcode, bool saturate, bool precise)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0; /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
    * 'invariant' declarations. Only set preciseValues=1 if we have SM5.
    */
   token0.preciseValues = precise && emit->version >= 50;

   emit_dword(emit, token0.value);

   /* Remember that the shader uses 'precise' so the right caps are set */
   emit->uses_precise_qualifier |= token0.preciseValues;
}
2174
2175
/**
 * Emit the token for a VGPU10 opcode (non-precise convenience wrapper
 * around emit_opcode_precise()).
 * \param vgpu10_opcode  one of the VGPU10_OPCODE_x values
 * \param saturate  clamp result to [0,1]?
 */
static void
emit_opcode(struct svga_shader_emitter_v10 *emit,
            unsigned vgpu10_opcode, bool saturate)
{
   emit_opcode_precise(emit, vgpu10_opcode, saturate, false);
}
2186
2187
/**
 * Emit the token for a VGPU10 resinfo instruction.
 * \param modifier return type modifier, _uint or _rcpFloat.
 * TODO: We may want to remove this parameter if it will
 * only ever be used as _uint.
 */
static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
                    VGPU10_RESINFO_RETURN_TYPE modifier)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0; /* init all fields to zero */
   token0.opcodeType = VGPU10_OPCODE_RESINFO;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   /* RESINFO encodes its return-type modifier directly in the opcode token */
   token0.resinfoReturnType = modifier;

   emit_dword(emit, token0.value);
}
2207
2208
2209 /**
2210 * Emit opcode tokens for a texture sample instruction. Texture instructions
2211 * can be rather complicated (texel offsets, etc) so we have this specialized
2212 * function.
2213 */
2214 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,const int offsets[3])2215 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2216 unsigned vgpu10_opcode, bool saturate,
2217 const int offsets[3])
2218 {
2219 VGPU10OpcodeToken0 token0;
2220 VGPU10OpcodeToken1 token1;
2221
2222 token0.value = 0; /* init all fields to zero */
2223 token0.opcodeType = vgpu10_opcode;
2224 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2225 token0.saturate = saturate;
2226
2227 if (offsets[0] || offsets[1] || offsets[2]) {
2228 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2229 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2230 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2231 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2232 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2233 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2234
2235 token0.extended = 1;
2236 token1.value = 0;
2237 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2238 token1.offsetU = offsets[0];
2239 token1.offsetV = offsets[1];
2240 token1.offsetW = offsets[2];
2241 }
2242
2243 emit_dword(emit, token0.value);
2244 if (token0.extended) {
2245 emit_dword(emit, token1.value);
2246 }
2247 }
2248
2249
2250 /**
2251 * Emit a DISCARD opcode token.
2252 * If nonzero is set, we'll discard the fragment if the X component is not 0.
2253 * Otherwise, we'll discard the fragment if the X component is 0.
2254 */
2255 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,bool nonzero)2256 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, bool nonzero)
2257 {
2258 VGPU10OpcodeToken0 opcode0;
2259
2260 opcode0.value = 0;
2261 opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2262 if (nonzero)
2263 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2264
2265 emit_dword(emit, opcode0.value);
2266 }
2267
2268
/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 *
 * Records the token position of the instruction's first token so that
 * end_emit_instruction() can later patch in the instruction length or
 * discard the instruction entirely.
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   /* A non-zero start token means a previous instruction wasn't closed */
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}
2282
2283
2284 /**
2285 * We need to call this after we emit the last token of a VGPU10 instruction.
2286 * This function patches in the opcode token's instructionLength field.
2287 */
2288 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)2289 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2290 {
2291 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2292 unsigned inst_length;
2293
2294 assert(emit->inst_start_token > 0);
2295
2296 if (emit->discard_instruction) {
2297 /* Back up the emit->ptr to where this instruction started so
2298 * that we discard the current instruction.
2299 */
2300 emit->ptr = (char *) (tokens + emit->inst_start_token);
2301 }
2302 else {
2303 /* Compute instruction length and patch that into the start of
2304 * the instruction.
2305 */
2306 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2307
2308 assert(inst_length > 0);
2309
2310 tokens[emit->inst_start_token].instructionLength = inst_length;
2311 }
2312
2313 emit->inst_start_token = 0; /* reset to zero for error checking */
2314 emit->discard_instruction = false;
2315 }
2316
2317
/**
 * Return index for a free temporary register.
 *
 * Internal (translator-generated) temporaries are allocated after the
 * shader's own temporaries; they're released with free_temp_indexes().
 */
static unsigned
get_temp_index(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   return emit->num_shader_temps + emit->internal_temp_count++;
}
2327
2328
/**
 * Release the temporaries which were generated by get_temp_index().
 *
 * Simply resets the internal-temp counter; the registers are reused
 * by subsequent instructions.
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}
2337
2338
/**
 * Create a tgsi_full_src_register.
 *
 * The register gets an identity (XYZW) swizzle and all other fields
 * zeroed (no negate/absolute, no indirect addressing).
 */
static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file, unsigned index)
{
   struct tgsi_full_src_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   return reg;
}
2356
2357
2358 /**
2359 * Create a tgsi_full_src_register with a swizzle such that all four
2360 * vector components have the same scalar value.
2361 */
2362 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2363 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2364 {
2365 struct tgsi_full_src_register reg;
2366
2367 assert(component >= TGSI_SWIZZLE_X);
2368 assert(component <= TGSI_SWIZZLE_W);
2369
2370 memset(®, 0, sizeof(reg));
2371 reg.Register.File = file;
2372 reg.Register.Index = index;
2373 reg.Register.SwizzleX =
2374 reg.Register.SwizzleY =
2375 reg.Register.SwizzleZ =
2376 reg.Register.SwizzleW = component;
2377 return reg;
2378 }
2379
2380
/**
 * Create a tgsi_full_src_register for a temporary.
 * Convenience wrapper around make_src_reg().
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}
2389
2390
/**
 * Create a tgsi_full_src_register for a constant.
 * Convenience wrapper around make_src_reg().
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}
2399
2400
/**
 * Create a tgsi_full_src_register for an immediate constant.
 * Convenience wrapper around make_src_reg().
 */
static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
}
2409
2410
/**
 * Create a tgsi_full_dst_register.
 *
 * The register gets a full XYZW writemask and all other fields zeroed
 * (no indirect addressing, no saturation flags).
 */
static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file, unsigned index)
{
   struct tgsi_full_dst_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   return reg;
}
2425
2426
/**
 * Create a tgsi_full_dst_register for a temporary.
 * Convenience wrapper around make_dst_reg().
 */
static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)
{
   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
}
2435
2436
/**
 * Create a tgsi_full_dst_register for an output.
 * Convenience wrapper around make_dst_reg().
 */
static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)
{
   return make_dst_reg(TGSI_FILE_OUTPUT, index);
}
2445
2446
2447 /**
2448 * Create negated tgsi_full_src_register.
2449 */
2450 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2451 negate_src(const struct tgsi_full_src_register *reg)
2452 {
2453 struct tgsi_full_src_register neg = *reg;
2454 neg.Register.Negate = !reg->Register.Negate;
2455 return neg;
2456 }
2457
2458 /**
2459 * Create absolute value of a tgsi_full_src_register.
2460 */
2461 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2462 absolute_src(const struct tgsi_full_src_register *reg)
2463 {
2464 struct tgsi_full_src_register absolute = *reg;
2465 absolute.Register.Absolute = 1;
2466 return absolute;
2467 }
2468
2469
2470 /** Return the named swizzle term from the src register */
2471 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2472 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2473 {
2474 switch (term) {
2475 case TGSI_SWIZZLE_X:
2476 return reg->Register.SwizzleX;
2477 case TGSI_SWIZZLE_Y:
2478 return reg->Register.SwizzleY;
2479 case TGSI_SWIZZLE_Z:
2480 return reg->Register.SwizzleZ;
2481 case TGSI_SWIZZLE_W:
2482 return reg->Register.SwizzleW;
2483 default:
2484 assert(!"Bad swizzle");
2485 return TGSI_SWIZZLE_X;
2486 }
2487 }
2488
2489
2490 /**
2491 * Create swizzled tgsi_full_src_register.
2492 */
2493 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2494 swizzle_src(const struct tgsi_full_src_register *reg,
2495 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2496 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2497 {
2498 struct tgsi_full_src_register swizzled = *reg;
2499 /* Note: we swizzle the current swizzle */
2500 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2501 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2502 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2503 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2504 return swizzled;
2505 }
2506
2507
2508 /**
2509 * Create swizzled tgsi_full_src_register where all the swizzle
2510 * terms are the same.
2511 */
2512 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2513 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2514 {
2515 struct tgsi_full_src_register swizzled = *reg;
2516 /* Note: we swizzle the current swizzle */
2517 swizzled.Register.SwizzleX =
2518 swizzled.Register.SwizzleY =
2519 swizzled.Register.SwizzleZ =
2520 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2521 return swizzled;
2522 }
2523
2524
2525 /**
2526 * Create new tgsi_full_dst_register with writemask.
2527 * \param mask bitmask of TGSI_WRITEMASK_[XYZW]
2528 */
2529 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2530 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2531 {
2532 struct tgsi_full_dst_register masked = *reg;
2533 masked.Register.WriteMask = mask;
2534 return masked;
2535 }
2536
2537
2538 /**
2539 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2540 */
2541 static bool
same_swizzle_terms(const struct tgsi_full_src_register * reg)2542 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2543 {
2544 return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2545 reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2546 reg->Register.SwizzleZ == reg->Register.SwizzleW);
2547 }
2548
2549
2550 /**
2551 * Search the vector for the value 'x' and return its position.
2552 */
2553 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2554 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2555 union tgsi_immediate_data x)
2556 {
2557 unsigned i;
2558 for (i = 0; i < 4; i++) {
2559 if (vec[i].Int == x.Int)
2560 return i;
2561 }
2562 return -1;
2563 }
2564
2565
2566 /**
2567 * Helper used by make_immediate_reg(), make_immediate_reg_4().
2568 */
2569 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2570 find_immediate(struct svga_shader_emitter_v10 *emit,
2571 union tgsi_immediate_data x, unsigned startIndex)
2572 {
2573 const unsigned endIndex = emit->num_immediates;
2574 unsigned i;
2575
2576 assert(emit->num_immediates_emitted > 0);
2577
2578 /* Search immediates for x, y, z, w */
2579 for (i = startIndex; i < endIndex; i++) {
2580 if (x.Int == emit->immediates[i][0].Int ||
2581 x.Int == emit->immediates[i][1].Int ||
2582 x.Int == emit->immediates[i][2].Int ||
2583 x.Int == emit->immediates[i][3].Int) {
2584 return i;
2585 }
2586 }
2587 /* immediate not declared yet */
2588 return -1;
2589 }
2590
2591
2592 /**
2593 * As above, but search for a double[2] pair.
2594 */
2595 static int
find_immediate_dbl(struct svga_shader_emitter_v10 * emit,double x,double y)2596 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2597 double x, double y)
2598 {
2599 const unsigned endIndex = emit->num_immediates;
2600 unsigned i;
2601
2602 assert(emit->num_immediates_emitted > 0);
2603
2604 /* Search immediates for x, y, z, w */
2605 for (i = 0; i < endIndex; i++) {
2606 if (x == emit->immediates_dbl[i][0] &&
2607 y == emit->immediates_dbl[i][1]) {
2608 return i;
2609 }
2610 }
2611 /* Should never try to use an immediate value that wasn't pre-declared */
2612 assert(!"find_immediate_dbl() failed!");
2613 return -1;
2614 }
2615
2616
2617
2618 /**
2619 * Return a tgsi_full_src_register for an immediate/literal
2620 * union tgsi_immediate_data[4] value.
2621 * Note: the values must have been previously declared/allocated in
2622 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same
2623 * vec4 immediate.
2624 */
2625 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2626 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2627 const union tgsi_immediate_data imm[4])
2628 {
2629 struct tgsi_full_src_register reg;
2630 unsigned i;
2631
2632 for (i = 0; i < emit->num_common_immediates; i++) {
2633 /* search for first component value */
2634 int immpos = find_immediate(emit, imm[0], i);
2635 int x, y, z, w;
2636
2637 assert(immpos >= 0);
2638
2639 /* find remaining components within the immediate vector */
2640 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2641 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2642 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2643 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2644
2645 if (x >=0 && y >= 0 && z >= 0 && w >= 0) {
2646 /* found them all */
2647 memset(®, 0, sizeof(reg));
2648 reg.Register.File = TGSI_FILE_IMMEDIATE;
2649 reg.Register.Index = immpos;
2650 reg.Register.SwizzleX = x;
2651 reg.Register.SwizzleY = y;
2652 reg.Register.SwizzleZ = z;
2653 reg.Register.SwizzleW = w;
2654 return reg;
2655 }
2656 /* else, keep searching */
2657 }
2658
2659 assert(!"Failed to find immediate register!");
2660
2661 /* Just return IMM[0].xxxx */
2662 memset(®, 0, sizeof(reg));
2663 reg.Register.File = TGSI_FILE_IMMEDIATE;
2664 return reg;
2665 }
2666
2667
2668 /**
2669 * Return a tgsi_full_src_register for an immediate/literal
2670 * union tgsi_immediate_data value of the form {value, value, value, value}.
2671 * \sa make_immediate_reg_4() regarding allowed values.
2672 */
2673 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2674 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2675 union tgsi_immediate_data value)
2676 {
2677 struct tgsi_full_src_register reg;
2678 int immpos = find_immediate(emit, value, 0);
2679
2680 assert(immpos >= 0);
2681
2682 memset(®, 0, sizeof(reg));
2683 reg.Register.File = TGSI_FILE_IMMEDIATE;
2684 reg.Register.Index = immpos;
2685 reg.Register.SwizzleX =
2686 reg.Register.SwizzleY =
2687 reg.Register.SwizzleZ =
2688 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2689
2690 return reg;
2691 }
2692
2693
2694 /**
2695 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2696 * \sa make_immediate_reg_4() regarding allowed values.
2697 */
2698 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2699 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2700 float x, float y, float z, float w)
2701 {
2702 union tgsi_immediate_data imm[4];
2703 imm[0].Float = x;
2704 imm[1].Float = y;
2705 imm[2].Float = z;
2706 imm[3].Float = w;
2707 return make_immediate_reg_4(emit, imm);
2708 }
2709
2710
/**
 * Return a tgsi_full_src_register for an immediate/literal float value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
{
   union tgsi_immediate_data imm;
   imm.Float = value;
   return make_immediate_reg(emit, imm);
}
2723
2724
2725 /**
2726 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2727 */
2728 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2729 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2730 int x, int y, int z, int w)
2731 {
2732 union tgsi_immediate_data imm[4];
2733 imm[0].Int = x;
2734 imm[1].Int = y;
2735 imm[2].Int = z;
2736 imm[3].Int = w;
2737 return make_immediate_reg_4(emit, imm);
2738 }
2739
2740
/**
 * Return a tgsi_full_src_register for an immediate/literal int value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
{
   union tgsi_immediate_data imm;
   imm.Int = value;
   return make_immediate_reg(emit, imm);
}
2753
2754
2755 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2756 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2757 {
2758 struct tgsi_full_src_register reg;
2759 int immpos = find_immediate_dbl(emit, value, value);
2760
2761 assert(immpos >= 0);
2762
2763 memset(®, 0, sizeof(reg));
2764 reg.Register.File = TGSI_FILE_IMMEDIATE;
2765 reg.Register.Index = immpos;
2766 reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2767 reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2768 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2769 reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2770
2771 return reg;
2772 }
2773
2774
2775 /**
2776 * Allocate space for a union tgsi_immediate_data[4] immediate.
2777 * \return the index/position of the immediate.
2778 */
2779 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2780 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2781 const union tgsi_immediate_data imm[4])
2782 {
2783 unsigned n = emit->num_immediates++;
2784 assert(n < ARRAY_SIZE(emit->immediates));
2785 emit->immediates[n][0] = imm[0];
2786 emit->immediates[n][1] = imm[1];
2787 emit->immediates[n][2] = imm[2];
2788 emit->immediates[n][3] = imm[3];
2789 return n;
2790 }
2791
2792
2793 /**
2794 * Allocate space for a float[4] immediate.
2795 * \return the index/position of the immediate.
2796 */
2797 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2798 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2799 float x, float y, float z, float w)
2800 {
2801 union tgsi_immediate_data imm[4];
2802 imm[0].Float = x;
2803 imm[1].Float = y;
2804 imm[2].Float = z;
2805 imm[3].Float = w;
2806 return alloc_immediate_4(emit, imm);
2807 }
2808
2809
2810 /**
2811 * Allocate space for an int[4] immediate.
2812 * \return the index/position of the immediate.
2813 */
2814 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2815 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2816 int x, int y, int z, int w)
2817 {
2818 union tgsi_immediate_data imm[4];
2819 imm[0].Int = x;
2820 imm[1].Int = y;
2821 imm[2].Int = z;
2822 imm[3].Int = w;
2823 return alloc_immediate_4(emit, imm);
2824 }
2825
2826
2827 /**
2828 * Add a new immediate after the immediate block has been declared.
2829 * Any new immediates will be appended to the immediate block after the
2830 * shader has been parsed.
2831 * \return the index/position of the immediate.
2832 */
2833 static unsigned
add_immediate_int(struct svga_shader_emitter_v10 * emit,int x)2834 add_immediate_int(struct svga_shader_emitter_v10 *emit, int x)
2835 {
2836 union tgsi_immediate_data imm[4];
2837 imm[0].Int = x;
2838 imm[1].Int = x+1;
2839 imm[2].Int = x+2;
2840 imm[3].Int = x+3;
2841
2842 unsigned immpos = alloc_immediate_4(emit, imm);
2843 emit->num_new_immediates++;
2844
2845 return immpos;
2846 }
2847
2848
2849 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2850 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2851 double x, double y)
2852 {
2853 unsigned n = emit->num_immediates++;
2854 assert(!emit->num_immediates_emitted);
2855 assert(n < ARRAY_SIZE(emit->immediates));
2856 emit->immediates_dbl[n][0] = x;
2857 emit->immediates_dbl[n][1] = y;
2858 return n;
2859
2860 }
2861
2862
2863 /**
2864 * Allocate a shader input to store a system value.
2865 */
2866 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2867 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2868 {
2869 const unsigned n = emit->linkage.input_map_max + 1 + index;
2870 assert(index < ARRAY_SIZE(emit->system_value_indexes));
2871 emit->system_value_indexes[index] = n;
2872 return n;
2873 }
2874
2875
2876 /**
2877 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2878 */
2879 static bool
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2880 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2881 const struct tgsi_full_immediate *imm)
2882 {
2883 /* We don't actually emit any code here. We just save the
2884 * immediate values and emit them later.
2885 */
2886 alloc_immediate_4(emit, imm->u);
2887 return true;
2888 }
2889
2890
2891 /**
2892 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2893 * containing all the immediate values previously allocated
2894 * with alloc_immediate_4().
2895 */
2896 static bool
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)2897 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2898 {
2899 VGPU10OpcodeToken0 token;
2900
2901 assert(!emit->num_immediates_emitted);
2902
2903 token.value = 0;
2904 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2905 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2906
2907 emit->immediates_block_start_token =
2908 (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2909
2910 /* Note: no begin/end_emit_instruction() calls */
2911 emit_dword(emit, token.value);
2912 emit_dword(emit, 2 + 4 * emit->num_immediates);
2913 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2914
2915 emit->num_immediates_emitted = emit->num_immediates;
2916
2917 emit->immediates_block_next_token =
2918 (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2919
2920 return true;
2921 }
2922
2923
2924 /**
2925 * Reemit the immediate constant buffer block to include the new
2926 * immediates that are allocated after the block is declared. Those
2927 * immediates are used as constant indices to constant buffers.
2928 */
2929 static bool
reemit_immediates_block(struct svga_shader_emitter_v10 * emit)2930 reemit_immediates_block(struct svga_shader_emitter_v10 *emit)
2931 {
2932 unsigned num_tokens = emit_get_num_tokens(emit);
2933 unsigned num_new_immediates = emit->num_new_immediates;
2934
2935 /* Reserve room for the new immediates */
2936 if (!reserve(emit, 4 * num_new_immediates))
2937 return false;
2938
2939 /* Move the tokens after the immediates block to make room for the
2940 * new immediates.
2941 */
2942 VGPU10ProgramToken *tokens = (VGPU10ProgramToken *)emit->buf;
2943 char *next = (char *) (tokens + emit->immediates_block_next_token);
2944 char *new_next = (char *) (tokens + emit->immediates_block_next_token +
2945 num_new_immediates * 4);
2946
2947 char *end = emit->ptr;
2948 unsigned len = end - next;
2949 memmove(new_next, next, len);
2950
2951 /* Append the new immediates to the end of the immediates block */
2952 char *start = (char *) (tokens + emit->immediates_block_start_token+1);
2953 unsigned immediates_block_size = *(uint32 *)start;
2954
2955 char *new_immediates = (char *)&emit->immediates[emit->num_immediates_emitted][0];
2956 *(uint32 *)start = immediates_block_size + 4 * num_new_immediates;
2957 memcpy(next, new_immediates, 4 * num_new_immediates * sizeof(uint32));
2958
2959 emit->ptr = (char *) (tokens + num_tokens + 4 * num_new_immediates);
2960
2961 return true;
2962 }
2963
2964
2965
2966 /**
2967 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2968 * interpolation mode.
2969 * \return a VGPU10_INTERPOLATION_x value
2970 */
2971 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)2972 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2973 enum tgsi_interpolate_mode interp,
2974 enum tgsi_interpolate_loc interpolate_loc)
2975 {
2976 if (interp == TGSI_INTERPOLATE_COLOR) {
2977 interp = emit->key.fs.flatshade ?
2978 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2979 }
2980
2981 switch (interp) {
2982 case TGSI_INTERPOLATE_CONSTANT:
2983 return VGPU10_INTERPOLATION_CONSTANT;
2984 case TGSI_INTERPOLATE_LINEAR:
2985 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2986 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2987 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2988 emit->version >= 41) {
2989 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2990 } else {
2991 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2992 }
2993 break;
2994 case TGSI_INTERPOLATE_PERSPECTIVE:
2995 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2996 return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2997 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2998 emit->version >= 41) {
2999 return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
3000 } else {
3001 return VGPU10_INTERPOLATION_LINEAR;
3002 }
3003 break;
3004 default:
3005 assert(!"Unexpected interpolation mode");
3006 return VGPU10_INTERPOLATION_CONSTANT;
3007 }
3008 }
3009
3010
3011 /**
3012 * Translate a TGSI property to VGPU10.
3013 * Don't emit any instructions yet, only need to gather the primitive property
3014 * information. The output primitive topology might be changed later. The
3015 * final property instructions will be emitted as part of the pre-helper code.
3016 */
3017 static bool
emit_vgpu10_property(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_property * prop)3018 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
3019 const struct tgsi_full_property *prop)
3020 {
3021 static const VGPU10_PRIMITIVE primType[] = {
3022 VGPU10_PRIMITIVE_POINT, /* MESA_PRIM_POINTS */
3023 VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINES */
3024 VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINE_LOOP */
3025 VGPU10_PRIMITIVE_LINE, /* MESA_PRIM_LINE_STRIP */
3026 VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLES */
3027 VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLE_STRIP */
3028 VGPU10_PRIMITIVE_TRIANGLE, /* MESA_PRIM_TRIANGLE_FAN */
3029 VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_QUADS */
3030 VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_QUAD_STRIP */
3031 VGPU10_PRIMITIVE_UNDEFINED, /* MESA_PRIM_POLYGON */
3032 VGPU10_PRIMITIVE_LINE_ADJ, /* MESA_PRIM_LINES_ADJACENCY */
3033 VGPU10_PRIMITIVE_LINE_ADJ, /* MESA_PRIM_LINE_STRIP_ADJACENCY */
3034 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */
3035 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
3036 };
3037
3038 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
3039 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* MESA_PRIM_POINTS */
3040 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* MESA_PRIM_LINES */
3041 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* MESA_PRIM_LINE_LOOP */
3042 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* MESA_PRIM_LINE_STRIP */
3043 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* MESA_PRIM_TRIANGLES */
3044 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_STRIP */
3045 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_FAN */
3046 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_QUADS */
3047 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_QUAD_STRIP */
3048 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* MESA_PRIM_POLYGON */
3049 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* MESA_PRIM_LINES_ADJACENCY */
3050 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* MESA_PRIM_LINE_STRIP_ADJACENCY */
3051 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */
3052 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
3053 };
3054
3055 static const unsigned inputArraySize[] = {
3056 0, /* VGPU10_PRIMITIVE_UNDEFINED */
3057 1, /* VGPU10_PRIMITIVE_POINT */
3058 2, /* VGPU10_PRIMITIVE_LINE */
3059 3, /* VGPU10_PRIMITIVE_TRIANGLE */
3060 0,
3061 0,
3062 4, /* VGPU10_PRIMITIVE_LINE_ADJ */
3063 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
3064 };
3065
3066 switch (prop->Property.PropertyName) {
3067 case TGSI_PROPERTY_GS_INPUT_PRIM:
3068 assert(prop->u[0].Data < ARRAY_SIZE(primType));
3069 emit->gs.prim_type = primType[prop->u[0].Data];
3070 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
3071 emit->gs.input_size = inputArraySize[emit->gs.prim_type];
3072 break;
3073
3074 case TGSI_PROPERTY_GS_OUTPUT_PRIM:
3075 assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
3076 emit->gs.prim_topology = primTopology[prop->u[0].Data];
3077 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
3078 break;
3079
3080 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
3081 emit->gs.max_out_vertices = prop->u[0].Data;
3082 break;
3083
3084 case TGSI_PROPERTY_GS_INVOCATIONS:
3085 emit->gs.invocations = prop->u[0].Data;
3086 break;
3087
3088 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
3089 case TGSI_PROPERTY_NEXT_SHADER:
3090 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
3091 /* no-op */
3092 break;
3093
3094 case TGSI_PROPERTY_TCS_VERTICES_OUT:
3095 /* This info is already captured in the shader key */
3096 break;
3097
3098 case TGSI_PROPERTY_TES_PRIM_MODE:
3099 emit->tes.prim_mode = prop->u[0].Data;
3100 break;
3101
3102 case TGSI_PROPERTY_TES_SPACING:
3103 emit->tes.spacing = prop->u[0].Data;
3104 break;
3105
3106 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
3107 emit->tes.vertices_order_cw = prop->u[0].Data;
3108 break;
3109
3110 case TGSI_PROPERTY_TES_POINT_MODE:
3111 emit->tes.point_mode = prop->u[0].Data;
3112 break;
3113
3114 case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
3115 emit->cs.block_width = prop->u[0].Data;
3116 break;
3117
3118 case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
3119 emit->cs.block_height = prop->u[0].Data;
3120 break;
3121
3122 case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
3123 emit->cs.block_depth = prop->u[0].Data;
3124 break;
3125
3126 case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
3127 emit->fs.forceEarlyDepthStencil = true;
3128 break;
3129
3130 default:
3131 debug_printf("Unexpected TGSI property %s\n",
3132 tgsi_property_names[prop->Property.PropertyName]);
3133 }
3134
3135 return true;
3136 }
3137
3138
3139 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)3140 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
3141 VGPU10OpcodeToken0 opcode0, unsigned nData,
3142 unsigned data)
3143 {
3144 begin_emit_instruction(emit);
3145 emit_dword(emit, opcode0.value);
3146 if (nData)
3147 emit_dword(emit, data);
3148 end_emit_instruction(emit);
3149 }
3150
3151
3152 /**
3153 * Emit property instructions
3154 */
3155 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)3156 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
3157 {
3158 VGPU10OpcodeToken0 opcode0;
3159
3160 assert(emit->unit == PIPE_SHADER_GEOMETRY);
3161
3162 /* emit input primitive type declaration */
3163 opcode0.value = 0;
3164 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
3165 opcode0.primitive = emit->gs.prim_type;
3166 emit_property_instruction(emit, opcode0, 0, 0);
3167
3168 /* emit max output vertices */
3169 opcode0.value = 0;
3170 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
3171 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
3172
3173 if (emit->version >= 50 && emit->gs.invocations > 0) {
3174 opcode0.value = 0;
3175 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
3176 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
3177 }
3178 }
3179
3180
3181 /**
3182 * A helper function to declare tessellator domain in a hull shader or
3183 * in the domain shader.
3184 */
3185 static void
emit_tessellator_domain(struct svga_shader_emitter_v10 * emit,enum mesa_prim prim_mode)3186 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
3187 enum mesa_prim prim_mode)
3188 {
3189 VGPU10OpcodeToken0 opcode0;
3190
3191 opcode0.value = 0;
3192 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
3193 switch (prim_mode) {
3194 case MESA_PRIM_QUADS:
3195 case MESA_PRIM_LINES:
3196 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
3197 break;
3198 case MESA_PRIM_TRIANGLES:
3199 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
3200 break;
3201 default:
3202 debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
3203 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
3204 }
3205 begin_emit_instruction(emit);
3206 emit_dword(emit, opcode0.value);
3207 end_emit_instruction(emit);
3208 }
3209
3210
3211 /**
3212 * Emit domain shader declarations.
3213 */
3214 static void
emit_domain_shader_declarations(struct svga_shader_emitter_v10 * emit)3215 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
3216 {
3217 VGPU10OpcodeToken0 opcode0;
3218
3219 assert(emit->unit == PIPE_SHADER_TESS_EVAL);
3220
3221 /* Emit the input control point count */
3222 assert(emit->key.tes.vertices_per_patch >= 0 &&
3223 emit->key.tes.vertices_per_patch <= 32);
3224
3225 opcode0.value = 0;
3226 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3227 opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
3228 begin_emit_instruction(emit);
3229 emit_dword(emit, opcode0.value);
3230 end_emit_instruction(emit);
3231
3232 emit_tessellator_domain(emit, emit->tes.prim_mode);
3233
3234 /* Specify a max for swizzles of the domain point according to the
3235 * tessellator domain type.
3236 */
3237 emit->tes.swizzle_max = emit->tes.prim_mode == MESA_PRIM_TRIANGLES ?
3238 TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y;
3239 }
3240
3241
3242 /**
3243 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
3244 * to implement some instructions. We pre-allocate those values here
3245 * in the immediate constant buffer.
3246 */
3247 static void
alloc_common_immediates(struct svga_shader_emitter_v10 * emit)3248 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
3249 {
3250 unsigned n = 0;
3251
3252 emit->common_immediate_pos[n++] =
3253 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
3254
3255 if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
3256 emit->common_immediate_pos[n++] =
3257 alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
3258 }
3259
3260 emit->common_immediate_pos[n++] =
3261 alloc_immediate_int4(emit, 0, 1, 2, -1);
3262
3263 emit->common_immediate_pos[n++] =
3264 alloc_immediate_int4(emit, 3, 4, 5, 6);
3265
3266 if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
3267 emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
3268 emit->common_immediate_pos[n++] =
3269 alloc_immediate_int4(emit, 31, 0, 0, 0);
3270 }
3271
3272 if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
3273 emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
3274 emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3275 emit->common_immediate_pos[n++] =
3276 alloc_immediate_int4(emit, 32, 0, 0, 0);
3277 }
3278
3279 if (emit->key.vs.attrib_puint_to_snorm) {
3280 emit->common_immediate_pos[n++] =
3281 alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3282 }
3283
3284 if (emit->key.vs.attrib_puint_to_uscaled) {
3285 emit->common_immediate_pos[n++] =
3286 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3287 }
3288
3289 if (emit->key.vs.attrib_puint_to_sscaled) {
3290 emit->common_immediate_pos[n++] =
3291 alloc_immediate_int4(emit, 22, 12, 2, 0);
3292
3293 emit->common_immediate_pos[n++] =
3294 alloc_immediate_int4(emit, 22, 30, 0, 0);
3295 }
3296
3297 if (emit->vposition.num_prescale > 1) {
3298 unsigned i;
3299 for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3300 emit->common_immediate_pos[n++] =
3301 alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3302 }
3303 }
3304
3305 emit->immediates_dbl = (double (*)[2]) emit->immediates;
3306
3307 if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3308 emit->common_immediate_pos[n++] =
3309 alloc_immediate_double2(emit, -1.0, -1.0);
3310 }
3311
3312 if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
3313 emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
3314 emit->common_immediate_pos[n++] =
3315 alloc_immediate_double2(emit, 0.0, 0.0);
3316 emit->common_immediate_pos[n++] =
3317 alloc_immediate_double2(emit, 1.0, 1.0);
3318 }
3319
3320 if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3321 emit->common_immediate_pos[n++] =
3322 alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3323 }
3324
3325 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3326
3327 unsigned i;
3328
3329 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3330 if (emit->key.tex[i].texel_bias) {
3331 /* Replace 0.0f if more immediate float value is needed */
3332 emit->common_immediate_pos[n++] =
3333 alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3334 break;
3335 }
3336 }
3337
3338 /** TODO: allocate immediates for all possible element byte offset?
3339 */
3340 if (emit->raw_bufs) {
3341 unsigned i;
3342 for (i = 7; i < 12; i+=4) {
3343 emit->common_immediate_pos[n++] =
3344 alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3345 }
3346 }
3347
3348 if (emit->info.indirect_files &
3349 (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
3350 unsigned i;
3351 for (i = 7; i < 8; i+=4) {
3352 emit->common_immediate_pos[n++] =
3353 alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3354 }
3355 }
3356
3357 assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3358 emit->num_common_immediates = n;
3359 }
3360
3361
3362 /**
3363 * Emit hull shader declarations.
3364 */
3365 static void
emit_hull_shader_declarations(struct svga_shader_emitter_v10 * emit)3366 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3367 {
3368 VGPU10OpcodeToken0 opcode0;
3369
3370 /* Emit the input control point count */
3371 assert(emit->key.tcs.vertices_per_patch > 0 &&
3372 emit->key.tcs.vertices_per_patch <= 32);
3373
3374 opcode0.value = 0;
3375 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3376 opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3377 begin_emit_instruction(emit);
3378 emit_dword(emit, opcode0.value);
3379 end_emit_instruction(emit);
3380
3381 /* Emit the output control point count */
3382 assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3383
3384 opcode0.value = 0;
3385 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3386 opcode0.controlPointCount = emit->key.tcs.vertices_out;
3387 begin_emit_instruction(emit);
3388 emit_dword(emit, opcode0.value);
3389 end_emit_instruction(emit);
3390
3391 /* Emit tessellator domain */
3392 emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3393
3394 /* Emit tessellator output primitive */
3395 opcode0.value = 0;
3396 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3397 if (emit->key.tcs.point_mode) {
3398 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3399 }
3400 else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) {
3401 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3402 }
3403 else {
3404 assert(emit->key.tcs.prim_mode == MESA_PRIM_QUADS ||
3405 emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES);
3406
3407 if (emit->key.tcs.vertices_order_cw)
3408 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3409 else
3410 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3411 }
3412 begin_emit_instruction(emit);
3413 emit_dword(emit, opcode0.value);
3414 end_emit_instruction(emit);
3415
3416 /* Emit tessellator partitioning */
3417 opcode0.value = 0;
3418 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3419 switch (emit->key.tcs.spacing) {
3420 case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3421 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3422 break;
3423 case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3424 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3425 break;
3426 case PIPE_TESS_SPACING_EQUAL:
3427 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3428 break;
3429 default:
3430 debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3431 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3432 }
3433 begin_emit_instruction(emit);
3434 emit_dword(emit, opcode0.value);
3435 end_emit_instruction(emit);
3436
3437 alloc_common_immediates(emit);
3438
3439 /* Declare constant registers */
3440 emit_constant_declaration(emit);
3441
3442 /* Declare samplers and resources */
3443 emit_sampler_declarations(emit);
3444 emit_resource_declarations(emit);
3445
3446 /* Declare images */
3447 emit_image_declarations(emit);
3448
3449 /* Declare shader buffers */
3450 emit_shader_buf_declarations(emit);
3451
3452 /* Declare atomic buffers */
3453 emit_atomic_buf_declarations(emit);
3454
3455 int nVertices = emit->key.tcs.vertices_per_patch;
3456 emit->tcs.imm_index =
3457 alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3458
3459 /* Now, emit the constant block containing all the immediates
3460 * declared by shader, as well as the extra ones seen above.
3461 */
3462 emit_vgpu10_immediates_block(emit);
3463
3464 }
3465
3466
3467 /**
3468 * A helper function to determine if control point phase is needed.
3469 * Returns TRUE if there is control point output.
3470 */
3471 static bool
needs_control_point_phase(struct svga_shader_emitter_v10 * emit)3472 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3473 {
3474 unsigned i;
3475
3476 assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3477
3478 /* If output control point count does not match the input count,
3479 * we need a control point phase to explicitly set the output control
3480 * points.
3481 */
3482 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3483 emit->key.tcs.vertices_out)
3484 return true;
3485
3486 for (i = 0; i < emit->info.num_outputs; i++) {
3487 switch (emit->info.output_semantic_name[i]) {
3488 case TGSI_SEMANTIC_PATCH:
3489 case TGSI_SEMANTIC_TESSOUTER:
3490 case TGSI_SEMANTIC_TESSINNER:
3491 break;
3492 default:
3493 return true;
3494 }
3495 }
3496 return false;
3497 }
3498
3499
3500 /**
3501 * A helper function to add shader signature for passthrough control point
3502 * phase. This signature is also generated for passthrough control point
3503 * phase from HLSL compiler and is needed by Metal Renderer.
3504 */
3505 static void
emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 * emit)3506 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3507 {
3508 struct svga_shader_signature *sgn = &emit->signature;
3509 SVGA3dDXShaderSignatureEntry *sgnEntry;
3510 unsigned i;
3511
3512 for (i = 0; i < emit->info.num_inputs; i++) {
3513 unsigned index = emit->linkage.input_map[i];
3514 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3515
3516 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3517
3518 set_shader_signature_entry(sgnEntry, index,
3519 tgsi_semantic_to_sgn_name[sem_name],
3520 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3521 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3522 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3523
3524 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3525
3526 set_shader_signature_entry(sgnEntry, i,
3527 tgsi_semantic_to_sgn_name[sem_name],
3528 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3529 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3530 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3531 }
3532 }
3533
3534
3535 /**
3536 * A helper function to emit an instruction to start the control point phase
3537 * in the hull shader.
3538 */
3539 static void
emit_control_point_phase_instruction(struct svga_shader_emitter_v10 * emit)3540 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3541 {
3542 VGPU10OpcodeToken0 opcode0;
3543
3544 opcode0.value = 0;
3545 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3546 begin_emit_instruction(emit);
3547 emit_dword(emit, opcode0.value);
3548 end_emit_instruction(emit);
3549 }
3550
3551
3552 /**
3553 * Start the hull shader control point phase
3554 */
3555 static bool
emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 * emit)3556 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3557 {
3558 /* If there is no control point output, skip the control point phase. */
3559 if (!needs_control_point_phase(emit)) {
3560 if (!emit->key.tcs.vertices_out) {
3561 /**
3562 * If the tcs does not explicitly generate any control point output
3563 * and the tes does not use any input control point, then
3564 * emit an empty control point phase with zero output control
3565 * point count.
3566 */
3567 emit_control_point_phase_instruction(emit);
3568
3569 /**
3570 * Since this is an empty control point phase, we will need to
3571 * add input signatures when we parse the tcs again in the
3572 * patch constant phase.
3573 */
3574 emit->tcs.fork_phase_add_signature = true;
3575 }
3576 else {
3577 /**
3578 * Before skipping the control point phase, add the signature for
3579 * the passthrough control point.
3580 */
3581 emit_passthrough_control_point_signature(emit);
3582 }
3583 return false;
3584 }
3585
3586 /* Start the control point phase in the hull shader */
3587 emit_control_point_phase_instruction(emit);
3588
3589 /* Declare the output control point ID */
3590 if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3591 /* Add invocation id declaration if it does not exist */
3592 emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3593 }
3594
3595 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3596 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3597 VGPU10_OPERAND_INDEX_0D,
3598 0, 1,
3599 VGPU10_NAME_UNDEFINED,
3600 VGPU10_OPERAND_0_COMPONENT, 0,
3601 0,
3602 VGPU10_INTERPOLATION_CONSTANT, true,
3603 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3604
3605 if (emit->tcs.prim_id_index != INVALID_INDEX) {
3606 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3607 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3608 VGPU10_OPERAND_INDEX_0D,
3609 0, 1,
3610 VGPU10_NAME_UNDEFINED,
3611 VGPU10_OPERAND_0_COMPONENT,
3612 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3613 0,
3614 VGPU10_INTERPOLATION_UNDEFINED, true,
3615 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3616 }
3617
3618 return true;
3619 }
3620
3621
3622 /**
3623 * Start the hull shader patch constant phase and
3624 * do the second pass of the tcs translation and emit
3625 * the relevant declarations and instructions for this phase.
3626 */
3627 static bool
emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
                                      struct tgsi_parse_context *parse)
{
   /* Emit the patch constant (fork) phase of a hull/tessellation-control
    * shader.  The instruction tokens are parsed a second time, starting
    * from the saved position of the first instruction token, and each
    * instruction is re-translated in the context of the patch constant
    * phase.  Returns true on success, false on translation failure.
    */
   unsigned inst_number = 0;
   bool ret = true;
   VGPU10OpcodeToken0 opcode0;

   emit->skip_instruction = false;

   /* Start the patch constant phase */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   end_emit_instruction(emit);

   /* Set the current phase to patch constant phase */
   emit->tcs.control_point_phase = false;

   /* Declare the primitive-id input if the shader referenced it. */
   if (emit->tcs.prim_id_index != INVALID_INDEX) {
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
                             VGPU10_OPERAND_INDEX_0D,
                             0, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_0_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             0,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
   }

   /* Emit declarations for this phase.  index_range.required enables
    * dcl_indexRange tracking only when the shader indirectly addresses
    * the corresponding TGSI register file.
    */
   emit->index_range.required =
      emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
   emit_tcs_input_declarations(emit);

   /* Flush any index range still pending from the input declarations. */
   if (emit->index_range.start_index != INVALID_INDEX) {
      emit_index_range_declaration(emit);
   }

   emit->index_range.required =
      emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
   emit_tcs_output_declarations(emit);

   if (emit->index_range.start_index != INVALID_INDEX) {
      emit_index_range_declaration(emit);
   }
   emit->index_range.required = false;

   emit_temporaries_declaration(emit);

   /* Reset the token position to the first instruction token
    * in preparation for the second pass of the shader
    */
   parse->Position = emit->tcs.instruction_token_pos;

   while (!tgsi_parse_end_of_tokens(parse)) {
      tgsi_parse_token(parse);

      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
      ret = emit_vgpu10_instruction(emit, inst_number++,
                                    &parse->FullToken.FullInstruction);

      /* Usually this applies to TCS only. If shader is reading output of
       * patch constant in fork phase, we should reemit all instructions
       * which are writting into output of patch constant in fork phase
       * to store results into temporaries.
       */
      assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
      if (emit->reemit_instruction) {
         assert(emit->unit == PIPE_SHADER_TESS_CTRL);
         ret = emit_vgpu10_instruction(emit, inst_number,
                                       &parse->FullToken.FullInstruction);
      } else if (emit->reemit_rawbuf_instruction) {
         ret = emit_rawbuf_instruction(emit, inst_number,
                                       &parse->FullToken.FullInstruction);
      }

      if (!ret)
         return false;
   }

   return true;
}
3713
3714
3715 /**
3716 * Emit the thread group declaration for compute shader.
3717 */
3718 static void
emit_compute_shader_declarations(struct svga_shader_emitter_v10 * emit)3719 emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
3720 {
3721 VGPU10OpcodeToken0 opcode0;
3722
3723 opcode0.value = 0;
3724 opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
3725 begin_emit_instruction(emit);
3726 emit_dword(emit, opcode0.value);
3727 emit_dword(emit, emit->cs.block_width);
3728 emit_dword(emit, emit->cs.block_height);
3729 emit_dword(emit, emit->cs.block_depth);
3730 end_emit_instruction(emit);
3731 }
3732
3733
3734 /**
3735 * Emit index range declaration.
3736 */
3737 static bool
emit_index_range_declaration(struct svga_shader_emitter_v10 * emit)3738 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3739 {
3740 if (emit->version < 50)
3741 return true;
3742
3743 assert(emit->index_range.start_index != INVALID_INDEX);
3744 assert(emit->index_range.count != 0);
3745 assert(emit->index_range.required);
3746 assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3747 assert(emit->index_range.dim != 0);
3748 assert(emit->index_range.size != 0);
3749
3750 VGPU10OpcodeToken0 opcode0;
3751 VGPU10OperandToken0 operand0;
3752
3753 opcode0.value = 0;
3754 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3755
3756 operand0.value = 0;
3757 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3758 operand0.indexDimension = emit->index_range.dim;
3759 operand0.operandType = emit->index_range.operandType;
3760 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3761 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3762
3763 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3764 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3765
3766 begin_emit_instruction(emit);
3767 emit_dword(emit, opcode0.value);
3768 emit_dword(emit, operand0.value);
3769
3770 if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3771 emit_dword(emit, emit->index_range.size);
3772 emit_dword(emit, emit->index_range.start_index);
3773 emit_dword(emit, emit->index_range.count);
3774 }
3775 else {
3776 emit_dword(emit, emit->index_range.start_index);
3777 emit_dword(emit, emit->index_range.count);
3778 }
3779
3780 end_emit_instruction(emit);
3781
3782 /* Reset fields in emit->index_range struct except
3783 * emit->index_range.required which will be reset afterwards
3784 */
3785 emit->index_range.count = 0;
3786 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3787 emit->index_range.start_index = INVALID_INDEX;
3788 emit->index_range.size = 0;
3789 emit->index_range.dim = 0;
3790
3791 return true;
3792 }
3793
3794
3795 /**
3796 * Emit a vgpu10 declaration "instruction".
3797 * \param index the register index
3798 * \param size array size of the operand. In most cases, it is 1,
3799 * but for inputs to geometry shader, the array size varies
3800 * depending on the primitive type.
3801 */
3802 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)3803 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3804 VGPU10OpcodeToken0 opcode0,
3805 VGPU10OperandToken0 operand0,
3806 VGPU10NameToken name_token,
3807 unsigned index, unsigned size)
3808 {
3809 assert(opcode0.opcodeType);
3810 assert(operand0.mask ||
3811 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3812 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3813 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3814 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3815 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3816 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3817 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3818 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3819
3820 begin_emit_instruction(emit);
3821 emit_dword(emit, opcode0.value);
3822
3823 emit_dword(emit, operand0.value);
3824
3825 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3826 /* Next token is the index of the register to declare */
3827 emit_dword(emit, index);
3828 }
3829 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3830 /* Next token is the size of the register */
3831 emit_dword(emit, size);
3832
3833 /* Followed by the index of the register */
3834 emit_dword(emit, index);
3835 }
3836
3837 if (name_token.value) {
3838 emit_dword(emit, name_token.value);
3839 }
3840
3841 end_emit_instruction(emit);
3842 }
3843
3844
/**
 * Emit the declaration for a shader input.
 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx
 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
 * \param dim index dimension
 * \param index the input register index
 * \param size array size of the operand. In most cases, it is 1,
 *             but for inputs to geometry shader, the array size varies
 *             depending on the primitive type. For tessellation control
 *             shader, the array size is the vertex count per patch.
 * \param name one of VGPU10_NAME_x
 * \param numComp number of components
 * \param selMode component selection mode
 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode interpolation mode
 * \param addSignature if true, also record a shader signature entry
 * \param sgnName semantic name to use for the signature entry
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       VGPU10_OPCODE_TYPE opcodeType,
                       VGPU10_OPERAND_TYPE operandType,
                       VGPU10_OPERAND_INDEX_DIMENSION dim,
                       unsigned index, unsigned size,
                       VGPU10_SYSTEM_NAME name,
                       VGPU10_OPERAND_NUM_COMPONENTS numComp,
                       VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
                       unsigned usageMask,
                       VGPU10_INTERPOLATION_MODE interpMode,
                       bool addSignature,
                       SVGA3dDXSignatureSemanticName sgnName)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* Sanity-check each parameter against the values this translator
    * is expected to produce.
    */
   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
          operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);

   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE ||
          name == VGPU10_NAME_SAMPLE_INDEX ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);

   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);

   check_register_index(emit, opcodeType, index);

   /* Build the opcode and operand tokens and emit the declaration. */
   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);

   if (addSignature) {
      struct svga_shader_signature *sgn = &emit->signature;
      if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
         /* Set patch constant signature */
         SVGA3dDXShaderSignatureEntry *sgnEntry =
            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
         set_shader_signature_entry(sgnEntry, index,
                                    sgnName, usageMask,
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);

      } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
                 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
         /* Set input signature */
         SVGA3dDXShaderSignatureEntry *sgnEntry =
            &sgn->inputs[sgn->header.numInputSignatures++];
         set_shader_signature_entry(sgnEntry, index,
                                    sgnName, usageMask,
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
      }
   }

   if (emit->index_range.required) {
      /* Here, index_range declaration is only applicable for opcodeType
       * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
       * for operandType VGPU10_OPERAND_TYPE_INPUT,
       * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
       * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
       */
      if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
           opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
          (operandType != VGPU10_OPERAND_TYPE_INPUT &&
           operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
           operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
         /* Declaration not eligible for an index range: flush any
          * range accumulated so far before bailing out.
          */
         if (emit->index_range.start_index != INVALID_INDEX) {
            emit_index_range_declaration(emit);
         }
         return;
      }

      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
         /* Need record new index_range */
         emit->index_range.count = 1;
         emit->index_range.operandType = operandType;
         emit->index_range.start_index = index;
         emit->index_range.size = size;
         emit->index_range.dim = dim;
      }
      else if (index !=
               (emit->index_range.start_index + emit->index_range.count) ||
               emit->index_range.operandType != operandType) {
         /* Input index is not contiguous with index range or operandType is
          * different from index range's operandType. We need to emit current
          * index_range first and then start recording next index range.
          */
         emit_index_range_declaration(emit);

         emit->index_range.count = 1;
         emit->index_range.operandType = operandType;
         emit->index_range.start_index = index;
         emit->index_range.size = size;
         emit->index_range.dim = dim;
      }
      else if (emit->index_range.operandType == operandType) {
         /* Since input index is contiguous with index range and operandType
          * is same as index range's operandType, increment index range count.
          */
         emit->index_range.count++;
      }
   }
}
4012
4013
/**
 * Emit the declaration for a shader output.
 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx
 * \param index the output register index
 * \param name one of VGPU10_NAME_x
 * \param writemask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param addSignature if true, also record an output signature entry
 * \param sgnName semantic name to use for the signature entry
 */
static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
                        VGPU10_OPCODE_TYPE type, unsigned index,
                        VGPU10_SYSTEM_NAME name,
                        unsigned writemask,
                        bool addSignature,
                        SVGA3dDXSignatureSemanticName sgnName)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* Sanity-check each parameter against the values this translator
    * is expected to produce.
    */
   assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
          name == VGPU10_NAME_CLIP_DISTANCE);

   check_register_index(emit, type, index);

   /* Build the opcode and operand tokens and emit the declaration. */
   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = type;
   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   operand0.mask = writemask;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);

   /* Capture output signature */
   if (addSignature) {
      struct svga_shader_signature *sgn = &emit->signature;
      SVGA3dDXShaderSignatureEntry *sgnEntry =
         &sgn->outputs[sgn->header.numOutputSignatures++];
      set_shader_signature_entry(sgnEntry, index,
                                 sgnName, writemask,
                                 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
   }

   if (emit->index_range.required) {
      /* Here, index_range declaration is only applicable for opcodeType
       * VGPU10_OPCODE_DCL_OUTPUT and for operandType
       * VGPU10_OPERAND_TYPE_OUTPUT.
       */
      if (type != VGPU10_OPCODE_DCL_OUTPUT) {
         /* Declaration not eligible for an index range: flush any
          * range accumulated so far before bailing out.
          */
         if (emit->index_range.start_index != INVALID_INDEX) {
            emit_index_range_declaration(emit);
         }
         return;
      }

      if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
         /* Need record new index_range */
         emit->index_range.count = 1;
         emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
         emit->index_range.start_index = index;
         emit->index_range.size = 1;
         emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
      }
      else if (index !=
               (emit->index_range.start_index + emit->index_range.count)) {
         /* Output index is not contiguous with index range. We need to
          * emit current index_range first and then start recording next
          * index range.
          */
         emit_index_range_declaration(emit);

         emit->index_range.count = 1;
         emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
         emit->index_range.start_index = index;
         emit->index_range.size = 1;
         emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
      }
      else {
         /* Since output index is contiguous with index range, increment
          * index range count.
          */
         emit->index_range.count++;
      }
   }
}
4113
4114
4115 /**
4116 * Emit the declaration for the fragment depth output.
4117 */
4118 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)4119 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
4120 {
4121 VGPU10OpcodeToken0 opcode0;
4122 VGPU10OperandToken0 operand0;
4123 VGPU10NameToken name_token;
4124
4125 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4126
4127 opcode0.value = operand0.value = name_token.value = 0;
4128
4129 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4130 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
4131 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
4132 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4133 operand0.mask = 0;
4134
4135 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4136 }
4137
4138
4139 /**
4140 * Emit the declaration for the fragment sample mask/coverage output.
4141 */
4142 static void
emit_samplemask_output_declaration(struct svga_shader_emitter_v10 * emit)4143 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
4144 {
4145 VGPU10OpcodeToken0 opcode0;
4146 VGPU10OperandToken0 operand0;
4147 VGPU10NameToken name_token;
4148
4149 assert(emit->unit == PIPE_SHADER_FRAGMENT);
4150 assert(emit->version >= 41);
4151
4152 opcode0.value = operand0.value = name_token.value = 0;
4153
4154 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4155 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
4156 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
4157 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4158 operand0.mask = 0;
4159
4160 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4161 }
4162
4163
4164 /**
4165 * Emit output declarations for fragment shader.
4166 */
4167 static void
emit_fs_output_declarations(struct svga_shader_emitter_v10 * emit)4168 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
4169 {
4170 unsigned int i;
4171
4172 for (i = 0; i < emit->info.num_outputs; i++) {
4173 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
4174 const enum tgsi_semantic semantic_name =
4175 emit->info.output_semantic_name[i];
4176 const unsigned semantic_index = emit->info.output_semantic_index[i];
4177 unsigned index = i;
4178
4179 if (semantic_name == TGSI_SEMANTIC_COLOR) {
4180 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
4181
4182 emit->fs.color_out_index[semantic_index] = index;
4183
4184 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
4185 index + 1);
4186
4187 /* The semantic index is the shader's color output/buffer index */
4188 emit_output_declaration(emit,
4189 VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
4190 VGPU10_NAME_UNDEFINED,
4191 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4192 true,
4193 map_tgsi_semantic_to_sgn_name(semantic_name));
4194
4195 if (semantic_index == 0) {
4196 if (emit->key.fs.write_color0_to_n_cbufs > 1) {
4197 /* Emit declarations for the additional color outputs
4198 * for broadcasting.
4199 */
4200 unsigned j;
4201 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
4202 /* Allocate a new output index */
4203 unsigned idx = emit->info.num_outputs + j - 1;
4204 emit->fs.color_out_index[j] = idx;
4205 emit_output_declaration(emit,
4206 VGPU10_OPCODE_DCL_OUTPUT, idx,
4207 VGPU10_NAME_UNDEFINED,
4208 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4209 true,
4210 map_tgsi_semantic_to_sgn_name(semantic_name));
4211 emit->info.output_semantic_index[idx] = j;
4212 }
4213
4214 emit->fs.num_color_outputs =
4215 emit->key.fs.write_color0_to_n_cbufs;
4216 }
4217 }
4218 }
4219 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4220 /* Fragment depth output */
4221 emit_fragdepth_output_declaration(emit);
4222 }
4223 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
4224 /* Sample mask output */
4225 emit_samplemask_output_declaration(emit);
4226 }
4227 else {
4228 assert(!"Bad output semantic name");
4229 }
4230 }
4231 }
4232
4233
4234 /**
4235 * Emit common output declaration for vertex processing.
4236 */
4237 static void
emit_vertex_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned writemask,bool addSignature)4238 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
4239 unsigned index, unsigned writemask,
4240 bool addSignature)
4241 {
4242 const enum tgsi_semantic semantic_name =
4243 emit->info.output_semantic_name[index];
4244 const unsigned semantic_index = emit->info.output_semantic_index[index];
4245 unsigned name, type;
4246 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4247
4248 assert(emit->unit != PIPE_SHADER_FRAGMENT &&
4249 emit->unit != PIPE_SHADER_COMPUTE);
4250
4251 switch (semantic_name) {
4252 case TGSI_SEMANTIC_POSITION:
4253 if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4254 /* position will be declared in control point only */
4255 assert(emit->tcs.control_point_phase);
4256 type = VGPU10_OPCODE_DCL_OUTPUT;
4257 name = VGPU10_NAME_UNDEFINED;
4258 emit_output_declaration(emit, type, index, name, final_mask, true,
4259 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4260 return;
4261 }
4262 else {
4263 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4264 name = VGPU10_NAME_POSITION;
4265 }
4266 /* Save the index of the vertex position output register */
4267 emit->vposition.out_index = index;
4268 break;
4269 case TGSI_SEMANTIC_CLIPDIST:
4270 type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4271 name = VGPU10_NAME_CLIP_DISTANCE;
4272 /* save the starting index of the clip distance output register */
4273 if (semantic_index == 0)
4274 emit->clip_dist_out_index = index;
4275 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
4276 if (final_mask == 0x0)
4277 return; /* discard this do-nothing declaration */
4278 break;
4279 case TGSI_SEMANTIC_CLIPVERTEX:
4280 type = VGPU10_OPCODE_DCL_OUTPUT;
4281 name = VGPU10_NAME_UNDEFINED;
4282 emit->clip_vertex_out_index = index;
4283 break;
4284 default:
4285 /* generic output */
4286 type = VGPU10_OPCODE_DCL_OUTPUT;
4287 name = VGPU10_NAME_UNDEFINED;
4288 }
4289
4290 emit_output_declaration(emit, type, index, name, final_mask, addSignature,
4291 map_tgsi_semantic_to_sgn_name(semantic_name));
4292 }
4293
4294
4295 /**
4296 * Emit declaration for outputs in vertex shader.
4297 */
4298 static void
emit_vs_output_declarations(struct svga_shader_emitter_v10 * emit)4299 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
4300 {
4301 unsigned i;
4302 for (i = 0; i < emit->info.num_outputs; i++) {
4303 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4304 }
4305 }
4306
4307
4308 /**
4309 * A helper function to determine the writemask for an output
4310 * for the specified stream.
4311 */
4312 static unsigned
output_writemask_for_stream(unsigned stream,uint8_t output_streams,uint8_t output_usagemask)4313 output_writemask_for_stream(unsigned stream, uint8_t output_streams,
4314 uint8_t output_usagemask)
4315 {
4316 unsigned i;
4317 unsigned writemask = 0;
4318
4319 for (i = 0; i < 4; i++) {
4320 if ((output_streams & 0x3) == stream)
4321 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
4322 output_streams >>= 2;
4323 }
4324 return writemask & output_usagemask;
4325 }
4326
4327
/**
 * Emit declarations for outputs in geometry shader, handling multiple
 * vertex streams (SM5) and per-stream output primitive topology, then
 * record the output signature entries in a separate pass.
 */
static void
emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;
   VGPU10OpcodeToken0 opcode0;
   unsigned numStreamsSupported = 1;
   int s;

   /* Multiple GS streams are an SM5 feature; older versions only
    * have stream 0.
    */
   if (emit->version >= 50) {
      numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
   }

   /**
    * Start emitting from the last stream first, so we end with
    * stream 0, so any of the auxiliary output declarations will
    * go to stream 0.
    */
   for (s = numStreamsSupported-1; s >= 0; s--) {

      /* Skip streams with no output components. */
      if (emit->info.num_stream_output_components[s] == 0)
         continue;

      if (emit->version >= 50) {
         /* DCL_STREAM stream */
         begin_emit_instruction(emit);
         emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, false);
         emit_stream_register(emit, s);
         end_emit_instruction(emit);
      }

      /* emit output primitive topology declaration */
      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
      opcode0.primitiveTopology = emit->gs.prim_topology;
      emit_property_instruction(emit, opcode0, 0, 0);

      for (i = 0; i < emit->info.num_outputs; i++) {
         unsigned writemask;

         /* find out the writemask for this stream */
         writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
                                                 emit->output_usage_mask[i]);

         if (writemask) {
            enum tgsi_semantic semantic_name =
               emit->info.output_semantic_name[i];

            /* TODO: Still need to take care of a special case where a
             * single varying spans across multiple output registers.
             */
            switch(semantic_name) {
            case TGSI_SEMANTIC_PRIMID:
               /* Note: signature entries are added in the pass below,
                * so addSignature is false for all declarations here.
                */
               emit_output_declaration(emit,
                                       VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
                                       VGPU10_NAME_PRIMITIVE_ID,
                                       VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                       false,
                                       map_tgsi_semantic_to_sgn_name(semantic_name));
               break;
            case TGSI_SEMANTIC_LAYER:
               emit_output_declaration(emit,
                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
                                       VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
                                       false,
                                       map_tgsi_semantic_to_sgn_name(semantic_name));
               break;
            case TGSI_SEMANTIC_VIEWPORT_INDEX:
               emit_output_declaration(emit,
                                       VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
                                       VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
                                       VGPU10_OPERAND_4_COMPONENT_MASK_X,
                                       false,
                                       map_tgsi_semantic_to_sgn_name(semantic_name));
               emit->gs.viewport_index_out_index = i;
               break;
            default:
               emit_vertex_output_declaration(emit, i, writemask, false);
            }
         }
      }
   }

   /* For geometry shader outputs, it is possible the same register is
    * declared multiple times for different streams. So to avoid
    * redundant signature entries, geometry shader output signature is done
    * outside of the declaration.
    */
   struct svga_shader_signature *sgn = &emit->signature;
   SVGA3dDXShaderSignatureEntry *sgnEntry;

   for (i = 0; i < emit->info.num_outputs; i++) {
      if (emit->output_usage_mask[i]) {
         enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];

         sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
         set_shader_signature_entry(sgnEntry, i,
                                    map_tgsi_semantic_to_sgn_name(sem_name),
                                    emit->output_usage_mask[i],
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
      }
   }
}
4435
4436
4437 /**
4438 * Emit the declaration for the tess inner/outer output.
4439 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4440 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4441 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4442 */
4443 static void
emit_tesslevel_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned opcodeType,unsigned operandType,VGPU10_SYSTEM_NAME name,SVGA3dDXSignatureSemanticName sgnName)4444 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4445 unsigned index, unsigned opcodeType,
4446 unsigned operandType, VGPU10_SYSTEM_NAME name,
4447 SVGA3dDXSignatureSemanticName sgnName)
4448 {
4449 VGPU10OpcodeToken0 opcode0;
4450 VGPU10OperandToken0 operand0;
4451 VGPU10NameToken name_token;
4452
4453 assert(emit->version >= 50);
4454 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4455 (emit->key.tcs.prim_mode == MESA_PRIM_LINES &&
4456 name == VGPU10_NAME_UNDEFINED));
4457 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4458
4459 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4460 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4461
4462 opcode0.value = operand0.value = name_token.value = 0;
4463
4464 opcode0.opcodeType = opcodeType;
4465 operand0.operandType = operandType;
4466 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4467 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4468 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4469 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4470 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4471
4472 name_token.name = name;
4473 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4474
4475 /* Capture patch constant signature */
4476 struct svga_shader_signature *sgn = &emit->signature;
4477 SVGA3dDXShaderSignatureEntry *sgnEntry =
4478 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4479 set_shader_signature_entry(sgnEntry, index,
4480 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4481 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4482 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4483 }
4484
4485
4486 /**
4487 * Emit output declarations for tessellation control shader.
4488 */
4489 static void
emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned int i;
   /* Continue numbering after the outputs declared by earlier calls. */
   unsigned outputIndex = emit->num_outputs;
   struct svga_shader_signature *sgn = &emit->signature;

   /**
    * Initialize patch_generic_out_count so it won't be counted twice
    * since this function is called twice, one for control point phase
    * and another time for patch constant phase.
    */
   emit->tcs.patch_generic_out_count = 0;

   for (i = 0; i < emit->info.num_outputs; i++) {
      unsigned index = i;
      const enum tgsi_semantic semantic_name =
         emit->info.output_semantic_name[i];

      switch (semantic_name) {
      case TGSI_SEMANTIC_TESSINNER:
         /* Remember which TGSI output register holds the inner tess levels */
         emit->tcs.inner.tgsi_index = i;

         /* skip per-patch output declarations in control point phase */
         if (emit->tcs.control_point_phase)
            break;

         emit->tcs.inner.out_index = outputIndex;
         switch (emit->key.tcs.prim_mode) {
         case MESA_PRIM_QUADS:
            /* Quad domain has two inside tess factors (U and V) */
            emit_tesslevel_declaration(emit, outputIndex++,
               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
               VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);

            emit_tesslevel_declaration(emit, outputIndex++,
               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
               VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
            break;
         case MESA_PRIM_TRIANGLES:
            /* Triangle domain has a single inside tess factor */
            emit_tesslevel_declaration(emit, outputIndex++,
               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
               VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
            break;
         case MESA_PRIM_LINES:
            /* Isoline domain has no inside tess factor */
            break;
         default:
            debug_printf("Unsupported primitive type");
         }
         break;

      case TGSI_SEMANTIC_TESSOUTER:
         /* Remember which TGSI output register holds the outer tess levels */
         emit->tcs.outer.tgsi_index = i;

         /* skip per-patch output declarations in control point phase */
         if (emit->tcs.control_point_phase)
            break;

         emit->tcs.outer.out_index = outputIndex;
         switch (emit->key.tcs.prim_mode) {
         case MESA_PRIM_QUADS:
            /* Four edge tess factors for the quad domain */
            for (int j = 0; j < 4; j++) {
               emit_tesslevel_declaration(emit, outputIndex++,
                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
                  VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
            }
            break;
         case MESA_PRIM_TRIANGLES:
            /* Three edge tess factors for the triangle domain */
            for (int j = 0; j < 3; j++) {
               emit_tesslevel_declaration(emit, outputIndex++,
                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
                  VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
            }
            break;
         case MESA_PRIM_LINES:
            /* Two factors for isolines: detail and density */
            for (int j = 0; j < 2; j++) {
               emit_tesslevel_declaration(emit, outputIndex++,
                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
                  VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
            }
            break;
         default:
            debug_printf("Unsupported primitive type");
         }
         break;

      case TGSI_SEMANTIC_PATCH:
         /* Record the first per-patch generic output and count them all
          * (the count was reset above since this runs once per phase).
          */
         if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
            emit->tcs.patch_generic_out_index = i;
         emit->tcs.patch_generic_out_count++;

         /* skip per-patch output declarations in control point phase */
         if (emit->tcs.control_point_phase)
            break;

         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
                                 VGPU10_NAME_UNDEFINED,
                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                 false,
                                 map_tgsi_semantic_to_sgn_name(semantic_name));

         /* Per-patch outputs also go into the patch-constant signature */
         SVGA3dDXShaderSignatureEntry *sgnEntry =
            &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
         set_shader_signature_entry(sgnEntry, index,
                                    map_tgsi_semantic_to_sgn_name(semantic_name),
                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                    SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
                                    SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);

         break;

      default:
         /* save the starting index of control point outputs */
         if (emit->tcs.control_point_out_index == INVALID_INDEX)
            emit->tcs.control_point_out_index = i;
         emit->tcs.control_point_out_count++;

         /* skip control point output declarations in patch constant phase */
         if (!emit->tcs.control_point_phase)
            break;

         emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
                                        true);

      }
   }

   if (emit->tcs.control_point_phase) {
      /**
       * Add missing control point output in control point phase.
       */
      if (emit->tcs.control_point_out_index == INVALID_INDEX) {
         /* use register index after tessellation factors */
         switch (emit->key.tcs.prim_mode) {
         case MESA_PRIM_QUADS:
            /* quads use 6 tess factor registers (4 edge + 2 inside) */
            emit->tcs.control_point_out_index = outputIndex + 6;
            break;
         case MESA_PRIM_TRIANGLES:
            /* triangles use 4 tess factor registers (3 edge + 1 inside) */
            emit->tcs.control_point_out_index = outputIndex + 4;
            break;
         default:
            /* isolines use 2 tess factor registers */
            emit->tcs.control_point_out_index = outputIndex + 2;
            break;
         }
         emit->tcs.control_point_out_count++;
         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
                                 emit->tcs.control_point_out_index,
                                 VGPU10_NAME_POSITION,
                                 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                 true,
                                 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);

         /* If tcs does not output any control point output,
          * we can end the hull shader control point phase here
          * after emitting the default control point output.
          */
         emit->skip_instruction = true;
      }
   }
   else {
      if (emit->tcs.outer.out_index == INVALID_INDEX) {
         /* since the TCS did not declare out outer tess level output register,
          * we declare it here for patch constant phase only.
          */
         emit->tcs.outer.out_index = outputIndex;
         if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
            for (int i = 0; i < 4; i++) {
               emit_tesslevel_declaration(emit, outputIndex++,
                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
                  VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
            }
         }
         else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
            for (int i = 0; i < 3; i++) {
               emit_tesslevel_declaration(emit, outputIndex++,
                  VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
                  VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
                  SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
            }
         }
      }

      if (emit->tcs.inner.out_index == INVALID_INDEX) {
         /* since the TCS did not declare out inner tess level output register,
          * we declare it here
          */
         emit->tcs.inner.out_index = outputIndex;
         if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
            emit_tesslevel_declaration(emit, outputIndex++,
               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
               VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
            emit_tesslevel_declaration(emit, outputIndex++,
               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
               VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
         }
         else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
            emit_tesslevel_declaration(emit, outputIndex++,
               VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
               VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
               SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
         }
      }
   }
   /* Publish the final output count for the next declaration pass */
   emit->num_outputs = outputIndex;
}
4702
4703
4704 /**
4705 * Emit output declarations for tessellation evaluation shader.
4706 */
4707 static void
emit_tes_output_declarations(struct svga_shader_emitter_v10 * emit)4708 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4709 {
4710 unsigned int i;
4711
4712 for (i = 0; i < emit->info.num_outputs; i++) {
4713 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4714 }
4715 }
4716
4717
/**
 * Emit the declaration for a system value input/output.
 *
 * Depending on the semantic, this either emits a VGPU10 input declaration
 * right away, or only records the TGSI register index in the emitter state
 * so that the value can be materialized later by other emit paths.
 *
 * \param semantic_name  the TGSI system value semantic
 * \param index  the TGSI register index of the system value
 */
static void
emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
                              enum tgsi_semantic semantic_name, unsigned index)
{
   switch (semantic_name) {
   case TGSI_SEMANTIC_INSTANCEID:
      /* remap onto a dedicated system-value register slot */
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_INSTANCE_ID,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             map_tgsi_semantic_to_sgn_name(semantic_name));
      break;
   case TGSI_SEMANTIC_VERTEXID:
      /* save the original TGSI index before remapping */
      emit->vs.vertex_id_sys_index = index;
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_VERTEX_ID,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             map_tgsi_semantic_to_sgn_name(semantic_name));
      break;
   case TGSI_SEMANTIC_SAMPLEID:
      assert(emit->unit == PIPE_SHADER_FRAGMENT);
      emit->fs.sample_id_sys_index = index;
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_SAMPLE_INDEX,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_CONSTANT, true,
                             map_tgsi_semantic_to_sgn_name(semantic_name));
      break;
   case TGSI_SEMANTIC_SAMPLEPOS:
      /* This system value contains the position of the current sample
       * when using per-sample shading.  We implement this by calling
       * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
       * index as the argument.  See emit_sample_position_instructions().
       * No declaration is emitted here; only the index is recorded.
       */
      assert(emit->version >= 41);
      emit->fs.sample_pos_sys_index = index;
      index = alloc_system_value_index(emit, index);
      break;
   case TGSI_SEMANTIC_INVOCATIONID:
      /* Note: invocation id input is mapped to different register depending
       * on the shader type. In GS, it will be mapped to vGSInstanceID#.
       * In TCS, it will be mapped to vOutputControlPointID#.
       * Since in both cases, the mapped name is unique rather than
       * just a generic input name ("v#"), so there is no need to remap
       * the index value.
       */
      assert(emit->unit == PIPE_SHADER_GEOMETRY ||
             emit->unit == PIPE_SHADER_TESS_CTRL);
      assert(emit->version >= 50);

      if (emit->unit == PIPE_SHADER_GEOMETRY) {
         emit->gs.invocation_id_sys_index = index;
         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
                                VGPU10_OPERAND_INDEX_0D,
                                index, 1,
                                VGPU10_NAME_UNDEFINED,
                                VGPU10_OPERAND_0_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                0,
                                VGPU10_INTERPOLATION_UNDEFINED, true,
                                SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
      } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         /* The emission of the control point id will be done
          * in the control point phase in emit_hull_shader_control_point_phase().
          */
         emit->tcs.invocation_id_sys_index = index;
      }
      break;
   case TGSI_SEMANTIC_SAMPLEMASK:
      /* Note: the PS sample mask input has a unique name ("vCoverage#")
       * rather than just a generic input name ("v#") so no need to remap the
       * index value.
       */
      assert(emit->unit == PIPE_SHADER_FRAGMENT);
      assert(emit->version >= 50);
      emit->fs.sample_mask_in_sys_index = index;
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
                             VGPU10_OPERAND_INDEX_0D,
                             index, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_1_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             0,
                             VGPU10_INTERPOLATION_CONSTANT, true,
                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
      break;
   case TGSI_SEMANTIC_TESSCOORD:
      assert(emit->version >= 50);

      /* The domain point uses 3 components for triangles, 2 for
       * quads and isolines.
       */
      unsigned usageMask = 0;

      if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
         usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
      }
      else if (emit->tes.prim_mode == MESA_PRIM_LINES ||
               emit->tes.prim_mode == MESA_PRIM_QUADS) {
         usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
      }

      emit->tes.tesscoord_sys_index = index;
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
                             VGPU10_OPERAND_INDEX_0D,
                             index, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             usageMask,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
      break;
   case TGSI_SEMANTIC_TESSINNER:
      /* no declaration emitted; handled via patch-constant inputs */
      assert(emit->version >= 50);
      emit->tes.inner.tgsi_index = index;
      break;
   case TGSI_SEMANTIC_TESSOUTER:
      /* no declaration emitted; handled via patch-constant inputs */
      assert(emit->version >= 50);
      emit->tes.outer.tgsi_index = index;
      break;
   case TGSI_SEMANTIC_VERTICESIN:
      assert(emit->unit == PIPE_SHADER_TESS_CTRL);
      assert(emit->version >= 50);

      /* save the system value index */
      emit->tcs.vertices_per_patch_index = index;
      break;
   case TGSI_SEMANTIC_PRIMID:
      assert(emit->version >= 50);
      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         /* TCS: only record; declaration handled elsewhere */
         emit->tcs.prim_id_index = index;
      }
      else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
         emit->tes.prim_id_index = index;
         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
                                VGPU10_OPERAND_INDEX_0D,
                                index, 1,
                                VGPU10_NAME_UNDEFINED,
                                VGPU10_OPERAND_0_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                0,
                                VGPU10_INTERPOLATION_UNDEFINED, true,
                                map_tgsi_semantic_to_sgn_name(semantic_name));
      }
      break;
   case TGSI_SEMANTIC_THREAD_ID:
      /* compute shader: thread id within the thread group */
      assert(emit->unit >= PIPE_SHADER_COMPUTE);
      assert(emit->version >= 50);
      emit->cs.thread_id_index = index;
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
                             VGPU10_OPERAND_INDEX_0D,
                             index, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             map_tgsi_semantic_to_sgn_name(semantic_name));
      break;
   case TGSI_SEMANTIC_BLOCK_ID:
      /* compute shader: thread group id */
      assert(emit->unit >= PIPE_SHADER_COMPUTE);
      assert(emit->version >= 50);
      emit->cs.block_id_index = index;
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
                             VGPU10_OPERAND_INDEX_0D,
                             index, 1,
                             VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                             VGPU10_INTERPOLATION_UNDEFINED, true,
                             map_tgsi_semantic_to_sgn_name(semantic_name));
      break;
   case TGSI_SEMANTIC_GRID_SIZE:
      /* only recorded; materialized as a constant elsewhere */
      assert(emit->unit == PIPE_SHADER_COMPUTE);
      assert(emit->version >= 50);
      emit->cs.grid_size.tgsi_index = index;
      break;
   default:
      /* NOTE(review): assumes semantic_name is a valid index into
       * tgsi_semantic_names — confirm callers never pass out-of-range values.
       */
      debug_printf("unexpected system value semantic index %u / %s\n",
                   semantic_name, tgsi_semantic_names[semantic_name]);
   }
}
4927
/**
 * Translate a TGSI declaration to VGPU10.
 *
 * Most declarations are not emitted immediately; instead we record counts
 * and limits in the emitter state and generate the corresponding VGPU10
 * declarations later.
 *
 * \param decl  the TGSI declaration to process
 * \return true on success, false for unsupported declaration types
 */
static bool
emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
                        const struct tgsi_full_declaration *decl)
{
   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      /* do nothing - see emit_input_declarations() */
      return true;

   case TGSI_FILE_OUTPUT:
      /* only record the usage mask; outputs are declared per-stage later */
      assert(decl->Range.First == decl->Range.Last);
      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
      return true;

   case TGSI_FILE_TEMPORARY:
      /* Don't declare the temps here.  Just keep track of how many
       * and emit the declaration later.
       */
      if (decl->Declaration.Array) {
         /* Indexed temporary array.  Save the start index of the array
          * and the size of the array.
          */
         const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
         assert(arrayID < ARRAY_SIZE(emit->temp_arrays));

         /* Save this array so we can emit the declaration for it later */
         create_temp_array(emit, arrayID, decl->Range.First,
                           decl->Range.Last - decl->Range.First + 1,
                           decl->Range.First);
      }

      /* for all temps, indexed or not, keep track of highest index */
      emit->num_shader_temps = MAX2(emit->num_shader_temps,
                                    decl->Range.Last + 1);
      return true;

   case TGSI_FILE_CONSTANT:
      /* Don't declare constants here. Just keep track and emit later. */
      {
         unsigned constbuf = 0, num_consts;
         if (decl->Declaration.Dimension) {
            constbuf = decl->Dim.Index2D;
         }
         /* We throw an assertion here when, in fact, the shader should never
          * have linked due to constbuf index out of bounds, so we shouldn't
          * have reached here.
          */
         assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));

         num_consts = MAX2(emit->num_shader_consts[constbuf],
                           decl->Range.Last + 1);

         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
            debug_printf("Warning: constant buffer is declared to size [%u]"
                         " but [%u] is the limit.\n",
                         num_consts,
                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
            emit->register_overflow = true;
         }
         /* The linker doesn't enforce the max UBO size so we clamp here */
         emit->num_shader_consts[constbuf] =
            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
      }
      return true;

   case TGSI_FILE_IMMEDIATE:
      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
      return false;

   case TGSI_FILE_SYSTEM_VALUE:
      emit_system_value_declaration(emit, decl->Semantic.Name,
                                    decl->Range.First);
      return true;

   case TGSI_FILE_SAMPLER:
      /* Don't declare samplers here. Just keep track and emit later. */
      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
      return true;

#if 0
   case TGSI_FILE_RESOURCE:
      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
      assert(!"TGSI_FILE_RESOURCE not handled yet");
      return false;
#endif

   case TGSI_FILE_ADDRESS:
      /* track the number of address registers needed */
      emit->num_address_regs = MAX2(emit->num_address_regs,
                                    decl->Range.Last + 1);
      return true;

   case TGSI_FILE_SAMPLER_VIEW:
      {
         unsigned unit = decl->Range.First;
         assert(decl->Range.First == decl->Range.Last);
         emit->sampler_target[unit] = decl->SamplerView.Resource;

         /* Note: we can ignore YZW return types for now */
         emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
         emit->sampler_view[unit] = true;
      }
      return true;

   case TGSI_FILE_IMAGE:
      {
         unsigned unit = decl->Range.First;
         assert(decl->Range.First == decl->Range.Last);
         assert(unit < PIPE_MAX_SHADER_IMAGES);
         emit->image[unit] = decl->Image;
         emit->image_mask |= 1 << unit;
         emit->num_images++;
      }
      return true;

   case TGSI_FILE_HW_ATOMIC:
      /* Declare the atomic buffer if it is not already declared. */
      if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
         emit->num_atomic_bufs++;
         emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
      }

      /* Remember the maximum atomic counter index encountered */
      emit->max_atomic_counter_index =
         MAX2(emit->max_atomic_counter_index, decl->Range.Last);
      return true;

   case TGSI_FILE_MEMORY:
      /* Record memory has been used. */
      if (emit->unit == PIPE_SHADER_COMPUTE &&
          decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
         emit->cs.shared_memory_declared = true;
      }

      return true;

   case TGSI_FILE_BUFFER:
      /* shader buffers require SM5 */
      assert(emit->version >= 50);
      emit->num_shader_bufs++;
      return true;

   default:
      assert(!"Unexpected type of declaration");
      return false;
   }
}
5077
5078
/**
 * Emit input declarations for fragment shader.
 *
 * Inputs are declared using the linkage map from the previous stage.
 * System-generated values (position, face, primitive id, sample id,
 * layer, viewport index) use SGV declarations with fixed interpolation;
 * everything else is a generic PS input using the interpolation mode
 * from its TGSI declaration.
 */
static void
emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->linkage.num_inputs; i++) {
      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
      unsigned usage_mask = emit->info.input_usage_mask[i];
      unsigned index = emit->linkage.input_map[i];
      unsigned type, interpolationMode, name;
      unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;

      if (usage_mask == 0)
         continue;  /* register is not actually used */

      if (semantic_name == TGSI_SEMANTIC_POSITION) {
         /* fragment position input */
         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
         interpolationMode = VGPU10_INTERPOLATION_LINEAR;
         name = VGPU10_NAME_POSITION;
         if (usage_mask & TGSI_WRITEMASK_W) {
            /* we need to replace use of 'w' with '1/w' */
            emit->fs.fragcoord_input_index = i;
         }
      }
      else if (semantic_name == TGSI_SEMANTIC_FACE) {
         /* fragment front-facing input */
         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
         name = VGPU10_NAME_IS_FRONT_FACE;
         emit->fs.face_input_index = i;
      }
      else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
         /* primitive ID */
         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
         name = VGPU10_NAME_PRIMITIVE_ID;
      }
      else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
         /* sample index / ID */
         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
         name = VGPU10_NAME_SAMPLE_INDEX;
      }
      else if (semantic_name == TGSI_SEMANTIC_LAYER) {
         /* render target array index */
         if (emit->key.fs.layer_to_zero) {
            /**
             * The shader from the previous stage does not write to layer,
             * so reading the layer index in fragment shader should return 0.
             * Record the register and skip the declaration entirely.
             */
            emit->fs.layer_input_index = i;
            continue;
         } else {
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
            mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
         }
      }
      else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
         /* viewport index */
         type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
         interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
         name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
         mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
      }
      else {
         /* general fragment input */
         type = VGPU10_OPCODE_DCL_INPUT_PS;
         interpolationMode =
            translate_interpolation(emit,
                                    emit->info.input_interpolate[i],
                                    emit->info.input_interpolate_loc[i]);

         /* keeps track if flat interpolation mode is being used */
         emit->uses_flat_interp = emit->uses_flat_interp ||
            (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);

         name = VGPU10_NAME_UNDEFINED;
      }

      emit_input_declaration(emit, type,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D, index, 1,
                             name,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             mask,
                             interpolationMode, true,
                             map_tgsi_semantic_to_sgn_name(semantic_name));
   }
}
5175
5176
5177 /**
5178 * Emit input declarations for vertex shader.
5179 */
5180 static void
emit_vs_input_declarations(struct svga_shader_emitter_v10 * emit)5181 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
5182 {
5183 unsigned i;
5184
5185 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
5186 unsigned usage_mask = emit->info.input_usage_mask[i];
5187 unsigned index = i;
5188
5189 if (usage_mask == 0)
5190 continue; /* register is not actually used */
5191
5192 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5193 VGPU10_OPERAND_TYPE_INPUT,
5194 VGPU10_OPERAND_INDEX_1D, index, 1,
5195 VGPU10_NAME_UNDEFINED,
5196 VGPU10_OPERAND_4_COMPONENT,
5197 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5198 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5199 VGPU10_INTERPOLATION_UNDEFINED, true,
5200 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5201 }
5202 }
5203
5204
5205 /**
5206 * Emit input declarations for geometry shader.
5207 */
5208 static void
emit_gs_input_declarations(struct svga_shader_emitter_v10 * emit)5209 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
5210 {
5211 unsigned i;
5212
5213 for (i = 0; i < emit->info.num_inputs; i++) {
5214 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5215 unsigned usage_mask = emit->info.input_usage_mask[i];
5216 unsigned index = emit->linkage.input_map[i];
5217 unsigned opcodeType, operandType;
5218 unsigned numComp, selMode;
5219 unsigned name;
5220 unsigned dim;
5221
5222 if (usage_mask == 0)
5223 continue; /* register is not actually used */
5224
5225 opcodeType = VGPU10_OPCODE_DCL_INPUT;
5226 operandType = VGPU10_OPERAND_TYPE_INPUT;
5227 numComp = VGPU10_OPERAND_4_COMPONENT;
5228 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
5229 name = VGPU10_NAME_UNDEFINED;
5230
5231 /* all geometry shader inputs are two dimensional except
5232 * gl_PrimitiveID
5233 */
5234 dim = VGPU10_OPERAND_INDEX_2D;
5235
5236 if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5237 /* Primitive ID */
5238 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
5239 dim = VGPU10_OPERAND_INDEX_0D;
5240 numComp = VGPU10_OPERAND_0_COMPONENT;
5241 selMode = 0;
5242
5243 /* also save the register index so we can check for
5244 * primitive id when emit src register. We need to modify the
5245 * operand type, index dimension when emit primitive id src reg.
5246 */
5247 emit->gs.prim_id_index = i;
5248 }
5249 else if (semantic_name == TGSI_SEMANTIC_POSITION) {
5250 /* vertex position input */
5251 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
5252 name = VGPU10_NAME_POSITION;
5253 }
5254
5255 emit_input_declaration(emit, opcodeType, operandType,
5256 dim, index,
5257 emit->gs.input_size,
5258 name,
5259 numComp, selMode,
5260 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5261 VGPU10_INTERPOLATION_UNDEFINED, true,
5262 map_tgsi_semantic_to_sgn_name(semantic_name));
5263 }
5264 }
5265
5266
/**
 * Emit input declarations for tessellation control shader.
 *
 * TCS inputs are two-dimensional (vertex x attribute), with the array
 * size given by the patch vertex count from the compile key.  In the
 * patch constant phase, inputs are read from the vicp registers rather
 * than the v registers, and signature entries are only added if the
 * fork phase requested them.
 */
static void
emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;
   unsigned size = emit->key.tcs.vertices_per_patch;
   bool addSignature = true;

   /* In the patch constant phase only add signatures when flagged to */
   if (!emit->tcs.control_point_phase)
      addSignature = emit->tcs.fork_phase_add_signature;

   for (i = 0; i < emit->info.num_inputs; i++) {
      unsigned usage_mask = emit->info.input_usage_mask[i];
      unsigned index = emit->linkage.input_map[i];
      enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
      VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
      VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
      SVGA3dDXSignatureSemanticName sgn_name =
         map_tgsi_semantic_to_sgn_name(semantic_name);

      /* Note: the position check comes before the usage-mask skip so the
       * control point index is recorded even if the register is unused.
       */
      if (semantic_name == TGSI_SEMANTIC_POSITION ||
          index == emit->linkage.position_index) {
         /* save the input control point index for later use */
         emit->tcs.control_point_input_index = i;
      }
      else if (usage_mask == 0) {
         continue; /* register is not actually used */
      }
      else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
         /* The shadow copy is being used here. So set the signature name
          * to UNDEFINED.
          */
         sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
      }

      /* input control points in the patch constant phase are emitted in the
       * vicp register rather than the v register.
       */
      if (!emit->tcs.control_point_phase) {
         operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
      }

      /* Tessellation control shader inputs are two dimensional.
       * The array size is determined by the patch vertex count.
       */
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                             operandType,
                             VGPU10_OPERAND_INDEX_2D,
                             index, size, name,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                             VGPU10_INTERPOLATION_UNDEFINED,
                             addSignature, sgn_name);
   }

   if (emit->tcs.control_point_phase) {

      /* Also add an address register for the indirection to the
       * input control points
       */
      emit->tcs.control_point_addr_index = emit->num_address_regs++;
   }
}
5333
5334
5335 static void
emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 * emit)5336 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
5337 {
5338
5339 /* In tcs, tess factors are emitted as extra outputs.
5340 * The starting register index for the tess factors is captured
5341 * in the compile key.
5342 */
5343 unsigned inputIndex = emit->key.tes.tessfactor_index;
5344
5345 if (emit->tes.prim_mode == MESA_PRIM_QUADS) {
5346 if (emit->key.tes.need_tessouter) {
5347 emit->tes.outer.in_index = inputIndex;
5348 for (int i = 0; i < 4; i++) {
5349 emit_tesslevel_declaration(emit, inputIndex++,
5350 VGPU10_OPCODE_DCL_INPUT_SIV,
5351 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5352 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
5353 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
5354 }
5355 }
5356
5357 if (emit->key.tes.need_tessinner) {
5358 emit->tes.inner.in_index = inputIndex;
5359 emit_tesslevel_declaration(emit, inputIndex++,
5360 VGPU10_OPCODE_DCL_INPUT_SIV,
5361 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5362 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
5363 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
5364
5365 emit_tesslevel_declaration(emit, inputIndex++,
5366 VGPU10_OPCODE_DCL_INPUT_SIV,
5367 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5368 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
5369 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
5370 }
5371 }
5372 else if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
5373 if (emit->key.tes.need_tessouter) {
5374 emit->tes.outer.in_index = inputIndex;
5375 for (int i = 0; i < 3; i++) {
5376 emit_tesslevel_declaration(emit, inputIndex++,
5377 VGPU10_OPCODE_DCL_INPUT_SIV,
5378 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5379 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5380 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5381 }
5382 }
5383
5384 if (emit->key.tes.need_tessinner) {
5385 emit->tes.inner.in_index = inputIndex;
5386 emit_tesslevel_declaration(emit, inputIndex++,
5387 VGPU10_OPCODE_DCL_INPUT_SIV,
5388 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5389 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5390 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5391 }
5392 }
5393 else if (emit->tes.prim_mode == MESA_PRIM_LINES) {
5394 if (emit->key.tes.need_tessouter) {
5395 emit->tes.outer.in_index = inputIndex;
5396 emit_tesslevel_declaration(emit, inputIndex++,
5397 VGPU10_OPCODE_DCL_INPUT_SIV,
5398 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5399 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5400 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5401
5402 emit_tesslevel_declaration(emit, inputIndex++,
5403 VGPU10_OPCODE_DCL_INPUT_SIV,
5404 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5405 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5406 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5407 }
5408 }
5409 }
5410
5411
/**
 * Emit input declarations for tessellation evaluation shader.
 *
 * Per-patch inputs are 1D patch-constant operands; per-vertex inputs are
 * 2D control-point operands sized by the patch vertex count.  The
 * tessellation factor inputs are declared afterwards, and any TCS outputs
 * without a matching TES input are back-filled so that the DS signatures
 * match the HS signatures as DX requires.
 */
static void
emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->info.num_inputs; i++) {
      unsigned usage_mask = emit->info.input_usage_mask[i];
      unsigned index = emit->linkage.input_map[i];
      unsigned size;
      const enum tgsi_semantic semantic_name =
         emit->info.input_semantic_name[i];
      SVGA3dDXSignatureSemanticName sgn_name;
      VGPU10_OPERAND_TYPE operandType;
      VGPU10_OPERAND_INDEX_DIMENSION dim;

      /* unlike other stages, unused registers are still declared */
      if (usage_mask == 0)
         usage_mask = 1;  /* at least set usage mask to one */

      if (semantic_name == TGSI_SEMANTIC_PATCH) {
         /* per-patch input: 1D patch-constant operand */
         operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
         dim = VGPU10_OPERAND_INDEX_1D;
         size = 1;
         sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
      }
      else {
         /* per-vertex input: 2D control-point operand, one entry
          * per patch vertex
          */
         operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
         dim = VGPU10_OPERAND_INDEX_2D;
         size = emit->key.tes.vertices_per_patch;
         sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
      }

      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
                             dim, index, size, VGPU10_NAME_UNDEFINED,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                             VGPU10_INTERPOLATION_UNDEFINED,
                             true, sgn_name);
   }

   emit_tessfactor_input_declarations(emit);

   /* DX spec requires DS input controlpoint/patch-constant signatures to match
    * the HS output controlpoint/patch-constant signatures exactly.
    * Add missing input declarations even if they are not used in the shader.
    */
   if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
      struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
      for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {

         /* If a tcs output does not have a corresponding input register in
          * tes, add one.
          */
         if (emit->linkage.prevShader.output_map[i] >
             emit->linkage.input_map_max) {
            const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];

            if (sem_name == TGSI_SEMANTIC_PATCH) {
               /* missing per-patch input */
               emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                      VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
                                      VGPU10_OPERAND_INDEX_1D,
                                      i, 1, VGPU10_NAME_UNDEFINED,
                                      VGPU10_OPERAND_4_COMPONENT,
                                      VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                      VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                      VGPU10_INTERPOLATION_UNDEFINED,
                                      true,
                                      map_tgsi_semantic_to_sgn_name(sem_name));

            } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
                       sem_name != TGSI_SEMANTIC_TESSOUTER) {
               /* missing per-vertex (control point) input */
               emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                      VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
                                      VGPU10_OPERAND_INDEX_2D,
                                      i, emit->key.tes.vertices_per_patch,
                                      VGPU10_NAME_UNDEFINED,
                                      VGPU10_OPERAND_4_COMPONENT,
                                      VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                      VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                      VGPU10_INTERPOLATION_UNDEFINED,
                                      true,
                                      map_tgsi_semantic_to_sgn_name(sem_name));
            }
            /* tessellation factors are taken care of in
             * emit_tessfactor_input_declarations().
             */
         }
      }
   }
}
5505
5506
5507 /**
5508 * Emit all input declarations.
5509 */
5510 static bool
emit_input_declarations(struct svga_shader_emitter_v10 * emit)5511 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5512 {
5513 emit->index_range.required =
5514 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
5515
5516 switch (emit->unit) {
5517 case PIPE_SHADER_FRAGMENT:
5518 emit_fs_input_declarations(emit);
5519 break;
5520 case PIPE_SHADER_GEOMETRY:
5521 emit_gs_input_declarations(emit);
5522 break;
5523 case PIPE_SHADER_VERTEX:
5524 emit_vs_input_declarations(emit);
5525 break;
5526 case PIPE_SHADER_TESS_CTRL:
5527 emit_tcs_input_declarations(emit);
5528 break;
5529 case PIPE_SHADER_TESS_EVAL:
5530 emit_tes_input_declarations(emit);
5531 break;
5532 case PIPE_SHADER_COMPUTE:
5533 //XXX emit_cs_input_declarations(emit);
5534 break;
5535 default:
5536 assert(0);
5537 }
5538
5539 if (emit->index_range.start_index != INVALID_INDEX) {
5540 emit_index_range_declaration(emit);
5541 }
5542 emit->index_range.required = false;
5543 return true;
5544 }
5545
5546
5547 /**
5548 * Emit all output declarations.
5549 */
5550 static bool
emit_output_declarations(struct svga_shader_emitter_v10 * emit)5551 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5552 {
5553 emit->index_range.required =
5554 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
5555
5556 switch (emit->unit) {
5557 case PIPE_SHADER_FRAGMENT:
5558 emit_fs_output_declarations(emit);
5559 break;
5560 case PIPE_SHADER_GEOMETRY:
5561 emit_gs_output_declarations(emit);
5562 break;
5563 case PIPE_SHADER_VERTEX:
5564 emit_vs_output_declarations(emit);
5565 break;
5566 case PIPE_SHADER_TESS_CTRL:
5567 emit_tcs_output_declarations(emit);
5568 break;
5569 case PIPE_SHADER_TESS_EVAL:
5570 emit_tes_output_declarations(emit);
5571 break;
5572 case PIPE_SHADER_COMPUTE:
5573 //XXX emit_cs_output_declarations(emit);
5574 break;
5575 default:
5576 assert(0);
5577 }
5578
5579 if (emit->vposition.so_index != INVALID_INDEX &&
5580 emit->vposition.out_index != INVALID_INDEX) {
5581
5582 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5583
5584 /* Emit the declaration for the non-adjusted vertex position
5585 * for stream output purpose
5586 */
5587 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5588 emit->vposition.so_index,
5589 VGPU10_NAME_UNDEFINED,
5590 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5591 true,
5592 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5593 }
5594
5595 if (emit->clip_dist_so_index != INVALID_INDEX &&
5596 emit->clip_dist_out_index != INVALID_INDEX) {
5597
5598 assert(emit->unit != PIPE_SHADER_FRAGMENT);
5599
5600 /* Emit the declaration for the clip distance shadow copy which
5601 * will be used for stream output purpose and for clip distance
5602 * varying variable. Note all clip distances
5603 * will be written regardless of the enabled clipping planes.
5604 */
5605 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5606 emit->clip_dist_so_index,
5607 VGPU10_NAME_UNDEFINED,
5608 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5609 true,
5610 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5611
5612 if (emit->info.num_written_clipdistance > 4) {
5613 /* for the second clip distance register, each handles 4 planes */
5614 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5615 emit->clip_dist_so_index + 1,
5616 VGPU10_NAME_UNDEFINED,
5617 VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5618 true,
5619 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5620 }
5621 }
5622
5623 if (emit->index_range.start_index != INVALID_INDEX) {
5624 emit_index_range_declaration(emit);
5625 }
5626 emit->index_range.required = false;
5627 return true;
5628 }
5629
5630
5631 /**
5632 * A helper function to create a temporary indexable array
5633 * and initialize the corresponding entries in the temp_map array.
5634 */
5635 static void
create_temp_array(struct svga_shader_emitter_v10 * emit,unsigned arrayID,unsigned first,unsigned count,unsigned startIndex)5636 create_temp_array(struct svga_shader_emitter_v10 *emit,
5637 unsigned arrayID, unsigned first, unsigned count,
5638 unsigned startIndex)
5639 {
5640 unsigned i, tempIndex = startIndex;
5641
5642 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5643 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5644 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5645
5646 emit->temp_arrays[arrayID].start = first;
5647 emit->temp_arrays[arrayID].size = count;
5648
5649 /* Fill in the temp_map entries for this temp array */
5650 for (i = 0; i < count; i++, tempIndex++) {
5651 emit->temp_map[tempIndex].arrayId = arrayID;
5652 emit->temp_map[tempIndex].index = i;
5653 }
5654 }
5655
5656
/**
 * Emit the declaration for the temporary registers.
 *
 * First computes the total number of temps: the shader's own temps,
 * plus driver-internal temps reserved for special instruction
 * translations and per-stage fixups (clip handling, prescale, vertex
 * attribute adjustment, alpha test, raw buffer loads, address regs).
 * Then compacts all non-array temps into a consecutive index range via
 * temp_map[] and emits dcl_temps / dcl_indexableTemp tokens.
 *
 * NOTE: the allocation order below is significant — each *_tmp_index
 * recorded here is used later when instructions are emitted.
 */
static bool
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      create_temp_array(emit, 1, 0, total_temps, 0);
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   /* Allocate extra temps for clip distance or clip vertex.
    */
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* We need to write the clip distance to a temporary register
       * first. Then it will be copied to the shadow copy for
       * the clip distance varying variable and stream output purpose.
       * It will also be copied to the actual CLIPDIST register
       * according to the enabled clip planes
       */
      emit->clip_dist_tmp_index = total_temps++;
      if (emit->info.num_written_clipdistance > 4)
         total_temps++; /* second clip register */
   }
   else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
      /* If the current shader is in the last vertex processing stage,
       * We need to convert the TGSI CLIPVERTEX output to one or more
       * clip distances. Allocate a temp reg for the clipvertex here.
       */
      assert(emit->info.writes_clipvertex > 0);
      emit->clip_vertex_tmp_index = total_temps;
      total_temps++;
   }

   /* Temp to hold the (possibly bias-adjusted) vertex id. */
   if (emit->info.uses_vertexid) {
      assert(emit->unit == PIPE_SHADER_VERTEX);
      emit->vs.vertex_id_tmp_index = total_temps++;
   }

   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      /* Temp for the vertex position when it must be post-processed
       * (prescale, undo-viewport, clip planes, or stream output).
       */
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->vposition.need_prescale) {
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* One temp per vertex attribute that needs format fix-up
          * (w=1, int->float, BGRA swizzle, packed-uint conversions).
          */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }
      else if (emit->unit == PIPE_SHADER_GEOMETRY) {
         if (emit->key.gs.writes_viewport_index)
            emit->gs.viewport_index_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
         /* Allocate a temp for the sample position */
         emit->fs.sample_pos_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      if (emit->vposition.need_prescale) {
         emit->vposition.tmp_index = total_temps++;
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      /* Temps for the inner/outer tessellation level inputs. */
      if (emit->tes.inner.tgsi_index) {
         emit->tes.inner.temp_index = total_temps;
         total_temps += 1;
      }

      if (emit->tes.outer.tgsi_index) {
         emit->tes.outer.temp_index = total_temps;
         total_temps += 1;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      /* Tess levels are only written in the patch-constant phase, so the
       * temps are only needed when not in the control point phase.
       */
      if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.inner.temp_index = total_temps;
            total_temps += 1;
         }
      }
      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.outer.temp_index = total_temps;
            total_temps += 1;
         }
      }

      if (emit->tcs.control_point_phase &&
          emit->info.reads_pervertex_outputs) {
         /* Temps to shadow the per-vertex outputs so they can be re-read. */
         emit->tcs.control_point_tmp_index = total_temps;
         total_temps += emit->tcs.control_point_out_count;
      }
      else if (!emit->tcs.control_point_phase &&
               emit->info.reads_perpatch_outputs) {

         /* If there is indirect access to the patch constant outputs
          * in the control point phase, then an indexable temporary array
          * will be created for these patch constant outputs.
          * Note, indirect access can only be applicable to
          * patch constant outputs in the control point phase.
          */
         if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
            unsigned arrayID =
               emit->num_temp_arrays ? emit->num_temp_arrays : 1;
            create_temp_array(emit, arrayID, 0,
                              emit->tcs.patch_generic_out_count, total_temps);
         }
         emit->tcs.patch_generic_tmp_index = total_temps;
         total_temps += emit->tcs.patch_generic_out_count;
      }

      emit->tcs.invocation_id_tmp_index = total_temps++;
   }

   if (emit->raw_bufs) {
      /**
       * Add 3 more temporaries if we need to translate constant buffer
       * to srv raw buffer. Since we need to load the value to a temporary
       * before it can be used as a source. There could be three source
       * register in an instruction.
       */
      emit->raw_buf_tmp_index = total_temps;
      total_temps+=3;
   }

   /* Address registers are implemented as ordinary temps. */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes.  Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   /* Debug dump of the final TGSI->VGPU10 temp mapping (disabled). */
   if (0) {
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u -> array %u index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays. Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return true;
}
5930
5931
5932 static bool
emit_rawbuf_declaration(struct svga_shader_emitter_v10 * emit,unsigned index)5933 emit_rawbuf_declaration(struct svga_shader_emitter_v10 *emit,
5934 unsigned index)
5935 {
5936 VGPU10OpcodeToken0 opcode1;
5937 VGPU10OperandToken0 operand1;
5938
5939 opcode1.value = 0;
5940 opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
5941 opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
5942
5943 operand1.value = 0;
5944 operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
5945 operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5946 operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
5947 operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5948
5949 begin_emit_instruction(emit);
5950 emit_dword(emit, opcode1.value);
5951 emit_dword(emit, operand1.value);
5952 emit_dword(emit, index);
5953 end_emit_instruction(emit);
5954
5955 return true;
5956 }
5957
5958
/**
 * Emit the constant buffer declarations.
 *
 * Constant buffer 0 holds the shader's own constants followed by the
 * driver's "extra" constants (prescale factors, viewport undo, vertex
 * id bias, clip planes, texcoord scales, texture buffer sizes, image
 * sizes).  The slot indexes assigned here MUST match the layout
 * produced by the extra-constants code in svga_state_constants.c.
 * Remaining constant buffers (UBOs) are declared afterwards, either as
 * regular constant buffers or as raw SRV buffers.
 */
static bool
emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   unsigned total_consts, i;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */

   /* Constant buffer operand: 2D index (buffer slot, element),
    * identity swizzle.
    */
   operand0.value = 0;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   /**
    * Emit declaration for constant buffer [0].  We also allocate
    * room for the extra constants here.
    */
   total_consts = emit->num_shader_consts[0];

   /* Now, allocate constant slots for the "extra" constants.
    * Note: it's critical that these extra constant locations
    * exactly match what's emitted by the "extra" constants code
    * in svga_state_constants.c
    */

   /* Vertex position scale/translation */
   if (emit->vposition.need_prescale) {
      emit->vposition.prescale_cbuf_index = total_consts;
      total_consts += (2 * emit->vposition.num_prescale);
   }

   if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->key.vs.undo_viewport) {
         emit->vs.viewport_index = total_consts++;
      }
      if (emit->key.vs.need_vertex_id_bias) {
         emit->vs.vertex_id_bias_index = total_consts++;
      }
   }

   /* user-defined clip planes */
   if (emit->key.clip_plane_enable) {
      unsigned n = util_bitcount(emit->key.clip_plane_enable);
      assert(emit->unit != PIPE_SHADER_FRAGMENT &&
             emit->unit != PIPE_SHADER_COMPUTE);
      for (i = 0; i < n; i++) {
         emit->clip_plane_const[i] = total_consts++;
      }
   }

   for (i = 0; i < emit->num_samplers; i++) {

      if (emit->key.tex[i].sampler_view) {
         /* Texcoord scale factors for RECT textures */
         if (emit->key.tex[i].unnormalized) {
            emit->texcoord_scale_index[i] = total_consts++;
         }

         /* Texture buffer sizes */
         if (emit->key.tex[i].target == PIPE_BUFFER) {
            emit->texture_buffer_size_index[i] = total_consts++;
         }
      }
   }
   /* One slot per image for imageSize() queries. */
   if (emit->key.image_size_used) {
      emit->image_size_index = total_consts;
      total_consts += emit->num_images;
   }

   if (total_consts > 0) {
      /* Clamp to the device limit and flag the overflow so the shader
       * can be reported as invalid rather than emitting bad tokens.
       */
      if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         debug_printf("Warning: Too many constants [%u] declared in constant"
                      " buffer 0. %u is the limit.\n",
                      total_consts,
                      VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
         total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
         emit->register_overflow = true;
      }
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, 0); /* which const buffer slot */
      emit_dword(emit, total_consts);
      end_emit_instruction(emit);
   }

   /* Declare remaining constant buffers (UBOs) */

   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
      if (emit->num_shader_consts[i] > 0) {
         if (emit->raw_bufs & (1 << i)) {
            /* UBO declared as srv raw buffer */
            emit_rawbuf_declaration(emit, i + emit->raw_buf_srv_start_index);
         }
         else {

            /* UBO declared as const buffer */
            begin_emit_instruction(emit);
            emit_dword(emit, opcode0.value);
            emit_dword(emit, operand0.value);
            emit_dword(emit, i); /* which const buffer slot */
            emit_dword(emit, emit->num_shader_consts[i]);
            end_emit_instruction(emit);
         }
      }
   }

   return true;
}
6079
6080
6081 /**
6082 * Emit declarations for samplers.
6083 */
6084 static bool
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)6085 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
6086 {
6087 unsigned i;
6088
6089 for (i = 0; i < emit->key.num_samplers; i++) {
6090
6091 VGPU10OpcodeToken0 opcode0;
6092 VGPU10OperandToken0 operand0;
6093
6094 opcode0.value = 0;
6095 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
6096 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
6097
6098 operand0.value = 0;
6099 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6100 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
6101 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6102 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6103
6104 begin_emit_instruction(emit);
6105 emit_dword(emit, opcode0.value);
6106 emit_dword(emit, operand0.value);
6107 emit_dword(emit, i);
6108 end_emit_instruction(emit);
6109 }
6110
6111 return true;
6112 }
6113
6114
/**
 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
 *
 * \param target       pipe texture target
 * \param num_samples  sample count of the resource
 * \param is_array     if false, array targets collapse to their
 *                     non-array dimension
 * \param is_uav       if true, cube (array) targets are declared as
 *                     2D arrays (UAV access path)
 *
 * NOTE(review): MSAA is selected here with `num_samples > 2`, while
 * tgsi_texture_to_resource_dimension() demotes MSAA targets when
 * `num_samples < 2`, so a 2-sample resource is classified differently
 * by the two functions — confirm this asymmetry is intentional.
 */
static unsigned
pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
                                   unsigned num_samples,
                                   bool is_array,
                                   bool is_uav)
{
   switch (target) {
   case PIPE_BUFFER:
      return VGPU10_RESOURCE_DIMENSION_BUFFER;
   case PIPE_TEXTURE_1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case PIPE_TEXTURE_2D:
      return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
         VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case PIPE_TEXTURE_RECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case PIPE_TEXTURE_3D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   case PIPE_TEXTURE_CUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case PIPE_TEXTURE_1D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case PIPE_TEXTURE_2D_ARRAY:
      if (num_samples > 2 && is_array)
         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
      else if (is_array)
         return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
      else
         return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case PIPE_TEXTURE_CUBE_ARRAY:
      /* UAVs address cube (arrays) as 2D arrays. */
      return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
         (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
   default:
      assert(!"Unexpected resource type");
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   }
}
6157
6158
/**
 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
 *
 * \param target       TGSI texture target
 * \param num_samples  sample count; MSAA targets are demoted to their
 *                     non-MSAA equivalent when num_samples < 2
 * \param is_array     if false, array targets collapse to their
 *                     non-array dimension
 * \param is_uav       if true, cube (array) targets are declared as
 *                     2D arrays (UAV access path)
 */
static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
                                   unsigned num_samples,
                                   bool is_array,
                                   bool is_uav)
{
   /* Single-sampled resources use the regular 2D dimensions even if the
    * shader declared an MSAA target.
    */
   if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
      target = TGSI_TEXTURE_2D;
   }
   else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
      target = TGSI_TEXTURE_2D_ARRAY;
   }

   switch (target) {
   case TGSI_TEXTURE_BUFFER:
      return VGPU10_RESOURCE_DIMENSION_BUFFER;
   case TGSI_TEXTURE_1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_3D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      /* UAVs address cube maps as 2D arrays. */
      return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
         VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_SHADOW1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_2D_MSAA:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_CUBE_ARRAY:
      return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
         (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   default:
      assert(!"Unexpected resource type");
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   }
}
6219
6220
6221 /**
6222 * Given a tgsi_return_type, return true iff it is an integer type.
6223 */
6224 static bool
is_integer_type(enum tgsi_return_type type)6225 is_integer_type(enum tgsi_return_type type)
6226 {
6227 switch (type) {
6228 case TGSI_RETURN_TYPE_SINT:
6229 case TGSI_RETURN_TYPE_UINT:
6230 return true;
6231 case TGSI_RETURN_TYPE_FLOAT:
6232 case TGSI_RETURN_TYPE_UNORM:
6233 case TGSI_RETURN_TYPE_SNORM:
6234 return false;
6235 case TGSI_RETURN_TYPE_COUNT:
6236 default:
6237 assert(!"is_integer_type: Unknown tgsi_return_type");
6238 return false;
6239 }
6240 }
6241
6242
6243 /**
6244 * Emit declarations for resources.
6245 * XXX When we're sure that all TGSI shaders will be generated with
6246 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
6247 * rework this code.
6248 */
6249 static bool
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)6250 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
6251 {
6252 unsigned i;
6253
6254 /* Emit resource decl for each sampler */
6255 for (i = 0; i < emit->num_samplers; i++) {
6256 if (!(emit->info.samplers_declared & (1 << i)))
6257 continue;
6258
6259 VGPU10OpcodeToken0 opcode0;
6260 VGPU10OperandToken0 operand0;
6261 VGPU10ResourceReturnTypeToken return_type;
6262 VGPU10_RESOURCE_RETURN_TYPE rt;
6263
6264 opcode0.value = 0;
6265 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
6266 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6267 opcode0.resourceDimension =
6268 tgsi_texture_to_resource_dimension(emit->sampler_target[i],
6269 emit->key.tex[i].num_samples,
6270 emit->key.tex[i].is_array,
6271 false);
6272 }
6273 else {
6274 opcode0.resourceDimension =
6275 pipe_texture_to_resource_dimension(emit->key.tex[i].target,
6276 emit->key.tex[i].num_samples,
6277 emit->key.tex[i].is_array,
6278 false);
6279 }
6280 opcode0.sampleCount = emit->key.tex[i].num_samples;
6281 operand0.value = 0;
6282 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6283 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
6284 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6285 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6286
6287 #if 1
6288 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
6289 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
6290 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
6291 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
6292 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
6293 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
6294 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
6295 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6296 rt = emit->sampler_return_type[i] + 1;
6297 }
6298 else {
6299 rt = emit->key.tex[i].sampler_return_type;
6300 }
6301 #else
6302 switch (emit->sampler_return_type[i]) {
6303 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
6304 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
6305 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
6306 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
6307 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
6308 case TGSI_RETURN_TYPE_COUNT:
6309 default:
6310 rt = VGPU10_RETURN_TYPE_FLOAT;
6311 assert(!"emit_resource_declarations: Unknown tgsi_return_type");
6312 }
6313 #endif
6314
6315 return_type.value = 0;
6316 return_type.component0 = rt;
6317 return_type.component1 = rt;
6318 return_type.component2 = rt;
6319 return_type.component3 = rt;
6320
6321 begin_emit_instruction(emit);
6322 emit_dword(emit, opcode0.value);
6323 emit_dword(emit, operand0.value);
6324 emit_dword(emit, i);
6325 emit_dword(emit, return_type.value);
6326 end_emit_instruction(emit);
6327 }
6328
6329 return true;
6330 }
6331
6332
/**
 * Emit instruction to declare uav for the shader image
 *
 * Walks the declared image units (image_mask) and emits one
 * dcl_uav_typed per distinct uav_index; multiple image units that
 * share a uav_index are declared only once (tracked in uav_mask).
 * The set of declared UAVs is accumulated into emit->uav_declared.
 */
static void
emit_image_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i = 0;
   unsigned unit = 0;          /* image unit index, advances past unset mask bits */
   unsigned uav_mask = 0;      /* uav indexes already declared in this pass */

   /* Emit uav decl for each image */
   for (i = 0; i < emit->num_images; i++, unit++) {

      /* Find the unit index of the next declared image.
       */
      while (!(emit->image_mask & (1 << unit))) {
         unit++;
      }

      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;
      VGPU10ResourceReturnTypeToken return_type;

      /* If the corresponding uav for the image is already declared,
       * skip this image declaration.
       */
      if (uav_mask & (1 << emit->key.images[unit].uav_index))
         continue;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
      opcode0.uavResourceDimension =
         tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
                                            0, emit->key.images[unit].is_array,
                                            true);

      /* A single-layer view of a 3D texture is still declared as 3D. */
      if (emit->key.images[unit].is_single_layer &&
          emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
         opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
      }

      /* Declare the uav as global coherent if the shader includes memory
       * barrier instructions.
       */
      opcode0.globallyCoherent =
         (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;

      /* UAV operand: no components, one immediate 1D index. */
      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

      /* All four components share the image's return type (+1 converts
       * TGSI return type to the VGPU10 enum).
       */
      return_type.value = 0;
      return_type.component0 =
         return_type.component1 =
         return_type.component2 =
         return_type.component3 = emit->key.images[unit].return_type + 1;

      assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, emit->key.images[unit].uav_index);
      emit_dword(emit, return_type.value);
      end_emit_instruction(emit);

      /* Mark the uav is already declared */
      uav_mask |= 1 << emit->key.images[unit].uav_index;
   }

   emit->uav_declared |= uav_mask;
}
6406
6407
6408 /**
6409 * Emit instruction to declare uav for the shader buffer
6410 */
6411 static void
emit_shader_buf_declarations(struct svga_shader_emitter_v10 * emit)6412 emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
6413 {
6414 unsigned i;
6415 unsigned uav_mask = 0;
6416
6417 /* Emit uav decl for each shader buffer */
6418 for (i = 0; i < emit->num_shader_bufs; i++) {
6419 VGPU10OpcodeToken0 opcode0;
6420 VGPU10OperandToken0 operand0;
6421
6422 if (emit->raw_shaderbufs & (1 << i)) {
6423 emit_rawbuf_declaration(emit, i + emit->raw_shaderbuf_srv_start_index);
6424 continue;
6425 }
6426
6427 /* If the corresponding uav for the shader buf is already declared,
6428 * skip this shader buffer declaration.
6429 */
6430 if (uav_mask & (1 << emit->key.shader_buf_uav_index[i]))
6431 continue;
6432
6433 opcode0.value = 0;
6434 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6435
6436 /* Declare the uav as global coherent if the shader includes memory
6437 * barrier instructions.
6438 */
6439 opcode0.globallyCoherent =
6440 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6441
6442 operand0.value = 0;
6443 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6444 operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6445 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6446 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6447
6448 assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
6449 begin_emit_instruction(emit);
6450 emit_dword(emit, opcode0.value);
6451 emit_dword(emit, operand0.value);
6452 emit_dword(emit, emit->key.shader_buf_uav_index[i]);
6453 end_emit_instruction(emit);
6454
6455 /* Mark the uav is already declared */
6456 uav_mask |= 1 << emit->key.shader_buf_uav_index[i];
6457 }
6458
6459 emit->uav_declared |= uav_mask;
6460 }
6461
6462
6463 /**
6464 * Emit instruction to declare thread group shared memory(tgsm) for shared memory
6465 */
6466 static void
emit_memory_declarations(struct svga_shader_emitter_v10 * emit)6467 emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
6468 {
6469 if (emit->cs.shared_memory_declared) {
6470 VGPU10OpcodeToken0 opcode0;
6471 VGPU10OperandToken0 operand0;
6472
6473 opcode0.value = 0;
6474 opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
6475
6476 /* Declare the uav as global coherent if the shader includes memory
6477 * barrier instructions.
6478 */
6479 opcode0.globallyCoherent =
6480 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6481
6482 operand0.value = 0;
6483 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6484 operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
6485 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6486 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6487
6488 begin_emit_instruction(emit);
6489 emit_dword(emit, opcode0.value);
6490 emit_dword(emit, operand0.value);
6491
6492 /* Current state tracker only declares one shared memory for GLSL.
6493 * Use index 0 for this shared memory.
6494 */
6495 emit_dword(emit, 0);
6496 emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
6497 end_emit_instruction(emit);
6498 }
6499 }
6500
6501
6502 /**
6503 * Emit instruction to declare uav for atomic buffers
6504 */
6505 static void
emit_atomic_buf_declarations(struct svga_shader_emitter_v10 * emit)6506 emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
6507 {
6508 unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
6509 unsigned uav_mask = 0;
6510
6511 /* Emit uav decl for each atomic buffer */
6512 while (atomic_bufs_mask) {
6513 unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
6514 unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
6515
6516 /* If the corresponding uav for the shader buf is already declared,
6517 * skip this shader buffer declaration.
6518 */
6519 if (uav_mask & (1 << uav_index))
6520 continue;
6521
6522 VGPU10OpcodeToken0 opcode0;
6523 VGPU10OperandToken0 operand0;
6524
6525 assert(uav_index != SVGA3D_INVALID_ID);
6526
6527 opcode0.value = 0;
6528 opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6529 opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
6530
6531 /* Declare the uav as global coherent if the shader includes memory
6532 * barrier instructions.
6533 */
6534 opcode0.globallyCoherent =
6535 (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6536 opcode0.uavHasCounter = 1;
6537
6538 operand0.value = 0;
6539 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6540 operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6541 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6542 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6543
6544 begin_emit_instruction(emit);
6545 emit_dword(emit, opcode0.value);
6546 emit_dword(emit, operand0.value);
6547 emit_dword(emit, uav_index);
6548 end_emit_instruction(emit);
6549
6550 /* Mark the uav is already declared */
6551 uav_mask |= 1 << uav_index;
6552 }
6553
6554 emit->uav_declared |= uav_mask;
6555
6556 /* Allocate immediates to be used for index to the atomic buffers */
6557 unsigned j = 0;
6558 for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
6559 alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6560 }
6561
6562 /* Allocate immediates for the atomic counter index */
6563 for (; j <= emit->max_atomic_counter_index; j+=4) {
6564 alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6565 }
6566 }
6567
6568
6569 /**
6570 * Emit instruction with n=1, 2 or 3 source registers.
6571 */
6572 static void
emit_instruction_opn(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3,bool saturate,bool precise)6573 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
6574 unsigned opcode,
6575 const struct tgsi_full_dst_register *dst,
6576 const struct tgsi_full_src_register *src1,
6577 const struct tgsi_full_src_register *src2,
6578 const struct tgsi_full_src_register *src3,
6579 bool saturate, bool precise)
6580 {
6581 begin_emit_instruction(emit);
6582 emit_opcode_precise(emit, opcode, saturate, precise);
6583 emit_dst_register(emit, dst);
6584 emit_src_register(emit, src1);
6585 if (src2) {
6586 emit_src_register(emit, src2);
6587 }
6588 if (src3) {
6589 emit_src_register(emit, src3);
6590 }
6591 end_emit_instruction(emit);
6592 }
6593
6594 static void
emit_instruction_op1(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6595 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
6596 unsigned opcode,
6597 const struct tgsi_full_dst_register *dst,
6598 const struct tgsi_full_src_register *src)
6599 {
6600 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, false, false);
6601 }
6602
6603 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2)6604 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
6605 VGPU10_OPCODE_TYPE opcode,
6606 const struct tgsi_full_dst_register *dst,
6607 const struct tgsi_full_src_register *src1,
6608 const struct tgsi_full_src_register *src2)
6609 {
6610 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, false, false);
6611 }
6612
6613 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3)6614 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
6615 VGPU10_OPCODE_TYPE opcode,
6616 const struct tgsi_full_dst_register *dst,
6617 const struct tgsi_full_src_register *src1,
6618 const struct tgsi_full_src_register *src2,
6619 const struct tgsi_full_src_register *src3)
6620 {
6621 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, false, false);
6622 }
6623
6624 static void
emit_instruction_op0(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode)6625 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
6626 VGPU10_OPCODE_TYPE opcode)
6627 {
6628 begin_emit_instruction(emit);
6629 emit_opcode(emit, opcode, false);
6630 end_emit_instruction(emit);
6631 }
6632
6633 /**
6634 * Tessellation inner/outer levels needs to be store into its
6635 * appropriate registers depending on prim_mode.
6636 */
6637 static void
store_tesslevels(struct svga_shader_emitter_v10 * emit)6638 store_tesslevels(struct svga_shader_emitter_v10 *emit)
6639 {
6640 int i;
6641
6642 /* tessellation levels are required input/out in hull shader.
6643 * emitting the inner/outer tessellation levels, either from
6644 * values provided in tcs or fallback default values which is 1.0
6645 */
6646 if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
6647 struct tgsi_full_src_register temp_src;
6648
6649 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6650 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6651 else
6652 temp_src = make_immediate_reg_float(emit, 1.0f);
6653
6654 for (i = 0; i < 2; i++) {
6655 struct tgsi_full_src_register src =
6656 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6657 struct tgsi_full_dst_register dst =
6658 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
6659 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6660 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6661 }
6662
6663 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6664 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6665 else
6666 temp_src = make_immediate_reg_float(emit, 1.0f);
6667
6668 for (i = 0; i < 4; i++) {
6669 struct tgsi_full_src_register src =
6670 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6671 struct tgsi_full_dst_register dst =
6672 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6673 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6674 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6675 }
6676 }
6677 else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
6678 struct tgsi_full_src_register temp_src;
6679
6680 if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6681 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6682 else
6683 temp_src = make_immediate_reg_float(emit, 1.0f);
6684
6685 struct tgsi_full_src_register src =
6686 scalar_src(&temp_src, TGSI_SWIZZLE_X);
6687 struct tgsi_full_dst_register dst =
6688 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6689 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6690 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6691
6692 if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6693 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6694 else
6695 temp_src = make_immediate_reg_float(emit, 1.0f);
6696
6697 for (i = 0; i < 3; i++) {
6698 struct tgsi_full_src_register src =
6699 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6700 struct tgsi_full_dst_register dst =
6701 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6702 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6703 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6704 }
6705 }
6706 else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) {
6707 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6708 struct tgsi_full_src_register temp_src =
6709 make_src_temp_reg(emit->tcs.outer.temp_index);
6710 for (i = 0; i < 2; i++) {
6711 struct tgsi_full_src_register src =
6712 scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6713 struct tgsi_full_dst_register dst =
6714 make_dst_reg(TGSI_FILE_OUTPUT,
6715 emit->tcs.outer.out_index + i);
6716 dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6717 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6718 }
6719 }
6720 }
6721 else {
6722 debug_printf("Unsupported primitive type");
6723 }
6724 }
6725
6726
6727 /**
6728 * Emit the actual clip distance instructions to be used for clipping
6729 * by copying the clip distance from the temporary registers to the
6730 * CLIPDIST registers written with the enabled planes mask.
6731 * Also copy the clip distance from the temporary to the clip distance
6732 * shadow copy register which will be referenced by the input shader
6733 */
6734 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)6735 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6736 {
6737 struct tgsi_full_src_register tmp_clip_dist_src;
6738 struct tgsi_full_dst_register clip_dist_dst;
6739
6740 unsigned i;
6741 unsigned clip_plane_enable = emit->key.clip_plane_enable;
6742 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6743 int num_written_clipdist = emit->info.num_written_clipdistance;
6744
6745 assert(emit->clip_dist_out_index != INVALID_INDEX);
6746 assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6747
6748 /**
6749 * Temporary reset the temporary clip dist register index so
6750 * that the copy to the real clip dist register will not
6751 * attempt to copy to the temporary register again
6752 */
6753 emit->clip_dist_tmp_index = INVALID_INDEX;
6754
6755 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6756
6757 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6758
6759 /**
6760 * copy to the shadow copy for use by varying variable and
6761 * stream output. All clip distances
6762 * will be written regardless of the enabled clipping planes.
6763 */
6764 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6765 emit->clip_dist_so_index + i);
6766
6767 /* MOV clip_dist_so, tmp_clip_dist */
6768 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6769 &tmp_clip_dist_src);
6770
6771 /**
6772 * copy those clip distances to enabled clipping planes
6773 * to CLIPDIST registers for clipping
6774 */
6775 if (clip_plane_enable & 0xf) {
6776 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6777 emit->clip_dist_out_index + i);
6778 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6779
6780 /* MOV CLIPDIST, tmp_clip_dist */
6781 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6782 &tmp_clip_dist_src);
6783 }
6784 /* four clip planes per clip register */
6785 clip_plane_enable >>= 4;
6786 }
6787 /**
6788 * set the temporary clip dist register index back to the
6789 * temporary index for the next vertex
6790 */
6791 emit->clip_dist_tmp_index = clip_dist_tmp_index;
6792 }
6793
6794 /* Declare clip distance output registers for user-defined clip planes
6795 * or the TGSI_CLIPVERTEX output.
6796 */
6797 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)6798 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6799 {
6800 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6801 unsigned index = emit->num_outputs;
6802 unsigned plane_mask;
6803
6804 assert(emit->unit != PIPE_SHADER_FRAGMENT);
6805 assert(num_clip_planes <= 8);
6806
6807 if (emit->clip_mode != CLIP_LEGACY &&
6808 emit->clip_mode != CLIP_VERTEX) {
6809 return;
6810 }
6811
6812 if (num_clip_planes == 0)
6813 return;
6814
6815 /* Convert clip vertex to clip distances only in the last vertex stage */
6816 if (!emit->key.last_vertex_stage)
6817 return;
6818
6819 /* Declare one or two clip output registers. The number of components
6820 * in the mask reflects the number of clip planes. For example, if 5
6821 * clip planes are needed, we'll declare outputs similar to:
6822 * dcl_output_siv o2.xyzw, clip_distance
6823 * dcl_output_siv o3.x, clip_distance
6824 */
6825 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6826
6827 plane_mask = (1 << num_clip_planes) - 1;
6828 if (plane_mask & 0xf) {
6829 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6830 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6831 VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6832 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6833 emit->num_outputs++;
6834 }
6835 if (plane_mask & 0xf0) {
6836 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6837 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6838 VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6839 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6840 emit->num_outputs++;
6841 }
6842 }
6843
6844
6845 /**
6846 * Emit the instructions for writing to the clip distance registers
6847 * to handle legacy/automatic clip planes.
6848 * For each clip plane, the distance is the dot product of the vertex
6849 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6850 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6851 * output registers already declared.
6852 */
6853 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)6854 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6855 unsigned vpos_tmp_index)
6856 {
6857 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6858
6859 assert(emit->clip_mode == CLIP_LEGACY);
6860 assert(num_clip_planes <= 8);
6861
6862 assert(emit->unit == PIPE_SHADER_VERTEX ||
6863 emit->unit == PIPE_SHADER_GEOMETRY ||
6864 emit->unit == PIPE_SHADER_TESS_EVAL);
6865
6866 for (i = 0; i < num_clip_planes; i++) {
6867 struct tgsi_full_dst_register dst;
6868 struct tgsi_full_src_register plane_src, vpos_src;
6869 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6870 unsigned comp = i % 4;
6871 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6872
6873 /* create dst, src regs */
6874 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6875 dst = writemask_dst(&dst, writemask);
6876
6877 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6878 vpos_src = make_src_temp_reg(vpos_tmp_index);
6879
6880 /* DP4 clip_dist, plane, vpos */
6881 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6882 &plane_src, &vpos_src);
6883 }
6884 }
6885
6886
6887 /**
6888 * Emit the instructions for computing the clip distance results from
6889 * the clip vertex temporary.
6890 * For each clip plane, the distance is the dot product of the clip vertex
6891 * position (found in a temp reg) and the clip plane coefficients.
6892 */
6893 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)6894 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6895 {
6896 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6897 unsigned i;
6898 struct tgsi_full_dst_register dst;
6899 struct tgsi_full_src_register clipvert_src;
6900 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6901
6902 assert(emit->unit == PIPE_SHADER_VERTEX ||
6903 emit->unit == PIPE_SHADER_GEOMETRY ||
6904 emit->unit == PIPE_SHADER_TESS_EVAL);
6905
6906 assert(emit->clip_mode == CLIP_VERTEX);
6907
6908 clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6909
6910 for (i = 0; i < num_clip; i++) {
6911 struct tgsi_full_src_register plane_src;
6912 unsigned reg_index = emit->clip_dist_out_index + i / 4;
6913 unsigned comp = i % 4;
6914 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6915
6916 /* create dst, src regs */
6917 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6918 dst = writemask_dst(&dst, writemask);
6919
6920 plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6921
6922 /* DP4 clip_dist, plane, vpos */
6923 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6924 &plane_src, &clipvert_src);
6925 }
6926
6927 /* copy temporary clip vertex register to the clip vertex register */
6928
6929 assert(emit->clip_vertex_out_index != INVALID_INDEX);
6930
6931 /**
6932 * temporary reset the temporary clip vertex register index so
6933 * that copy to the clip vertex register will not attempt
6934 * to copy to the temporary register again
6935 */
6936 emit->clip_vertex_tmp_index = INVALID_INDEX;
6937
6938 /* MOV clip_vertex, clip_vertex_tmp */
6939 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6940 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6941 &dst, &clipvert_src);
6942
6943 /**
6944 * set the temporary clip vertex register index back to the
6945 * temporary index for the next vertex
6946 */
6947 emit->clip_vertex_tmp_index = clip_vertex_tmp;
6948 }
6949
6950 /**
6951 * Emit code to convert RGBA to BGRA
6952 */
6953 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6954 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6955 const struct tgsi_full_dst_register *dst,
6956 const struct tgsi_full_src_register *src)
6957 {
6958 struct tgsi_full_src_register bgra_src =
6959 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6960
6961 begin_emit_instruction(emit);
6962 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
6963 emit_dst_register(emit, dst);
6964 emit_src_register(emit, &bgra_src);
6965 end_emit_instruction(emit);
6966 }
6967
6968
6969 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6970 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6971 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6972 const struct tgsi_full_dst_register *dst,
6973 const struct tgsi_full_src_register *src)
6974 {
6975 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6976 struct tgsi_full_src_register two =
6977 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6978 struct tgsi_full_src_register neg_two =
6979 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6980
6981 unsigned val_tmp = get_temp_index(emit);
6982 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6983 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6984
6985 unsigned bias_tmp = get_temp_index(emit);
6986 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6987 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6988
6989 /* val = src * 2.0 */
6990 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6991
6992 /* bias = src > 0.5 */
6993 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6994
6995 /* bias = bias & -2.0 */
6996 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6997 &bias_src, &neg_two);
6998
6999 /* dst = val + bias */
7000 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
7001 &val_src, &bias_src);
7002
7003 free_temp_indexes(emit);
7004 }
7005
7006
7007 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
7008 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)7009 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
7010 const struct tgsi_full_dst_register *dst,
7011 const struct tgsi_full_src_register *src)
7012 {
7013 struct tgsi_full_src_register scale =
7014 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
7015
7016 /* dst = src * scale */
7017 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
7018 }
7019
7020
7021 /** Convert from R32_UINT to 10_10_10_2_sscaled */
7022 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)7023 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
7024 const struct tgsi_full_dst_register *dst,
7025 const struct tgsi_full_src_register *src)
7026 {
7027 struct tgsi_full_src_register lshift =
7028 make_immediate_reg_int4(emit, 22, 12, 2, 0);
7029 struct tgsi_full_src_register rshift =
7030 make_immediate_reg_int4(emit, 22, 22, 22, 30);
7031
7032 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
7033
7034 unsigned tmp = get_temp_index(emit);
7035 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7036 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7037
7038 /*
7039 * r = (pixel << 22) >> 22; # signed int in [511, -512]
7040 * g = (pixel << 12) >> 22; # signed int in [511, -512]
7041 * b = (pixel << 2) >> 22; # signed int in [511, -512]
7042 * a = (pixel << 0) >> 30; # signed int in [1, -2]
7043 * dst = i_to_f(r,g,b,a); # convert to float
7044 */
7045 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
7046 &src_xxxx, &lshift);
7047 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
7048 &tmp_src, &rshift);
7049 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
7050
7051 free_temp_indexes(emit);
7052 }
7053
7054
7055 /**
7056 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
7057 */
7058 static bool
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7059 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
7060 const struct tgsi_full_instruction *inst)
7061 {
7062 unsigned index = inst->Dst[0].Register.Index;
7063 struct tgsi_full_dst_register dst;
7064 VGPU10_OPCODE_TYPE opcode;
7065
7066 assert(index < MAX_VGPU10_ADDR_REGS);
7067 dst = make_dst_temp_reg(emit->address_reg_index[index]);
7068 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
7069
7070 /* ARL dst, s0
7071 * Translates into:
7072 * FTOI address_tmp, s0
7073 *
7074 * UARL dst, s0
7075 * Translates into:
7076 * MOV address_tmp, s0
7077 */
7078 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
7079 opcode = VGPU10_OPCODE_FTOI;
7080 else
7081 opcode = VGPU10_OPCODE_MOV;
7082
7083 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
7084
7085 return true;
7086 }
7087
7088
7089 /**
7090 * Emit code for TGSI_OPCODE_CAL instruction.
7091 */
7092 static bool
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7093 emit_cal(struct svga_shader_emitter_v10 *emit,
7094 const struct tgsi_full_instruction *inst)
7095 {
7096 unsigned label = inst->Label.Label;
7097 VGPU10OperandToken0 operand;
7098 operand.value = 0;
7099 operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
7100
7101 begin_emit_instruction(emit);
7102 emit_dword(emit, operand.value);
7103 emit_dword(emit, label);
7104 end_emit_instruction(emit);
7105
7106 return true;
7107 }
7108
7109
7110 /**
7111 * Emit code for TGSI_OPCODE_IABS instruction.
7112 */
7113 static bool
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7114 emit_iabs(struct svga_shader_emitter_v10 *emit,
7115 const struct tgsi_full_instruction *inst)
7116 {
7117 /* dst.x = (src0.x < 0) ? -src0.x : src0.x
7118 * dst.y = (src0.y < 0) ? -src0.y : src0.y
7119 * dst.z = (src0.z < 0) ? -src0.z : src0.z
7120 * dst.w = (src0.w < 0) ? -src0.w : src0.w
7121 *
7122 * Translates into
7123 * IMAX dst, src, neg(src)
7124 */
7125 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
7126 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
7127 &inst->Src[0], &neg_src);
7128
7129 return true;
7130 }
7131
7132
7133 /**
7134 * Emit code for TGSI_OPCODE_CMP instruction.
7135 */
7136 static bool
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7137 emit_cmp(struct svga_shader_emitter_v10 *emit,
7138 const struct tgsi_full_instruction *inst)
7139 {
7140 /* dst.x = (src0.x < 0) ? src1.x : src2.x
7141 * dst.y = (src0.y < 0) ? src1.y : src2.y
7142 * dst.z = (src0.z < 0) ? src1.z : src2.z
7143 * dst.w = (src0.w < 0) ? src1.w : src2.w
7144 *
7145 * Translates into
7146 * LT tmp, src0, 0.0
7147 * MOVC dst, tmp, src1, src2
7148 */
7149 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7150 unsigned tmp = get_temp_index(emit);
7151 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7152 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7153
7154 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
7155 &inst->Src[0], &zero, NULL, false,
7156 inst->Instruction.Precise);
7157 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
7158 &tmp_src, &inst->Src[1], &inst->Src[2],
7159 inst->Instruction.Saturate, false);
7160
7161 free_temp_indexes(emit);
7162
7163 return true;
7164 }
7165
7166
7167 /**
7168 * Emit code for TGSI_OPCODE_DST instruction.
7169 */
7170 static bool
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7171 emit_dst(struct svga_shader_emitter_v10 *emit,
7172 const struct tgsi_full_instruction *inst)
7173 {
7174 /*
7175 * dst.x = 1
7176 * dst.y = src0.y * src1.y
7177 * dst.z = src0.z
7178 * dst.w = src1.w
7179 */
7180
7181 struct tgsi_full_src_register s0_yyyy =
7182 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7183 struct tgsi_full_src_register s0_zzzz =
7184 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
7185 struct tgsi_full_src_register s1_yyyy =
7186 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
7187 struct tgsi_full_src_register s1_wwww =
7188 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
7189
7190 /*
7191 * If dst and either src0 and src1 are the same we need
7192 * to create a temporary for it and insert a extra move.
7193 */
7194 unsigned tmp_move = get_temp_index(emit);
7195 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7196 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7197
7198 /* MOV dst.x, 1.0 */
7199 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7200 struct tgsi_full_dst_register dst_x =
7201 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7202 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7203
7204 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7205 }
7206
7207 /* MUL dst.y, s0.y, s1.y */
7208 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7209 struct tgsi_full_dst_register dst_y =
7210 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7211
7212 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
7213 &s1_yyyy, NULL, inst->Instruction.Saturate,
7214 inst->Instruction.Precise);
7215 }
7216
7217 /* MOV dst.z, s0.z */
7218 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7219 struct tgsi_full_dst_register dst_z =
7220 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7221
7222 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7223 &dst_z, &s0_zzzz, NULL, NULL,
7224 inst->Instruction.Saturate,
7225 inst->Instruction.Precise);
7226 }
7227
7228 /* MOV dst.w, s1.w */
7229 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7230 struct tgsi_full_dst_register dst_w =
7231 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7232
7233 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7234 &dst_w, &s1_wwww, NULL, NULL,
7235 inst->Instruction.Saturate,
7236 inst->Instruction.Precise);
7237 }
7238
7239 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7240 free_temp_indexes(emit);
7241
7242 return true;
7243 }
7244
7245
7246 /**
7247 * A helper function to return the stream index as specified in
7248 * the immediate register
7249 */
7250 static inline unsigned
find_stream_index(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7251 find_stream_index(struct svga_shader_emitter_v10 *emit,
7252 const struct tgsi_full_src_register *src)
7253 {
7254 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
7255 }
7256
7257
7258 /**
7259 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
7260 */
7261 static bool
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7262 emit_endprim(struct svga_shader_emitter_v10 *emit,
7263 const struct tgsi_full_instruction *inst)
7264 {
7265 assert(emit->unit == PIPE_SHADER_GEOMETRY);
7266
7267 begin_emit_instruction(emit);
7268 if (emit->version >= 50) {
7269 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
7270
7271 if (emit->info.num_stream_output_components[streamIndex] == 0) {
7272 /**
7273 * If there is no output for this stream, discard this instruction.
7274 */
7275 emit->discard_instruction = true;
7276 }
7277 else {
7278 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, false);
7279 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
7280 emit_stream_register(emit, streamIndex);
7281 }
7282 }
7283 else {
7284 emit_opcode(emit, VGPU10_OPCODE_CUT, false);
7285 }
7286 end_emit_instruction(emit);
7287 return true;
7288 }
7289
7290
7291 /**
7292 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
7293 */
7294 static bool
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7295 emit_ex2(struct svga_shader_emitter_v10 *emit,
7296 const struct tgsi_full_instruction *inst)
7297 {
7298 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
7299 * while VGPU10 computes four values.
7300 *
7301 * dst = EX2(src):
7302 * dst.xyzw = 2.0 ^ src.x
7303 */
7304
7305 struct tgsi_full_src_register src_xxxx =
7306 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7307 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7308
7309 /* EXP tmp, s0.xxxx */
7310 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
7311 NULL, NULL,
7312 inst->Instruction.Saturate,
7313 inst->Instruction.Precise);
7314
7315 return true;
7316 }
7317
7318
7319 /**
7320 * Emit code for TGSI_OPCODE_EXP instruction.
7321 */
7322 static bool
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7323 emit_exp(struct svga_shader_emitter_v10 *emit,
7324 const struct tgsi_full_instruction *inst)
7325 {
7326 /*
7327 * dst.x = 2 ^ floor(s0.x)
7328 * dst.y = s0.x - floor(s0.x)
7329 * dst.z = 2 ^ s0.x
7330 * dst.w = 1.0
7331 */
7332
7333 struct tgsi_full_src_register src_xxxx =
7334 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7335 unsigned tmp = get_temp_index(emit);
7336 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7337 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7338
7339 /*
7340 * If dst and src are the same we need to create
7341 * a temporary for it and insert a extra move.
7342 */
7343 unsigned tmp_move = get_temp_index(emit);
7344 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7345 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7346
7347 /* only use X component of temp reg */
7348 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7349 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7350
7351 /* ROUND_NI tmp.x, s0.x */
7352 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
7353 &src_xxxx); /* round to -infinity */
7354
7355 /* EXP dst.x, tmp.x */
7356 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7357 struct tgsi_full_dst_register dst_x =
7358 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7359
7360 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
7361 NULL, NULL,
7362 inst->Instruction.Saturate,
7363 inst->Instruction.Precise);
7364 }
7365
7366 /* ADD dst.y, s0.x, -tmp */
7367 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7368 struct tgsi_full_dst_register dst_y =
7369 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7370 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
7371
7372 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
7373 &neg_tmp_src, NULL,
7374 inst->Instruction.Saturate,
7375 inst->Instruction.Precise);
7376 }
7377
7378 /* EXP dst.z, s0.x */
7379 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7380 struct tgsi_full_dst_register dst_z =
7381 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7382
7383 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
7384 NULL, NULL,
7385 inst->Instruction.Saturate,
7386 inst->Instruction.Precise);
7387 }
7388
7389 /* MOV dst.w, 1.0 */
7390 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7391 struct tgsi_full_dst_register dst_w =
7392 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7393 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7394
7395 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7396 }
7397
7398 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7399
7400 free_temp_indexes(emit);
7401
7402 return true;
7403 }
7404
7405
7406 /**
7407 * Emit code for TGSI_OPCODE_IF instruction.
7408 */
7409 static bool
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7410 emit_if(struct svga_shader_emitter_v10 *emit,
7411 const struct tgsi_full_src_register *src)
7412 {
7413 VGPU10OpcodeToken0 opcode0;
7414
7415 /* The src register should be a scalar */
7416 assert(src->Register.SwizzleX == src->Register.SwizzleY &&
7417 src->Register.SwizzleX == src->Register.SwizzleZ &&
7418 src->Register.SwizzleX == src->Register.SwizzleW);
7419
7420 /* The only special thing here is that we need to set the
7421 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
7422 * src.x is non-zero.
7423 */
7424 opcode0.value = 0;
7425 opcode0.opcodeType = VGPU10_OPCODE_IF;
7426 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
7427
7428 begin_emit_instruction(emit);
7429 emit_dword(emit, opcode0.value);
7430 emit_src_register(emit, src);
7431 end_emit_instruction(emit);
7432
7433 return true;
7434 }
7435
7436
7437 /**
7438 * Emit code for conditional discard instruction (discard fragment if any of
7439 * the register components are negative).
7440 */
7441 static bool
emit_cond_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7442 emit_cond_discard(struct svga_shader_emitter_v10 *emit,
7443 const struct tgsi_full_instruction *inst)
7444 {
7445 unsigned tmp = get_temp_index(emit);
7446 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7447 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7448
7449 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7450
7451 struct tgsi_full_dst_register tmp_dst_x =
7452 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7453 struct tgsi_full_src_register tmp_src_xxxx =
7454 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7455
7456 /* tmp = src[0] < 0.0 */
7457 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
7458
7459 if (!same_swizzle_terms(&inst->Src[0])) {
7460 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
7461 * logically OR the swizzle terms. Most uses of this conditional
7462 * discard instruction only test one channel so it's good to
7463 * avoid these extra steps.
7464 */
7465 struct tgsi_full_src_register tmp_src_yyyy =
7466 scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
7467 struct tgsi_full_src_register tmp_src_zzzz =
7468 scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
7469 struct tgsi_full_src_register tmp_src_wwww =
7470 scalar_src(&tmp_src, TGSI_SWIZZLE_W);
7471
7472 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7473 &tmp_src_yyyy);
7474 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7475 &tmp_src_zzzz);
7476 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7477 &tmp_src_wwww);
7478 }
7479
7480 begin_emit_instruction(emit);
7481 emit_discard_opcode(emit, true); /* discard if src0.x is non-zero */
7482 emit_src_register(emit, &tmp_src_xxxx);
7483 end_emit_instruction(emit);
7484
7485 free_temp_indexes(emit);
7486
7487 return true;
7488 }
7489
7490
7491 /**
7492 * Emit code for the unconditional discard instruction.
7493 */
7494 static bool
emit_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7495 emit_discard(struct svga_shader_emitter_v10 *emit,
7496 const struct tgsi_full_instruction *inst)
7497 {
7498 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7499
7500 /* DISCARD if 0.0 is zero */
7501 begin_emit_instruction(emit);
7502 emit_discard_opcode(emit, false);
7503 emit_src_register(emit, &zero);
7504 end_emit_instruction(emit);
7505
7506 return true;
7507 }
7508
7509
7510 /**
7511 * Emit code for TGSI_OPCODE_LG2 instruction.
7512 */
7513 static bool
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7514 emit_lg2(struct svga_shader_emitter_v10 *emit,
7515 const struct tgsi_full_instruction *inst)
7516 {
7517 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
7518 * while VGPU10 computes four values.
7519 *
7520 * dst = LG2(src):
7521 * dst.xyzw = log2(src.x)
7522 */
7523
7524 struct tgsi_full_src_register src_xxxx =
7525 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7526 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7527
7528 /* LOG tmp, s0.xxxx */
7529 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7530 &inst->Dst[0], &src_xxxx, NULL, NULL,
7531 inst->Instruction.Saturate,
7532 inst->Instruction.Precise);
7533
7534 return true;
7535 }
7536
7537
7538 /**
7539 * Emit code for TGSI_OPCODE_LIT instruction.
7540 */
7541 static bool
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7542 emit_lit(struct svga_shader_emitter_v10 *emit,
7543 const struct tgsi_full_instruction *inst)
7544 {
7545 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7546
7547 /*
7548 * If dst and src are the same we need to create
7549 * a temporary for it and insert a extra move.
7550 */
7551 unsigned tmp_move = get_temp_index(emit);
7552 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7553 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7554
7555 /*
7556 * dst.x = 1
7557 * dst.y = max(src.x, 0)
7558 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
7559 * dst.w = 1
7560 */
7561
7562 /* MOV dst.x, 1.0 */
7563 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7564 struct tgsi_full_dst_register dst_x =
7565 writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7566 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7567 }
7568
7569 /* MOV dst.w, 1.0 */
7570 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7571 struct tgsi_full_dst_register dst_w =
7572 writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7573 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7574 }
7575
7576 /* MAX dst.y, src.x, 0.0 */
7577 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7578 struct tgsi_full_dst_register dst_y =
7579 writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7580 struct tgsi_full_src_register zero =
7581 make_immediate_reg_float(emit, 0.0f);
7582 struct tgsi_full_src_register src_xxxx =
7583 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7584 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7585
7586 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
7587 &zero, NULL, inst->Instruction.Saturate, false);
7588 }
7589
7590 /*
7591 * tmp1 = clamp(src.w, -128, 128);
7592 * MAX tmp1, src.w, -128
7593 * MIN tmp1, tmp1, 128
7594 *
7595 * tmp2 = max(tmp2, 0);
7596 * MAX tmp2, src.y, 0
7597 *
7598 * tmp1 = pow(tmp2, tmp1);
7599 * LOG tmp2, tmp2
7600 * MUL tmp1, tmp2, tmp1
7601 * EXP tmp1, tmp1
7602 *
7603 * tmp1 = (src.w == 0) ? 1 : tmp1;
7604 * EQ tmp2, 0, src.w
7605 * MOVC tmp1, tmp2, 1.0, tmp1
7606 *
7607 * dst.z = (0 < src.x) ? tmp1 : 0;
7608 * LT tmp2, 0, src.x
7609 * MOVC dst.z, tmp2, tmp1, 0.0
7610 */
7611 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7612 struct tgsi_full_dst_register dst_z =
7613 writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7614
7615 unsigned tmp1 = get_temp_index(emit);
7616 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7617 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7618 unsigned tmp2 = get_temp_index(emit);
7619 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7620 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7621
7622 struct tgsi_full_src_register src_xxxx =
7623 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7624 struct tgsi_full_src_register src_yyyy =
7625 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7626 struct tgsi_full_src_register src_wwww =
7627 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
7628
7629 struct tgsi_full_src_register zero =
7630 make_immediate_reg_float(emit, 0.0f);
7631 struct tgsi_full_src_register lowerbound =
7632 make_immediate_reg_float(emit, -128.0f);
7633 struct tgsi_full_src_register upperbound =
7634 make_immediate_reg_float(emit, 128.0f);
7635
7636 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
7637 &lowerbound);
7638 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
7639 &upperbound);
7640 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
7641 &zero);
7642
7643 /* POW tmp1, tmp2, tmp1 */
7644 /* LOG tmp2, tmp2 */
7645 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
7646
7647 /* MUL tmp1, tmp2, tmp1 */
7648 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
7649 &tmp1_src);
7650
7651 /* EXP tmp1, tmp1 */
7652 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
7653
7654 /* EQ tmp2, 0, src.w */
7655 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
7656 /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
7657 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
7658 &tmp2_src, &one, &tmp1_src);
7659
7660 /* LT tmp2, 0, src.x */
7661 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
7662 /* MOVC dst.z, tmp2, tmp1, 0.0 */
7663 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
7664 &tmp2_src, &tmp1_src, &zero);
7665 }
7666
7667 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7668 free_temp_indexes(emit);
7669
7670 return true;
7671 }
7672
7673
7674 /**
7675 * Emit Level Of Detail Query (LODQ) instruction.
7676 */
7677 static bool
emit_lodq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7678 emit_lodq(struct svga_shader_emitter_v10 *emit,
7679 const struct tgsi_full_instruction *inst)
7680 {
7681 const uint unit = inst->Src[1].Register.Index;
7682
7683 assert(emit->version >= 41);
7684
7685 /* LOD dst, coord, resource, sampler */
7686 begin_emit_instruction(emit);
7687 emit_opcode(emit, VGPU10_OPCODE_LOD, false);
7688 emit_dst_register(emit, &inst->Dst[0]);
7689 emit_src_register(emit, &inst->Src[0]); /* coord */
7690 emit_resource_register(emit, unit);
7691 emit_sampler_register(emit, unit);
7692 end_emit_instruction(emit);
7693
7694 return true;
7695 }
7696
7697
7698 /**
7699 * Emit code for TGSI_OPCODE_LOG instruction.
7700 */
7701 static bool
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7702 emit_log(struct svga_shader_emitter_v10 *emit,
7703 const struct tgsi_full_instruction *inst)
7704 {
7705 /*
7706 * dst.x = floor(lg2(abs(s0.x)))
7707 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7708 * dst.z = lg2(abs(s0.x))
7709 * dst.w = 1.0
7710 */
7711
7712 struct tgsi_full_src_register src_xxxx =
7713 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7714 unsigned tmp = get_temp_index(emit);
7715 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7716 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7717 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7718
7719 /* only use X component of temp reg */
7720 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7721 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7722
7723 /* LOG tmp.x, abs(s0.x) */
7724 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7725 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7726 }
7727
7728 /* MOV dst.z, tmp.x */
7729 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7730 struct tgsi_full_dst_register dst_z =
7731 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7732
7733 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7734 &dst_z, &tmp_src, NULL, NULL,
7735 inst->Instruction.Saturate, false);
7736 }
7737
7738 /* FLR tmp.x, tmp.x */
7739 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7740 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7741 }
7742
7743 /* MOV dst.x, tmp.x */
7744 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7745 struct tgsi_full_dst_register dst_x =
7746 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7747
7748 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7749 &dst_x, &tmp_src, NULL, NULL,
7750 inst->Instruction.Saturate, false);
7751 }
7752
7753 /* EXP tmp.x, tmp.x */
7754 /* DIV dst.y, abs(s0.x), tmp.x */
7755 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7756 struct tgsi_full_dst_register dst_y =
7757 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7758
7759 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7760 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7761 &tmp_src, NULL, inst->Instruction.Saturate, false);
7762 }
7763
7764 /* MOV dst.w, 1.0 */
7765 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7766 struct tgsi_full_dst_register dst_w =
7767 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7768 struct tgsi_full_src_register one =
7769 make_immediate_reg_float(emit, 1.0f);
7770
7771 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7772 }
7773
7774 free_temp_indexes(emit);
7775
7776 return true;
7777 }
7778
7779
7780 /**
7781 * Emit code for TGSI_OPCODE_LRP instruction.
7782 */
7783 static bool
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7784 emit_lrp(struct svga_shader_emitter_v10 *emit,
7785 const struct tgsi_full_instruction *inst)
7786 {
7787 /* dst = LRP(s0, s1, s2):
7788 * dst = s0 * (s1 - s2) + s2
7789 * Translates into:
7790 * SUB tmp, s1, s2; tmp = s1 - s2
7791 * MAD dst, s0, tmp, s2; dst = s0 * t1 + s2
7792 */
7793 unsigned tmp = get_temp_index(emit);
7794 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7795 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7796 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7797
7798 /* ADD tmp, s1, -s2 */
7799 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7800 &inst->Src[1], &neg_src2, NULL, false,
7801 inst->Instruction.Precise);
7802
7803 /* MAD dst, s1, tmp, s3 */
7804 emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7805 &inst->Src[0], &src_tmp, &inst->Src[2],
7806 inst->Instruction.Saturate,
7807 inst->Instruction.Precise);
7808
7809 free_temp_indexes(emit);
7810
7811 return true;
7812 }
7813
7814
7815 /**
7816 * Emit code for TGSI_OPCODE_POW instruction.
7817 */
7818 static bool
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7819 emit_pow(struct svga_shader_emitter_v10 *emit,
7820 const struct tgsi_full_instruction *inst)
7821 {
7822 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7823 * src1.x while VGPU10 computes four values.
7824 *
7825 * dst = POW(src0, src1):
7826 * dst.xyzw = src0.x ^ src1.x
7827 */
7828 unsigned tmp = get_temp_index(emit);
7829 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7830 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7831 struct tgsi_full_src_register src0_xxxx =
7832 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7833 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7834 struct tgsi_full_src_register src1_xxxx =
7835 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7836 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7837
7838 /* LOG tmp, s0.xxxx */
7839 emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7840 &tmp_dst, &src0_xxxx, NULL, NULL,
7841 false, inst->Instruction.Precise);
7842
7843 /* MUL tmp, tmp, s1.xxxx */
7844 emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7845 &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7846 false, inst->Instruction.Precise);
7847
7848 /* EXP tmp, s0.xxxx */
7849 emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7850 &inst->Dst[0], &tmp_src, NULL, NULL,
7851 inst->Instruction.Saturate,
7852 inst->Instruction.Precise);
7853
7854 /* free tmp */
7855 free_temp_indexes(emit);
7856
7857 return true;
7858 }
7859
7860
7861 /**
7862 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7863 */
7864 static bool
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7865 emit_rcp(struct svga_shader_emitter_v10 *emit,
7866 const struct tgsi_full_instruction *inst)
7867 {
7868 if (emit->version >= 50) {
7869 /* use new RCP instruction. But VGPU10_OPCODE_RCP is component-wise
7870 * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7871 * to manipulate the src register's swizzle.
7872 */
7873 struct tgsi_full_src_register src = inst->Src[0];
7874 src.Register.SwizzleY =
7875 src.Register.SwizzleZ =
7876 src.Register.SwizzleW = src.Register.SwizzleX;
7877
7878 begin_emit_instruction(emit);
7879 emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7880 inst->Instruction.Saturate,
7881 inst->Instruction.Precise);
7882 emit_dst_register(emit, &inst->Dst[0]);
7883 emit_src_register(emit, &src);
7884 end_emit_instruction(emit);
7885 }
7886 else {
7887 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7888
7889 unsigned tmp = get_temp_index(emit);
7890 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7891 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7892
7893 struct tgsi_full_dst_register tmp_dst_x =
7894 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7895 struct tgsi_full_src_register tmp_src_xxxx =
7896 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7897
7898 /* DIV tmp.x, 1.0, s0 */
7899 emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7900 &tmp_dst_x, &one, &inst->Src[0], NULL,
7901 false, inst->Instruction.Precise);
7902
7903 /* MOV dst, tmp.xxxx */
7904 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7905 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7906 inst->Instruction.Saturate,
7907 inst->Instruction.Precise);
7908
7909 free_temp_indexes(emit);
7910 }
7911
7912 return true;
7913 }
7914
7915
7916 /**
7917 * Emit code for TGSI_OPCODE_RSQ instruction.
7918 */
7919 static bool
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7920 emit_rsq(struct svga_shader_emitter_v10 *emit,
7921 const struct tgsi_full_instruction *inst)
7922 {
7923 /* dst = RSQ(src):
7924 * dst.xyzw = 1 / sqrt(src.x)
7925 * Translates into:
7926 * RSQ tmp, src.x
7927 * MOV dst, tmp.xxxx
7928 */
7929
7930 unsigned tmp = get_temp_index(emit);
7931 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7932 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7933
7934 struct tgsi_full_dst_register tmp_dst_x =
7935 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7936 struct tgsi_full_src_register tmp_src_xxxx =
7937 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7938
7939 /* RSQ tmp, src.x */
7940 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7941 &tmp_dst_x, &inst->Src[0], NULL, NULL,
7942 false, inst->Instruction.Precise);
7943
7944 /* MOV dst, tmp.xxxx */
7945 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7946 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7947 inst->Instruction.Saturate,
7948 inst->Instruction.Precise);
7949
7950 /* free tmp */
7951 free_temp_indexes(emit);
7952
7953 return true;
7954 }
7955
7956
7957 /**
7958 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7959 */
7960 static bool
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7961 emit_seq(struct svga_shader_emitter_v10 *emit,
7962 const struct tgsi_full_instruction *inst)
7963 {
7964 /* dst = SEQ(s0, s1):
7965 * dst = s0 == s1 ? 1.0 : 0.0 (per component)
7966 * Translates into:
7967 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7968 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
7969 */
7970 unsigned tmp = get_temp_index(emit);
7971 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7972 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7973 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7974 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7975
7976 /* EQ tmp, s0, s1 */
7977 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7978 &inst->Src[1]);
7979
7980 /* MOVC dst, tmp, one, zero */
7981 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7982 &one, &zero);
7983
7984 free_temp_indexes(emit);
7985
7986 return true;
7987 }
7988
7989
7990 /**
7991 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7992 */
7993 static bool
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7994 emit_sge(struct svga_shader_emitter_v10 *emit,
7995 const struct tgsi_full_instruction *inst)
7996 {
7997 /* dst = SGE(s0, s1):
7998 * dst = s0 >= s1 ? 1.0 : 0.0 (per component)
7999 * Translates into:
8000 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
8001 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8002 */
8003 unsigned tmp = get_temp_index(emit);
8004 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8005 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8006 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8007 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8008
8009 /* GE tmp, s0, s1 */
8010 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
8011 &inst->Src[1]);
8012
8013 /* MOVC dst, tmp, one, zero */
8014 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8015 &one, &zero);
8016
8017 free_temp_indexes(emit);
8018
8019 return true;
8020 }
8021
8022
8023 /**
8024 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
8025 */
8026 static bool
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8027 emit_sgt(struct svga_shader_emitter_v10 *emit,
8028 const struct tgsi_full_instruction *inst)
8029 {
8030 /* dst = SGT(s0, s1):
8031 * dst = s0 > s1 ? 1.0 : 0.0 (per component)
8032 * Translates into:
8033 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
8034 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8035 */
8036 unsigned tmp = get_temp_index(emit);
8037 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8038 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8039 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8040 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8041
8042 /* LT tmp, s1, s0 */
8043 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
8044 &inst->Src[0]);
8045
8046 /* MOVC dst, tmp, one, zero */
8047 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8048 &one, &zero);
8049
8050 free_temp_indexes(emit);
8051
8052 return true;
8053 }
8054
8055
8056 /**
8057 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
8058 */
8059 static bool
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8060 emit_sincos(struct svga_shader_emitter_v10 *emit,
8061 const struct tgsi_full_instruction *inst)
8062 {
8063 unsigned tmp = get_temp_index(emit);
8064 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8065 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8066
8067 struct tgsi_full_src_register tmp_src_xxxx =
8068 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
8069 struct tgsi_full_dst_register tmp_dst_x =
8070 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8071
8072 begin_emit_instruction(emit);
8073 emit_opcode(emit, VGPU10_OPCODE_SINCOS, false);
8074
8075 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
8076 {
8077 emit_dst_register(emit, &tmp_dst_x); /* first destination register */
8078 emit_null_dst_register(emit); /* second destination register */
8079 }
8080 else {
8081 emit_null_dst_register(emit);
8082 emit_dst_register(emit, &tmp_dst_x);
8083 }
8084
8085 emit_src_register(emit, &inst->Src[0]);
8086 end_emit_instruction(emit);
8087
8088 emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
8089 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
8090 inst->Instruction.Saturate,
8091 inst->Instruction.Precise);
8092
8093 free_temp_indexes(emit);
8094
8095 return true;
8096 }
8097
8098
8099 /**
8100 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
8101 */
8102 static bool
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8103 emit_sle(struct svga_shader_emitter_v10 *emit,
8104 const struct tgsi_full_instruction *inst)
8105 {
8106 /* dst = SLE(s0, s1):
8107 * dst = s0 <= s1 ? 1.0 : 0.0 (per component)
8108 * Translates into:
8109 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
8110 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8111 */
8112 unsigned tmp = get_temp_index(emit);
8113 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8114 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8115 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8116 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8117
8118 /* GE tmp, s1, s0 */
8119 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
8120 &inst->Src[0]);
8121
8122 /* MOVC dst, tmp, one, zero */
8123 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8124 &one, &zero);
8125
8126 free_temp_indexes(emit);
8127
8128 return true;
8129 }
8130
8131
8132 /**
8133 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
8134 */
8135 static bool
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8136 emit_slt(struct svga_shader_emitter_v10 *emit,
8137 const struct tgsi_full_instruction *inst)
8138 {
8139 /* dst = SLT(s0, s1):
8140 * dst = s0 < s1 ? 1.0 : 0.0 (per component)
8141 * Translates into:
8142 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
8143 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
8144 */
8145 unsigned tmp = get_temp_index(emit);
8146 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8147 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8148 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8149 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8150
8151 /* LT tmp, s0, s1 */
8152 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
8153 &inst->Src[1]);
8154
8155 /* MOVC dst, tmp, one, zero */
8156 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8157 &one, &zero);
8158
8159 free_temp_indexes(emit);
8160
8161 return true;
8162 }
8163
8164
/**
 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
 */
static bool
emit_sne(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SNE(s0, s1):
    *   dst = s0 != s1 ? 1.0 : 0.0 (per component)
    * Translates into:
    *   NE tmp, s0, s1;          tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* NE tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
                        &inst->Src[1]);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero);

   free_temp_indexes(emit);

   return true;
}
8196
8197
8198 /**
8199 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
8200 */
8201 static bool
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8202 emit_ssg(struct svga_shader_emitter_v10 *emit,
8203 const struct tgsi_full_instruction *inst)
8204 {
8205 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
8206 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
8207 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
8208 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
8209 * Translates into:
8210 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp)
8211 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component)
8212 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp)
8213 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component)
8214 */
8215 struct tgsi_full_src_register zero =
8216 make_immediate_reg_float(emit, 0.0f);
8217 struct tgsi_full_src_register one =
8218 make_immediate_reg_float(emit, 1.0f);
8219 struct tgsi_full_src_register neg_one =
8220 make_immediate_reg_float(emit, -1.0f);
8221
8222 unsigned tmp1 = get_temp_index(emit);
8223 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8224 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8225
8226 unsigned tmp2 = get_temp_index(emit);
8227 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8228 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8229
8230 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
8231 &zero);
8232 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
8233 &neg_one, &zero);
8234 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
8235 &inst->Src[0]);
8236 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
8237 &one, &tmp2_src);
8238
8239 free_temp_indexes(emit);
8240
8241 return true;
8242 }
8243
8244
8245 /**
8246 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
8247 */
8248 static bool
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8249 emit_issg(struct svga_shader_emitter_v10 *emit,
8250 const struct tgsi_full_instruction *inst)
8251 {
8252 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
8253 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
8254 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
8255 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
8256 * Translates into:
8257 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component)
8258 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component)
8259 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component)
8260 */
8261 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8262
8263 unsigned tmp1 = get_temp_index(emit);
8264 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8265 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8266
8267 unsigned tmp2 = get_temp_index(emit);
8268 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8269 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8270
8271 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
8272
8273 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
8274 &inst->Src[0], &zero);
8275 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
8276 &zero, &inst->Src[0]);
8277 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
8278 &tmp1_src, &neg_tmp2);
8279
8280 free_temp_indexes(emit);
8281
8282 return true;
8283 }
8284
8285
/**
 * Emit a comparison instruction.  The dest register will get
 * 0 or ~0 (all bits set) values depending on the outcome of comparing
 * src0 to src1.
 *
 * \param func  the SVGA comparison function (SVGA3D_CMP_x)
 * \param dst   destination register, written with 0 or ~0 per component
 * \param src0  left-hand comparison operand
 * \param src1  right-hand comparison operand
 */
static void
emit_comparison(struct svga_shader_emitter_v10 *emit,
                SVGA3dCmpFunc func,
                const struct tgsi_full_dst_register *dst,
                const struct tgsi_full_src_register *src0,
                const struct tgsi_full_src_register *src1)
{
   struct tgsi_full_src_register immediate;
   VGPU10OpcodeToken0 opcode0;
   bool swapSrc = false;

   /* Sanity checks for svga vs. gallium enums */
   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));

   opcode0.value = 0;

   switch (func) {
   case SVGA3D_CMP_NEVER:
      /* Constant-false comparison: just MOV dst, {0} */
      immediate = make_immediate_reg_int(emit, 0);
      begin_emit_instruction(emit);
      emit_dword(emit, VGPU10_OPCODE_MOV);
      emit_dst_register(emit, dst);
      emit_src_register(emit, &immediate);
      end_emit_instruction(emit);
      return;
   case SVGA3D_CMP_ALWAYS:
      /* Constant-true comparison: MOV dst, {-1} (all bits set) */
      immediate = make_immediate_reg_int(emit, -1);
      begin_emit_instruction(emit);
      emit_dword(emit, VGPU10_OPCODE_MOV);
      emit_dst_register(emit, dst);
      emit_src_register(emit, &immediate);
      end_emit_instruction(emit);
      return;
   case SVGA3D_CMP_LESS:
      opcode0.opcodeType = VGPU10_OPCODE_LT;
      break;
   case SVGA3D_CMP_EQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_EQ;
      break;
   case SVGA3D_CMP_LESSEQUAL:
      /* No LE opcode in VGPU10:  a <= b  <==>  b >= a  (swap operands) */
      opcode0.opcodeType = VGPU10_OPCODE_GE;
      swapSrc = true;
      break;
   case SVGA3D_CMP_GREATER:
      /* No GT opcode in VGPU10:  a > b  <==>  b < a  (swap operands) */
      opcode0.opcodeType = VGPU10_OPCODE_LT;
      swapSrc = true;
      break;
   case SVGA3D_CMP_NOTEQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_NE;
      break;
   case SVGA3D_CMP_GREATEREQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_GE;
      break;
   default:
      assert(!"Unexpected comparison mode");
      opcode0.opcodeType = VGPU10_OPCODE_EQ;
   }

   /* Emit:  <cmp-opcode> dst, src0, src1  (operands swapped if needed) */
   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_dst_register(emit, dst);
   if (swapSrc) {
      emit_src_register(emit, src1);
      emit_src_register(emit, src0);
   }
   else {
      emit_src_register(emit, src0);
      emit_src_register(emit, src1);
   }
   end_emit_instruction(emit);
}
8364
8365
8366 /**
8367 * Get texel/address offsets for a texture instruction.
8368 */
8369 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])8370 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
8371 const struct tgsi_full_instruction *inst, int offsets[3])
8372 {
8373 if (inst->Texture.NumOffsets == 1) {
8374 /* According to OpenGL Shader Language spec the offsets are only
8375 * fetched from a previously-declared immediate/literal.
8376 */
8377 const struct tgsi_texture_offset *off = inst->TexOffsets;
8378 const unsigned index = off[0].Index;
8379 const unsigned swizzleX = off[0].SwizzleX;
8380 const unsigned swizzleY = off[0].SwizzleY;
8381 const unsigned swizzleZ = off[0].SwizzleZ;
8382 const union tgsi_immediate_data *imm = emit->immediates[index];
8383
8384 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
8385
8386 offsets[0] = imm[swizzleX].Int;
8387 offsets[1] = imm[swizzleY].Int;
8388 offsets[2] = imm[swizzleZ].Int;
8389 }
8390 else {
8391 offsets[0] = offsets[1] = offsets[2] = 0;
8392 }
8393 }
8394
8395
8396 /**
8397 * Set up the coordinate register for texture sampling.
8398 * When we're sampling from a RECT texture we have to scale the
8399 * unnormalized coordinate to a normalized coordinate.
8400 * We do that by multiplying the coordinate by an "extra" constant.
8401 * An alternative would be to use the RESINFO instruction to query the
8402 * texture's size.
8403 */
8404 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)8405 setup_texcoord(struct svga_shader_emitter_v10 *emit,
8406 unsigned unit,
8407 const struct tgsi_full_src_register *coord)
8408 {
8409 if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
8410 unsigned scale_index = emit->texcoord_scale_index[unit];
8411 unsigned tmp = get_temp_index(emit);
8412 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8413 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8414 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
8415
8416 if (emit->key.tex[unit].texel_bias) {
8417 /* to fix texture coordinate rounding issue, 0.0001 offset is
8418 * been added. This fixes piglit test fbo-blit-scaled-linear. */
8419 struct tgsi_full_src_register offset =
8420 make_immediate_reg_float(emit, 0.0001f);
8421
8422 /* ADD tmp, coord, offset */
8423 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
8424 coord, &offset);
8425 /* MUL tmp, tmp, scale */
8426 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8427 &tmp_src, &scale_src);
8428 }
8429 else {
8430 /* MUL tmp, coord, const[] */
8431 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8432 coord, &scale_src);
8433 }
8434 return tmp_src;
8435 }
8436 else {
8437 /* use texcoord as-is */
8438 return *coord;
8439 }
8440 }
8441
8442
8443 /**
8444 * For SAMPLE_C instructions, emit the extra src register which indicates
8445 * the reference/comparision value.
8446 */
8447 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)8448 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
8449 enum tgsi_texture_type target,
8450 const struct tgsi_full_src_register *coord)
8451 {
8452 struct tgsi_full_src_register coord_src_ref;
8453 int component;
8454
8455 assert(tgsi_is_shadow_target(target));
8456
8457 component = tgsi_util_get_shadow_ref_src_index(target) % 4;
8458 assert(component >= 0);
8459
8460 coord_src_ref = scalar_src(coord, component);
8461
8462 emit_src_register(emit, &coord_src_ref);
8463 }
8464
8465
/**
 * Info for implementing texture swizzles.
 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
 * functions use this to encapsulate the extra steps needed to perform
 * a texture swizzle, or shadow/depth comparisons.
 * The shadow/depth comparison is only done here for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 */
struct tex_swizzle_info
{
   bool swizzled;            /**< any of R/G/B/A differs from the identity? */
   bool shadow_compare;      /**< do the depth comparison in the shader? */
   unsigned unit;            /**< texture unit (set when swizzled/compare) */
   enum tgsi_texture_type texture_target; /**< TGSI_TEXTURE_x */
   struct tgsi_full_src_register tmp_src; /**< temp reg read back after SAMPLE */
   struct tgsi_full_dst_register tmp_dst; /**< temp reg the SAMPLE writes to */
   const struct tgsi_full_dst_register *inst_dst; /**< instruction's real dst */
   const struct tgsi_full_src_register *coord_src; /**< instruction's coord src */
};
8485
8486
/**
 * Do setup for handling texture swizzles or shadow compares.
 * \param unit the texture unit
 * \param inst the TGSI texture instruction
 * \param shadow_compare do shadow/depth comparison in the shader?
 * \param swz returns the swizzle info
 */
static void
begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                  unsigned unit,
                  const struct tgsi_full_instruction *inst,
                  bool shadow_compare,
                  struct tex_swizzle_info *swz)
{
   /* A swizzle step is needed if any channel maps to something other
    * than its identity component.
    */
   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);

   swz->shadow_compare = shadow_compare;
   swz->texture_target = inst->Texture.Texture;

   if (swz->swizzled || shadow_compare) {
      /* Allocate temp register for the result of the SAMPLE instruction
       * and the source of the MOV/compare/swizzle instructions.
       */
      unsigned tmp = get_temp_index(emit);
      swz->tmp_src = make_src_temp_reg(tmp);
      swz->tmp_dst = make_dst_temp_reg(tmp);

      /* note: swz->unit is only initialized on this path */
      swz->unit = unit;
   }
   swz->inst_dst = &inst->Dst[0];
   swz->coord_src = &inst->Src[0];

   /* Record which units do an in-shader shadow comparison */
   emit->shadow_compare_units |= shadow_compare << unit;
}
8524
8525
8526 /**
8527 * Returns the register to put the SAMPLE instruction results into.
8528 * This will either be the original instruction dst reg (if no swizzle
8529 * and no shadow comparison) or a temporary reg if there is a swizzle.
8530 */
8531 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)8532 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
8533 {
8534 return (swz->swizzled || swz->shadow_compare)
8535 ? &swz->tmp_dst : swz->inst_dst;
8536 }
8537
8538
/**
 * This emits the MOV instruction that actually implements a texture swizzle
 * and/or shadow comparison.
 */
static void
end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                const struct tex_swizzle_info *swz)
{
   if (swz->shadow_compare) {
      /* Emit extra instructions to compare the fetched texel value against
       * a texture coordinate component.  The result of the comparison
       * is 0.0 or 1.0.
       */
      struct tgsi_full_src_register coord_src;
      struct tgsi_full_src_register texel_src =
         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      /* convert gallium comparison func to SVGA comparison func */
      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;

      /* Which coordinate component carries the shadow reference value */
      int component =
         tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
      assert(component >= 0);
      coord_src = scalar_src(swz->coord_src, component);

      /* COMPARE tmp, coord, texel -- tmp gets 0 or ~0 per component */
      emit_comparison(emit, compare_func,
                      &swz->tmp_dst, &coord_src, &texel_src);

      /* AND dest, tmp, {1.0} -- masks ~0 down to the bits of 1.0f.
       * If a swizzle follows, keep the result in the temp register.
       */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_AND, false);
      if (swz->swizzled) {
         emit_dst_register(emit, &swz->tmp_dst);
      }
      else {
         emit_dst_register(emit, swz->inst_dst);
      }
      emit_src_register(emit, &swz->tmp_src);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   if (swz->swizzled) {
      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
      unsigned writemask_0 = 0, writemask_1 = 0;
      /* integer textures need integer 0/1 constants below */
      bool int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);

      /* Swizzle w/out zero/one terms (those are handled separately below;
       * here they fall back to the identity component).
       */
      struct tgsi_full_src_register src_swizzled =
         swizzle_src(&swz->tmp_src,
                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);

      /* MOV dst, color(tmp).<swizzle> */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                           swz->inst_dst, &src_swizzled);

      /* handle swizzle zero terms: build a writemask of the channels
       * that map to constant 0, clipped to the instruction's writemask
       */
      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
                     ((swz_a == PIPE_SWIZZLE_0) << 3));
      writemask_0 &= swz->inst_dst->Register.WriteMask;

      if (writemask_0) {
         struct tgsi_full_src_register zero = int_tex ?
            make_immediate_reg_int(emit, 0) :
            make_immediate_reg_float(emit, 0.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_0);

         /* MOV dst.writemask_0, {0,0,0,0} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
      }

      /* handle swizzle one terms: same, for channels mapping to constant 1 */
      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
                     ((swz_a == PIPE_SWIZZLE_1) << 3));
      writemask_1 &= swz->inst_dst->Register.WriteMask;

      if (writemask_1) {
         struct tgsi_full_src_register one = int_tex ?
            make_immediate_reg_int(emit, 1) :
            make_immediate_reg_float(emit, 1.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_1);

         /* MOV dst.writemask_1, {1,1,1,1} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
      }
   }
}
8640
8641
/**
 * Emit code for TGSI_OPCODE_SAMPLE instruction.
 */
static bool
emit_sample(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   /* For SAMPLE the resource and sampler come from separate operands
    * (Src[1] and Src[2]), unlike TEX where one unit serves both roles.
    */
   const unsigned resource_unit = inst->Src[1].Register.Index;
   const unsigned sampler_unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   /* no in-shader shadow compare for plain SAMPLE */
   begin_tex_swizzle(emit, sampler_unit, inst, false, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   /* scales the coordinate if the resource is a RECT texture */
   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
    * with LOD=0. But our virtual GPU accepts this as-is.
    */
   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, resource_unit);
   emit_sampler_register(emit, sampler_unit);
   end_emit_instruction(emit);

   /* apply any texture swizzle to the sampled result */
   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
8681
8682
8683 /**
8684 * Check if a texture instruction is valid.
8685 * An example of an invalid texture instruction is doing shadow comparison
8686 * with an integer-valued texture.
8687 * If we detect an invalid texture instruction, we replace it with:
8688 * MOV dst, {1,1,1,1};
8689 * \return TRUE if valid, FALSE if invalid.
8690 */
8691 static bool
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8692 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8693 const struct tgsi_full_instruction *inst)
8694 {
8695 const unsigned unit = inst->Src[1].Register.Index;
8696 const enum tgsi_texture_type target = inst->Texture.Texture;
8697 bool valid = true;
8698
8699 if (tgsi_is_shadow_target(target) &&
8700 is_integer_type(emit->sampler_return_type[unit])) {
8701 debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8702 valid = false;
8703 }
8704 /* XXX might check for other conditions in the future here */
8705
8706 if (!valid) {
8707 /* emit a MOV dst, {1,1,1,1} instruction. */
8708 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8709 begin_emit_instruction(emit);
8710 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
8711 emit_dst_register(emit, &inst->Dst[0]);
8712 emit_src_register(emit, &one);
8713 end_emit_instruction(emit);
8714 }
8715
8716 return valid;
8717 }
8718
8719
/**
 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
 */
static bool
emit_tex(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const enum tgsi_texture_type target = inst->Texture.Texture;
   VGPU10_OPCODE_TYPE opcode;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;
   bool compare_in_shader;

   /* check that the sampler returns a float; if not, a MOV dst,{1,1,1,1}
    * replacement was already emitted and we're done
    */
   if (!is_valid_tex_instruction(emit, inst))
      return true;

   /* do the depth comparison manually when the key asks for it */
   compare_in_shader = tgsi_is_shadow_target(target) &&
                       emit->key.tex[unit].compare_in_shader;

   begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   /* scales the coordinate if this is a RECT texture */
   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   /* use SAMPLE_C only when the hardware does the comparison for us */
   if (tgsi_is_shadow_target(target) && !compare_in_shader)
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      /* extra operand: the shadow reference value from the coordinate */
      emit_tex_compare_refcoord(emit, target, &coord);
   }
   end_emit_instruction(emit);

   /* apply swizzle and/or in-shader comparison to the result */
   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
8772
/**
 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
 */
static bool
emit_tg4(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register src;
   struct tgsi_full_src_register offset_src, sampler, ref;
   int offsets[3];

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return true;

   if (emit->version >= 50) {
      /* SM5 path: full GATHER4 family with per-component select */
      unsigned target = inst->Texture.Texture;
      /* Src[1] is an immediate holding the component to gather */
      int index = inst->Src[1].Register.Index;
      const union tgsi_immediate_data *imm = emit->immediates[index];
      int select_comp = imm[inst->Src[1].Register.SwizzleX].Int;
      unsigned select_swizzle = PIPE_SWIZZLE_X;

      /* Map the requested gather component through the texture swizzle;
       * shadow targets always gather the R component.
       */
      if (!tgsi_is_shadow_target(target)) {
         switch (select_comp) {
         case 0:
            select_swizzle = emit->key.tex[unit].swizzle_r;
            break;
         case 1:
            select_swizzle = emit->key.tex[unit].swizzle_g;
            break;
         case 2:
            select_swizzle = emit->key.tex[unit].swizzle_b;
            break;
         case 3:
            select_swizzle = emit->key.tex[unit].swizzle_a;
            break;
         default:
            assert(!"Unexpected component in texture gather swizzle");
         }
      }
      else {
         select_swizzle = emit->key.tex[unit].swizzle_r;
      }

      /* A swizzle of constant 0/1 needs no gather at all -- emit a MOV */
      if (select_swizzle == PIPE_SWIZZLE_1) {
         src = make_immediate_reg_float(emit, 1.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         return true;
      }
      else if (select_swizzle == PIPE_SWIZZLE_0) {
         src = make_immediate_reg_float(emit, 0.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         return true;
      }

      /* scales the coordinate if this is a RECT texture */
      src = setup_texcoord(emit, unit, &inst->Src[0]);

      /* Pick the opcode variant by offset presence and shadow target:
       * GATHER4      dst, coord, resource, sampler
       * GATHER4_C    dst, coord, resource, sampler ref
       * GATHER4_PO   dst, coord, offset resource, sampler
       * GATHER4_PO_C dst, coord, offset resource, sampler, ref
       */
      begin_emit_instruction(emit);
      if (inst->Texture.NumOffsets == 1) {
         if (tgsi_is_shadow_target(target)) {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
                        inst->Instruction.Saturate);
         }
         else {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
                        inst->Instruction.Saturate);
         }
      }
      else {
         if (tgsi_is_shadow_target(target)) {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
                        inst->Instruction.Saturate);
         }
         else {
            emit_opcode(emit, VGPU10_OPCODE_GATHER4,
                        inst->Instruction.Saturate);
         }
      }

      emit_dst_register(emit, &inst->Dst[0]);
      emit_src_register(emit, &src);
      if (inst->Texture.NumOffsets == 1) {
         /* offset: programmable-offset variants take it as a src operand */
         offset_src = make_src_reg(inst->TexOffsets[0].File,
                                   inst->TexOffsets[0].Index);
         offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
                                  inst->TexOffsets[0].SwizzleY,
                                  inst->TexOffsets[0].SwizzleZ,
                                  TGSI_SWIZZLE_W);
         emit_src_register(emit, &offset_src);
      }

      /* resource */
      emit_resource_register(emit, unit);

      /* sampler: the sampler swizzle selects the component to gather */
      sampler = make_src_reg(TGSI_FILE_SAMPLER,
                             emit->key.tex[unit].sampler_index);
      sampler.Register.SwizzleX =
      sampler.Register.SwizzleY =
      sampler.Register.SwizzleZ =
      sampler.Register.SwizzleW = select_swizzle;
      emit_src_register(emit, &sampler);

      if (tgsi_is_shadow_target(target)) {
         /* ref: shadow cube arrays carry it in Src[1].x, otherwise it
          * comes from the coordinate itself
          */
         if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
            ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
            emit_tex_compare_refcoord(emit, target, &ref);
         }
         else {
            emit_tex_compare_refcoord(emit, target, &src);
         }
      }

      end_emit_instruction(emit);
      free_temp_indexes(emit);
   }
   else {
      /* Only a single channel is supported in SM4_1 and we report
       * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
       * Only the 0th component will be gathered.
       */
      switch (emit->key.tex[unit].swizzle_r) {
      case PIPE_SWIZZLE_X:
         get_texel_offsets(emit, inst, offsets);
         src = setup_texcoord(emit, unit, &inst->Src[0]);

         /* Gather dst, coord, resource, sampler */
         begin_emit_instruction(emit);
         emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
                            inst->Instruction.Saturate, offsets);
         emit_dst_register(emit, &inst->Dst[0]);
         emit_src_register(emit, &src);
         emit_resource_register(emit, unit);

         /* sampler */
         sampler = make_src_reg(TGSI_FILE_SAMPLER,
                                emit->key.tex[unit].sampler_index);
         sampler.Register.SwizzleX =
         sampler.Register.SwizzleY =
         sampler.Register.SwizzleZ =
         sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
         emit_src_register(emit, &sampler);

         end_emit_instruction(emit);
         break;
      case PIPE_SWIZZLE_W:
      case PIPE_SWIZZLE_1:
         /* constant-1 channel: just write 1.0 */
         src = make_immediate_reg_float(emit, 1.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         break;
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_0:
      default:
         /* unsupported channel: write 0.0 */
         src = make_immediate_reg_float(emit, 0.0);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
         break;
      }
   }

   return true;
}
8942
8943
8944
/**
 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
 */
static bool
emit_tex2(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[2].Register.Index;
   unsigned target = inst->Texture.Texture;
   struct tgsi_full_src_register coord, ref;
   int offsets[3];
   struct tex_swizzle_info swz_info;
   VGPU10_OPCODE_TYPE opcode;
   bool compare_in_shader;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return true;

   /* When comparing in the shader, sample plainly and let
    * end_tex_swizzle() emit the comparison; otherwise let the
    * hardware compare via SAMPLE_C.
    */
   compare_in_shader = emit->key.tex[unit].compare_in_shader;
   if (compare_in_shader)
      opcode = VGPU10_OPCODE_SAMPLE;
   else
      opcode = VGPU10_OPCODE_SAMPLE_C;

   begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);
   /* the shadow reference value lives in Src[1].x for TEX2 */
   ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);

   /* SAMPLE_C dst, coord, resource, sampler, ref */
   begin_emit_instruction(emit);
   emit_sample_opcode(emit, opcode,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      emit_tex_compare_refcoord(emit, target, &ref);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
8996
8997
/**
 * Emit code for TGSI_OPCODE_TXP (projective texture)
 */
static bool
emit_txp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const enum tgsi_texture_type target = inst->Texture.Texture;
   VGPU10_OPCODE_TYPE opcode;
   int offsets[3];
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   /* the projective divisor: the .w of the original (unscaled) coord */
   struct tgsi_full_src_register src0_wwww =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;
   bool compare_in_shader;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return true;

   compare_in_shader = tgsi_is_shadow_target(target) &&
                       emit->key.tex[unit].compare_in_shader;

   begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   /* may return a RECT-scaled coordinate */
   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* DIV tmp, coord, coord.wwww
    * NOTE(review): the divisor is the unscaled Src[0].w even when 'coord'
    * was RECT-scaled above -- presumably the scale constant leaves .w
    * unchanged; confirm against the texcoord-scale constant setup.
    */
   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
                        &coord, &src0_wwww);

   /* SAMPLE dst, coord(tmp), resource, sampler */
   begin_emit_instruction(emit);

   if (tgsi_is_shadow_target(target) && !compare_in_shader)
      /* NOTE: for non-fragment shaders, we should use
       * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
       */
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &tmp_src); /* projected coord */
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      /* shadow reference comes from the projected coordinate */
      emit_tex_compare_refcoord(emit, target, &tmp_src);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
9062
9063
/**
 * Emit code for TGSI_OPCODE_TXD (texture sampling with explicit
 * partial derivatives).
 *
 * TGSI operand layout: src0 = texcoord, src1 = X derivatives,
 * src2 = Y derivatives, src3 = sampler.
 *
 * \return true (no failure path in this emitter)
 */
static bool
emit_txd(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* The sampler/resource unit comes from the fourth source operand */
   const uint unit = inst->Src[3].Register.Index;
   const enum tgsi_texture_type target = inst->Texture.Texture;
   int offsets[3];
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   /* Set up any post-sample result swizzling (shadow targets get the
    * compare handling selected by tgsi_is_shadow_target()).
    */
   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   /* Collect any immediate texel offsets attached to the instruction */
   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
   begin_emit_instruction(emit);
   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
   emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
   end_emit_instruction(emit);

   /* Apply the deferred result swizzle, if any */
   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
9102
9103
/**
 * Emit code for TGSI_OPCODE_TXF (texel fetch with integer coordinates).
 *
 * For multisample targets with more than one sample per pixel this emits
 * LD_MS (the sample index is taken from coord.w); otherwise a plain LD.
 *
 * \return true (no failure path in this emitter)
 */
static bool
emit_txf(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   /* Only use the MSAA path when the bound texture actually has
    * multiple samples; an MSAA target with num_samples <= 1 falls
    * back to an ordinary LD.
    */
   const bool msaa = tgsi_is_msaa_target(inst->Texture.Texture)
      && emit->key.tex[unit].num_samples > 1;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   /* TXF never does a shadow compare, hence 'false' here */
   begin_tex_swizzle(emit, unit, inst, false, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   if (msaa) {
      assert(emit->key.tex[unit].num_samples > 1);

      /* Fetch one sample from an MSAA texture */
      struct tgsi_full_src_register sampleIndex =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      /* LD_MS dst, coord(s0), resource, sampleIndex */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      emit_src_register(emit, &sampleIndex);
      end_emit_instruction(emit);
   }
   else {
      /* Fetch one texel specified by integer coordinate */
      /* LD dst, coord(s0), resource */
      begin_emit_instruction(emit);
      emit_sample_opcode(emit, VGPU10_OPCODE_LD,
                         inst->Instruction.Saturate, offsets);
      emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
      emit_src_register(emit, &inst->Src[0]);
      emit_resource_register(emit, unit);
      end_emit_instruction(emit);
   }

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
9155
9156
/**
 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
 * or TGSI_OPCODE_TXB2 (for cube shadow maps).
 *
 * Operand layout differs per opcode:
 *  - TXB2: bias in src1.x, sampler unit in src2
 *  - TXL/TXB: LOD/bias in src0.w, sampler unit in src1
 * TXL maps to SAMPLE_L; TXB/TXB2 map to SAMPLE_B.
 */
static bool
emit_txl_txb(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   const enum tgsi_texture_type target = inst->Texture.Texture;
   VGPU10_OPCODE_TYPE opcode;
   unsigned unit;
   int offsets[3];
   struct tgsi_full_src_register coord, lod_bias;
   struct tex_swizzle_info swz_info;

   assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
          inst->Instruction.Opcode == TGSI_OPCODE_TXB2);

   if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
      /* TXB2 carries the bias in a separate source register */
      lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
      unit = inst->Src[2].Register.Index;
   }
   else {
      /* TXL/TXB pack the LOD/bias into the coordinate's w component */
      lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
      unit = inst->Src[1].Register.Index;
   }

   begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
                     &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
   begin_emit_instruction(emit);
   if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
      opcode = VGPU10_OPCODE_SAMPLE_L;
   }
   else {
      opcode = VGPU10_OPCODE_SAMPLE_B;
   }
   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   emit_src_register(emit, &lod_bias);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return true;
}
9214
9215
9216 /**
9217 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
9218 */
9219 static bool
emit_txl2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9220 emit_txl2(struct svga_shader_emitter_v10 *emit,
9221 const struct tgsi_full_instruction *inst)
9222 {
9223 unsigned target = inst->Texture.Texture;
9224 unsigned opcode, unit;
9225 int offsets[3];
9226 struct tgsi_full_src_register coord, lod;
9227 struct tex_swizzle_info swz_info;
9228
9229 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
9230
9231 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9232 unit = inst->Src[2].Register.Index;
9233
9234 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9235 &swz_info);
9236
9237 get_texel_offsets(emit, inst, offsets);
9238
9239 coord = setup_texcoord(emit, unit, &inst->Src[0]);
9240
9241 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
9242 begin_emit_instruction(emit);
9243 opcode = VGPU10_OPCODE_SAMPLE_L;
9244 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9245 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9246 emit_src_register(emit, &coord);
9247 emit_resource_register(emit, unit);
9248 emit_sampler_register(emit, unit);
9249 emit_src_register(emit, &lod);
9250 end_emit_instruction(emit);
9251
9252 end_tex_swizzle(emit, &swz_info);
9253
9254 free_temp_indexes(emit);
9255
9256 return true;
9257 }
9258
9259
9260 /**
9261 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
9262 */
9263 static bool
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9264 emit_txq(struct svga_shader_emitter_v10 *emit,
9265 const struct tgsi_full_instruction *inst)
9266 {
9267 const uint unit = inst->Src[1].Register.Index;
9268
9269 if (emit->key.tex[unit].target == PIPE_BUFFER) {
9270 /* RESINFO does not support querying texture buffers, so we instead
9271 * store texture buffer sizes in shader constants, then copy them to
9272 * implement TXQ instead of emitting RESINFO.
9273 * MOV dst, const[texture_buffer_size_index[unit]]
9274 */
9275 struct tgsi_full_src_register size_src =
9276 make_src_const_reg(emit->texture_buffer_size_index[unit]);
9277 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
9278 } else {
9279 /* RESINFO dst, srcMipLevel, resource */
9280 begin_emit_instruction(emit);
9281 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
9282 emit_dst_register(emit, &inst->Dst[0]);
9283 emit_src_register(emit, &inst->Src[0]);
9284 emit_resource_register(emit, unit);
9285 end_emit_instruction(emit);
9286 }
9287
9288 free_temp_indexes(emit);
9289
9290 return true;
9291 }
9292
9293
9294 /**
9295 * Does this opcode produce a double-precision result?
9296 * XXX perhaps move this to a TGSI utility.
9297 */
9298 static bool
opcode_has_dbl_dst(unsigned opcode)9299 opcode_has_dbl_dst(unsigned opcode)
9300 {
9301 switch (opcode) {
9302 case TGSI_OPCODE_F2D:
9303 case TGSI_OPCODE_DABS:
9304 case TGSI_OPCODE_DADD:
9305 case TGSI_OPCODE_DFRAC:
9306 case TGSI_OPCODE_DMAX:
9307 case TGSI_OPCODE_DMIN:
9308 case TGSI_OPCODE_DMUL:
9309 case TGSI_OPCODE_DNEG:
9310 case TGSI_OPCODE_I2D:
9311 case TGSI_OPCODE_U2D:
9312 case TGSI_OPCODE_DFMA:
9313 // XXX more TBD
9314 return true;
9315 default:
9316 return false;
9317 }
9318 }
9319
9320
9321 /**
9322 * Does this opcode use double-precision source registers?
9323 */
9324 static bool
opcode_has_dbl_src(unsigned opcode)9325 opcode_has_dbl_src(unsigned opcode)
9326 {
9327 switch (opcode) {
9328 case TGSI_OPCODE_D2F:
9329 case TGSI_OPCODE_DABS:
9330 case TGSI_OPCODE_DADD:
9331 case TGSI_OPCODE_DFRAC:
9332 case TGSI_OPCODE_DMAX:
9333 case TGSI_OPCODE_DMIN:
9334 case TGSI_OPCODE_DMUL:
9335 case TGSI_OPCODE_DNEG:
9336 case TGSI_OPCODE_D2I:
9337 case TGSI_OPCODE_D2U:
9338 case TGSI_OPCODE_DFMA:
9339 case TGSI_OPCODE_DSLT:
9340 case TGSI_OPCODE_DSGE:
9341 case TGSI_OPCODE_DSEQ:
9342 case TGSI_OPCODE_DSNE:
9343 case TGSI_OPCODE_DRCP:
9344 case TGSI_OPCODE_DSQRT:
9345 case TGSI_OPCODE_DMAD:
9346 case TGSI_OPCODE_DLDEXP:
9347 case TGSI_OPCODE_DRSQ:
9348 case TGSI_OPCODE_DTRUNC:
9349 case TGSI_OPCODE_DCEIL:
9350 case TGSI_OPCODE_DFLR:
9351 case TGSI_OPCODE_DROUND:
9352 case TGSI_OPCODE_DSSG:
9353 return true;
9354 default:
9355 return false;
9356 }
9357 }
9358
9359
9360 /**
9361 * Check that the swizzle for reading from a double-precision register
9362 * is valid. If not valid, move the source to a temporary register first.
9363 */
9364 static struct tgsi_full_src_register
check_double_src(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)9365 check_double_src(struct svga_shader_emitter_v10 *emit,
9366 const struct tgsi_full_src_register *reg)
9367 {
9368 struct tgsi_full_src_register src;
9369
9370 if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
9371 reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
9372 (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
9373 reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
9374 ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
9375 reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
9376 (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
9377 reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
9378 src = *reg;
9379 } else {
9380 /* move the src to a temporary to fix the swizzle */
9381 unsigned tmp = get_temp_index(emit);
9382 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9383 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9384 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
9385 src = tmp_src;
9386
9387 /* The temporary index will be released in the caller */
9388 }
9389 return src;
9390 }
9391
9392 /**
9393 * Check that the writemask for a double-precision instruction is valid.
9394 */
9395 static void
check_double_dst_writemask(const struct tgsi_full_instruction * inst)9396 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
9397 {
9398 ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
9399
9400 switch (inst->Instruction.Opcode) {
9401 case TGSI_OPCODE_DABS:
9402 case TGSI_OPCODE_DADD:
9403 case TGSI_OPCODE_DFRAC:
9404 case TGSI_OPCODE_DNEG:
9405 case TGSI_OPCODE_DMAD:
9406 case TGSI_OPCODE_DMAX:
9407 case TGSI_OPCODE_DMIN:
9408 case TGSI_OPCODE_DMUL:
9409 case TGSI_OPCODE_DRCP:
9410 case TGSI_OPCODE_DSQRT:
9411 case TGSI_OPCODE_F2D:
9412 case TGSI_OPCODE_DFMA:
9413 assert(writemask == TGSI_WRITEMASK_XYZW ||
9414 writemask == TGSI_WRITEMASK_XY ||
9415 writemask == TGSI_WRITEMASK_ZW);
9416 break;
9417 case TGSI_OPCODE_DSEQ:
9418 case TGSI_OPCODE_DSGE:
9419 case TGSI_OPCODE_DSNE:
9420 case TGSI_OPCODE_DSLT:
9421 case TGSI_OPCODE_D2I:
9422 case TGSI_OPCODE_D2U:
9423 /* Write to 1 or 2 components only */
9424 assert(util_bitcount(writemask) <= 2);
9425 break;
9426 default:
9427 /* XXX this list may be incomplete */
9428 ;
9429 }
9430 }
9431
9432
9433 /**
9434 * Double-precision absolute value.
9435 */
9436 static bool
emit_dabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9437 emit_dabs(struct svga_shader_emitter_v10 *emit,
9438 const struct tgsi_full_instruction *inst)
9439 {
9440 assert(emit->version >= 50);
9441
9442 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9443 check_double_dst_writemask(inst);
9444
9445 struct tgsi_full_src_register abs_src = absolute_src(&src);
9446
9447 /* DMOV dst, |src| */
9448 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
9449
9450 free_temp_indexes(emit);
9451 return true;
9452 }
9453
9454
9455 /**
9456 * Double-precision negation
9457 */
9458 static bool
emit_dneg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9459 emit_dneg(struct svga_shader_emitter_v10 *emit,
9460 const struct tgsi_full_instruction *inst)
9461 {
9462 assert(emit->version >= 50);
9463 struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9464 check_double_dst_writemask(inst);
9465
9466 struct tgsi_full_src_register neg_src = negate_src(&src);
9467
9468 /* DMOV dst, -src */
9469 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
9470
9471 free_temp_indexes(emit);
9472 return true;
9473 }
9474
9475
/**
 * SM5 has no DMAD opcode, so implement the double multiply-add
 * with a DMUL followed by a DADD through a temporary.
 */
static bool
emit_dmad(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);
   struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
   struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
   struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
   check_double_dst_writemask(inst);

   /* Temp holds the intermediate product */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /* DMUL tmp, src[0], src[1]
    * Note: no saturate on the intermediate product; only the final
    * DADD honors the instruction's saturate flag.
    */
   emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
                        &tmp_dst, &src0, &src1, NULL,
                        false, inst->Instruction.Precise);

   /* DADD dst, tmp, src[2] */
   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
                        &inst->Dst[0], &tmp_src, &src2, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);
   free_temp_indexes(emit);

   return true;
}
9506
9507
/**
 * Double precision reciprocal square root.
 *
 * There is no standard SM5 opcode for this; it is emitted as the
 * VMware vendor extension opcode VGPU10_VMWARE_OPCODE_DRSQ.
 *
 * \param dst  destination register
 * \param src  double-precision source (swizzle is legalized here)
 */
static bool
emit_drsq(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_dst_register *dst,
          const struct tgsi_full_src_register *src)
{
   assert(emit->version >= 50);

   VGPU10OpcodeToken0 token0;
   struct tgsi_full_src_register dsrc = check_double_src(emit, src);

   begin_emit_instruction(emit);

   /* Hand-build the opcode token: VMWARE escape + DRSQ sub-opcode */
   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_VMWARE;
   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
   emit_dword(emit, token0.value);
   emit_dst_register(emit, dst);
   emit_src_register(emit, &dsrc);
   end_emit_instruction(emit);

   /* Releases any temp allocated by check_double_src() */
   free_temp_indexes(emit);

   return true;
}
9535
9536
/**
 * There is no SM5 opcode for double precision square root.
 * It will be implemented with DRSQ.
 * dst = src * DRSQ(src)
 */
static bool
emit_dsqrt(struct svga_shader_emitter_v10 *emit,
           const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);

   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);

   /* temporary register to hold the source */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);

   /* temporary register to hold the DEQ result */
   unsigned tmp_cond = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
   struct tgsi_full_dst_register tmp_cond_dst_xy =
      writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
   struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
   /* DMOVC reads its condition from the xy pair, so replicate it */
   struct tgsi_full_src_register tmp_cond_src_xy =
      swizzle_src(&tmp_cond_src,
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);

   /* The reciprocal square root of zero yields INF.
    * So if the source is 0, we replace it with 1 in the tmp register.
    * The later multiplication of zero in the original source will yield 0
    * in the result.
    */

   /* tmp = (src == 0) ? 1 : src;
    * DEQ   tmp_cond.xy, 0, src
    * DMOVC tmp, tmp_cond, 1.0, src
    */
   struct tgsi_full_src_register zero =
      make_immediate_reg_double(emit, 0);

   struct tgsi_full_src_register one =
      make_immediate_reg_double(emit, 1.0);

   emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
                        &zero, &src);
   emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
                        &tmp_cond_src_xy, &one, &src);

   /* NOTE(review): these alias the same temp as tmp_dst/tmp_src above,
    * so the DRSQ below reads and writes the same register.
    */
   struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);

   /* DRSQ tmp_rsq, tmp */
   emit_drsq(emit, &tmp_rsq_dst, &tmp_src);

   /* DMUL dst, tmp_rsq, src[0] */
   emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
                        &tmp_rsq_src, &src);

   free_temp_indexes(emit);

   return true;
}
9601
9602
/**
 * glsl-nir path does not lower DTRUNC, so we need to
 * add the translation here.
 *
 * Truncation toward zero:
 *   frac = DFRAC(src)
 *   tmp = src - frac
 *   dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
 *
 * (DFRAC rounds toward -inf, so for negative non-integer inputs the
 * subtraction overshoots by one and must be corrected.)
 */
static bool
emit_dtrunc(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);

   struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);

   /* frac = DFRAC(src) -- emitted via the VMware vendor opcode DFRC */
   unsigned frac_index = get_temp_index(emit);
   struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
   struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);

   VGPU10OpcodeToken0 token0;
   begin_emit_instruction(emit);
   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_VMWARE;
   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
   emit_dword(emit, token0.value);
   emit_dst_register(emit, &frac_dst);
   emit_src_register(emit, &src);
   end_emit_instruction(emit);

   /* tmp = src - frac */
   unsigned tmp_index = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
   struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
                        &tmp_dst, &src, &negate_frac_src, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* cond = frac==0 */
   unsigned cond_index = get_temp_index(emit);
   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
   struct tgsi_full_src_register zero =
      make_immediate_reg_double(emit, 0);

   /* Only use one or two components for double opcode */
   cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);

   emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
                        &cond_dst, &frac_src, &zero, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* tmp2 = cond ? 0 : 1 */
   unsigned tmp2_index = get_temp_index(emit);
   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
   /* DMOVC reads its condition from the xy pair, so replicate it */
   struct tgsi_full_src_register cond_src_xy =
      swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                  PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
   struct tgsi_full_src_register one =
      make_immediate_reg_double(emit, 1.0);

   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
                        &tmp2_dst, &cond_src_xy, &zero, &one,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* tmp2 = tmp + tmp2 */
   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
                        &tmp2_dst, &tmp_src, &tmp2_src, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* cond = src>=0 (reuses the cond temp) */
   emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
                        &cond_dst, &src, &zero, NULL,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   /* dst = cond ? tmp : tmp2 */
   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
                        &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
                        inst->Instruction.Saturate, inst->Instruction.Precise);

   free_temp_indexes(emit);
   return true;
}
9689
9690
/**
 * Emit code for interpolation at a per-pixel offset
 * (e.g. GLSL interpolateAtOffset) using the SM5 EVAL_SNAPPED opcode.
 * src0 is the input attribute, src1.xy is the float offset.
 */
static bool
emit_interp_offset(struct svga_shader_emitter_v10 *emit,
                   const struct tgsi_full_instruction *inst)
{
   assert(emit->version >= 50);

   /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
    * where (0,0) is the center of the pixel. We need to translate that
    * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
    * Also need to flip the Y axis (I think).
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst_xy =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
   /* Scale factors: +16 for X, -16 for Y (the Y flip mentioned above) */
   struct tgsi_full_src_register const16 =
      make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);

   /* MUL tmp.xy, src1, {16, -16, 0, 0} */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
                        &tmp_dst_xy, &inst->Src[1], &const16);

   /* FTOI tmp.xy, tmp */
   emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);

   /* EVAL_SNAPPED dst, src0, tmp */
   emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
                        &inst->Dst[0], &inst->Src[0], &tmp_src);

   free_temp_indexes(emit);

   return true;
}
9725
9726
/**
 * Emit a simple instruction (like ADD, MUL, MIN, etc) where the TGSI
 * opcode maps 1:1 onto a VGPU10 opcode via translate_opcode().
 *
 * Also handles two bits of bookkeeping:
 *  - tracks loop nesting depth for BGNLOOP/ENDLOOP
 *  - legalizes source swizzles for double-precision opcodes
 */
static bool
emit_simple(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
   unsigned i;

   struct tgsi_full_src_register src[3];

   /* Maintain the current loop nesting depth */
   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
      emit->current_loop_depth++;
   }
   else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
      emit->current_loop_depth--;
   }

   /* Double sources may need their swizzles fixed up via a temp */
   for (i = 0; i < op->num_src; i++) {
      if (dbl_src)
         src[i] = check_double_src(emit, &inst->Src[i]);
      else
         src[i] = inst->Src[i];
   }

   begin_emit_instruction(emit);
   emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
                       inst->Instruction.Saturate,
                       inst->Instruction.Precise);
   for (i = 0; i < op->num_dst; i++) {
      if (dbl_dst) {
         /* Debug check: double dests may only write component pairs */
         check_double_dst_writemask(inst);
      }
      emit_dst_register(emit, &inst->Dst[i]);
   }
   for (i = 0; i < op->num_src; i++) {
      emit_src_register(emit, &src[i]);
   }
   end_emit_instruction(emit);

   /* Releases any temps allocated by check_double_src() */
   free_temp_indexes(emit);
   return true;
}
9774
9775
/**
 * Emit MSB instruction (like IMSB, UMSB).
 *
 * GLSL returns the index starting from the LSB;
 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
 * To get correct location as per glsl from SM5 device, we should
 * return (31 - index) if returned index is not -1.
 *
 * Strategy: emit the plain SM5 instruction first, then patch each
 * written destination component in place with the (31 - index) fixup.
 */
static bool
emit_msb(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];

   /* The dst must be readable back as a source for the fixup below,
    * so it cannot be an output register.
    */
   assert(index_dst->Register.File != TGSI_FILE_OUTPUT);

   /* Source view of the destination, for reading the SM5 result back */
   struct tgsi_full_src_register index_src =
      make_src_reg(index_dst->Register.File, index_dst->Register.Index);
   struct tgsi_full_src_register imm31 =
      make_immediate_reg_int(emit, 31);
   imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register neg_one =
      make_immediate_reg_int(emit, -1);
   neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
   /* Scratch temp holding the per-component "index != -1" condition */
   unsigned tmp = get_temp_index(emit);
   const struct tgsi_full_dst_register tmp_dst =
      make_dst_temp_reg(tmp);
   const struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register tmp_src_x =
       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
   int writemask = TGSI_WRITEMASK_X;
   int src_swizzle = TGSI_SWIZZLE_X;
   int dst_writemask = index_dst->Register.WriteMask;

   /* Emit the raw SM5 firstbit instruction */
   emit_simple(emit, inst);

   /* index conversion from SM5 to GLSL, one written component at a time.
    * Note: the loop walks x,y,z,w and stops at the first component not
    * in the instruction's writemask.
    */
   while (writemask & dst_writemask) {
      struct tgsi_full_src_register index_src_comp =
         scalar_src(&index_src, src_swizzle);
      struct tgsi_full_dst_register index_dst_comp =
         writemask_dst(index_dst, writemask);

      /* check if index_src_comp != -1 */
      emit_instruction_op2(emit, VGPU10_OPCODE_INE,
                           &tmp_dst_x, &index_src_comp, &neg_one);

      /* if */
      emit_if(emit, &tmp_src_x);

      index_src_comp = negate_src(&index_src_comp);
      /* SUB DST, IMM{31}, DST, i.e. dst = 31 - dst, via IADD with
       * the negated source
       */
      emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
                           &index_dst_comp, &imm31, &index_src_comp);

      /* endif */
      emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

      writemask = writemask << 1;
      src_swizzle = src_swizzle + 1;
   }
   free_temp_indexes(emit);
   return true;
}
9841
9842
/**
 * Emit a BFE instruction (like UBFE, IBFE).
 * tgsi representation:
 * U/IBFE dst, value, offset, width
 * SM5 representation:
 * U/IBFE dst, width, offset, value
 * Note: SM5 has width & offset range (0-31);
 * whereas GLSL has width & offset range (0-32)
 */
static bool
emit_bfe(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
   zero = scalar_src(&zero, TGSI_SWIZZLE_X);

   /* cond1: temp holding the "width == 32" test result */
   unsigned tmp1 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
   const struct tgsi_full_dst_register cond1_dst_x =
      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond1_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);

   /* cond2: temp holding the "offset == 0" test, then the combined test */
   unsigned tmp2 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
   const struct tgsi_full_dst_register cond2_dst_x =
      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond2_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);

   /**
    * In SM5, when width = 32 and offset = 0, it returns 0.
    * On the other hand GLSL, expects value to be copied as it is, to dst.
    */

   /* cond1 = (width == 32) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond1_dst_x, &inst->Src[2], &imm32);

   /* cond2 = (offset == 0) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond2_dst_x, &inst->Src[1], &zero);

   /* cond2 = cond1 && cond2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
                        &cond2_src_x,
                        &cond1_src_x);
   /* IF */
   emit_if(emit, &cond2_src_x);

   /* Full-width extract at offset 0: just copy value through */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &inst->Src[0]);

   /* ELSE */
   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);

   /* U/IBFE dst, width, offset, value -- note the reversed operand order
    * relative to TGSI
    */
   emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
                        &inst->Src[2], &inst->Src[1], &inst->Src[0]);

   /* ENDIF */
   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

   free_temp_indexes(emit);
   return true;
}
9912
9913
/**
 * Emit BFI instruction
 * tgsi representation:
 * BFI dst, base, insert, offset, width
 * SM5 representation:
 * BFI dst, width, offset, insert, base
 * Note: SM5 has width & offset range (0-31);
 * whereas GLSL has width & offset range (0-32)
 */
static bool
emit_bfi(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);

   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
   zero = scalar_src(&zero, TGSI_SWIZZLE_X);

   /* cond1: temp holding the "width == 32" test result */
   unsigned tmp1 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
   const struct tgsi_full_dst_register cond1_dst_x =
      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond1_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);

   /* cond2: temp holding the "offset == 0" test, then the combined test */
   unsigned tmp2 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
   const struct tgsi_full_dst_register cond2_dst_x =
      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond2_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);

   /**
    * In SM5, when width = 32 and offset = 0, it returns 0.
    * On the other hand GLSL, expects insert to be copied as it is, to dst.
    */

   /* cond1 = (width == 32) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond1_dst_x, &inst->Src[3], &imm32);

   /* cond2 = (offset == 0) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond2_dst_x, &inst->Src[2], &zero);

   /* cond2 = cond1 && cond2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND,
                        &cond2_dst_x, &cond2_src_x, &cond1_src_x);

   /* if */
   emit_if(emit, &cond2_src_x);

   /* Full-width insert at offset 0: just copy insert through */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &inst->Src[1]);

   /* else */
   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);

   /* BFI dst, width, offset, insert, base -- note the reversed operand
    * order relative to TGSI
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
   emit_dst_register(emit, &inst->Dst[0]);
   emit_src_register(emit, &inst->Src[3]);
   emit_src_register(emit, &inst->Src[2]);
   emit_src_register(emit, &inst->Src[1]);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   /* endif */
   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

   free_temp_indexes(emit);
   return true;
}
9990
9991
9992 /**
9993 * We only special case the MOV instruction to try to detect constant
9994 * color writes in the fragment shader.
9995 */
9996 static bool
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9997 emit_mov(struct svga_shader_emitter_v10 *emit,
9998 const struct tgsi_full_instruction *inst)
9999 {
10000 const struct tgsi_full_src_register *src = &inst->Src[0];
10001 const struct tgsi_full_dst_register *dst = &inst->Dst[0];
10002
10003 if (emit->unit == PIPE_SHADER_FRAGMENT &&
10004 dst->Register.File == TGSI_FILE_OUTPUT &&
10005 dst->Register.Index == 0 &&
10006 src->Register.File == TGSI_FILE_CONSTANT &&
10007 !src->Register.Indirect) {
10008 emit->constant_color_output = true;
10009 }
10010
10011 return emit_simple(emit, inst);
10012 }
10013
10014
/**
 * Emit a simple VGPU10 instruction which writes to multiple dest registers,
 * where TGSI only uses one dest register.
 *
 * \param dst_count  total number of VGPU10 dest operands to emit
 * \param dst_index  which of those receives TGSI's Dst[0]; the rest
 *                   are emitted as null destinations
 */
static bool
emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
                 const struct tgsi_full_instruction *inst,
                 unsigned dst_count,
                 unsigned dst_index)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   unsigned i;

   begin_emit_instruction(emit);
   emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);

   for (i = 0; i < dst_count; i++) {
      if (i == dst_index) {
         emit_dst_register(emit, &inst->Dst[0]);
      } else {
         /* Unused result slot */
         emit_null_dst_register(emit);
      }
   }

   for (i = 0; i < op->num_src; i++) {
      emit_src_register(emit, &inst->Src[i]);
   }
   end_emit_instruction(emit);

   return true;
}
10047
10048
/**
 * Emit a vmware specific VGPU10 instruction.
 * \param subopcode  the VMWARE-extension sub-opcode; any non-zero
 *                   subopcode requires an SM5 device (version >= 50)
 */
static bool
emit_vmware(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst,
            VGPU10_VMWARE_OPCODE_TYPE subopcode)
{
   VGPU10OpcodeToken0 token0;
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
   const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
   const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
   unsigned i;
   struct tgsi_full_src_register src[3];

   /* Copy each source register, running double-precision sources
    * through check_double_src() first (which may substitute a temp).
    */
   for (i = 0; i < op->num_src; i++) {
      if (dbl_src)
         src[i] = check_double_src(emit, &inst->Src[i]);
      else
         src[i] = inst->Src[i];
   }

   begin_emit_instruction(emit);

   /* Non-zero subopcodes are SM5-only extensions. */
   assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);

   token0.value = 0;
   token0.opcodeType = VGPU10_OPCODE_VMWARE;
   token0.vmwareOpcodeType = subopcode;
   emit_dword(emit, token0.value);

   if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
      /* IDIV only uses the first dest register. */
      emit_dst_register(emit, &inst->Dst[0]);
      emit_null_dst_register(emit);
   } else {
      for (i = 0; i < op->num_dst; i++) {
         if (dbl_dst) {
            check_double_dst_writemask(inst);
         }
         emit_dst_register(emit, &inst->Dst[i]);
      }
   }

   for (i = 0; i < op->num_src; i++) {
      emit_src_register(emit, &src[i]);
   }
   end_emit_instruction(emit);

   /* Release any temps allocated by check_double_src() */
   free_temp_indexes(emit);
   return true;
}
10102
/**
 * Emit a memory register
 */

/* Access mode for a thread-group-shared-memory operand; selects how
 * emit_memory_register() encodes the component selection.
 */
typedef enum {
   MEM_STORE = 0,        /* store destination; components via writemask */
   MEM_LOAD = 1,         /* load source; components via register swizzle */
   MEM_ATOMIC_COUNTER    /* atomic operand; no component selection */
} memory_op;
10112
/**
 * Emit a thread-group-shared-memory operand (token0 + resource index).
 * \param mem_op     access mode; selects the component-selection encoding
 * \param inst       the TGSI instruction that names the memory register
 * \param regIndex   which Src (MEM_LOAD / MEM_ATOMIC_COUNTER) or
 *                   Dst (MEM_STORE) register of \p inst to read
 * \param writemask  destination writemask (used by MEM_STORE only)
 */
static void
emit_memory_register(struct svga_shader_emitter_v10 *emit,
                     memory_op mem_op,
                     const struct tgsi_full_instruction *inst,
                     unsigned regIndex, unsigned writemask)
{
   VGPU10OperandToken0 operand0;
   unsigned resIndex = 0;

   operand0.value = 0;
   operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   switch (mem_op) {
   case MEM_ATOMIC_COUNTER:
   {
      /* Atomics address the resource as a whole; no components. */
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      resIndex = inst->Src[regIndex].Register.Index;
      break;
   }
   case MEM_STORE:
   {
      const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex];

      /* Stores select components with the caller's writemask. */
      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
      operand0.mask = writemask;
      resIndex = reg->Register.Index;
      break;
   }
   case MEM_LOAD:
   {
      const struct tgsi_full_src_register *reg = &inst->Src[regIndex];

      /* Loads carry the source register's swizzle through. */
      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      operand0.swizzleX = reg->Register.SwizzleX;
      operand0.swizzleY = reg->Register.SwizzleY;
      operand0.swizzleZ = reg->Register.SwizzleZ;
      operand0.swizzleW = reg->Register.SwizzleW;
      resIndex = reg->Register.Index;
      break;
   }
   default:
      assert(!"Unexpected memory opcode");
      break;
   }

   emit_dword(emit, operand0.value);
   emit_dword(emit, resIndex);
}
10163
10164
/* Access mode for a UAV operand; selects the component-selection
 * encoding produced by emit_uav_register().
 */
typedef enum {
   UAV_STORE = 0,    /* components via writemask */
   UAV_LOAD = 1,     /* identity swizzle */
   UAV_ATOMIC = 2,   /* no component selection */
   UAV_RESQ = 3,     /* resource query; identity swizzle */
} UAV_OP;
10171
10172
/**
 * Emit a uav register (token0 + device uav slot index).
 * \param res_index     index into the resource file named by
 *                      \p resourceType (image / shader buffer /
 *                      HW atomic buffer)
 * \param uav_op        UAV_STORE / UAV_LOAD / UAV_ATOMIC / UAV_RESQ
 *                      depending on opcode; selects the component mode
 * \param resourceType  resource file type
 * \param writemask     resource writemask (used by UAV_STORE only)
 */

static void
emit_uav_register(struct svga_shader_emitter_v10 *emit,
                  unsigned res_index, UAV_OP uav_op,
                  enum tgsi_file_type resourceType, unsigned writemask)
{
   VGPU10OperandToken0 operand0;
   unsigned uav_index = INVALID_INDEX;

   operand0.value = 0;
   operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* Map the TGSI resource index to the device uav slot. */
   switch (resourceType) {
   case TGSI_FILE_IMAGE:
      uav_index = emit->key.images[res_index].uav_index;
      break;
   case TGSI_FILE_BUFFER:
      uav_index = emit->key.shader_buf_uav_index[res_index];
      break;
   case TGSI_FILE_HW_ATOMIC:
      uav_index = emit->key.atomic_buf_uav_index[res_index];
      break;
   default:
      assert(0);
   }

   switch (uav_op) {
   case UAV_ATOMIC:
      /* Atomics address the uav as a whole; no components. */
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      break;

   case UAV_STORE:
      /* Stores select components with a writemask. */
      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
      operand0.mask = writemask;
      break;

   case UAV_LOAD:
   case UAV_RESQ:
      /* Loads and queries use an identity swizzle. */
      operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      operand0.swizzleX = VGPU10_COMPONENT_X;
      operand0.swizzleY = VGPU10_COMPONENT_Y;
      operand0.swizzleZ = VGPU10_COMPONENT_Z;
      operand0.swizzleW = VGPU10_COMPONENT_W;
      break;

   default:
      break;
   }

   emit_dword(emit, operand0.value);
   emit_dword(emit, uav_index);
}
10234
10235
10236 /**
10237 * A helper function to emit the uav address.
10238 * For memory, buffer, and image resource, it is set to the specified address.
10239 * For HW atomic counter, the address is the sum of the address offset and the
10240 * offset into the HW atomic buffer as specified by the register index.
10241 * It is also possible to specify the counter index as an indirect address.
10242 * And in this case, the uav address will be the sum of the address offset and the
10243 * counter index specified in the indirect address.
10244 */
10245 static
10246 struct tgsi_full_src_register
emit_uav_addr_offset(struct svga_shader_emitter_v10 * emit,enum tgsi_file_type resourceType,unsigned resourceIndex,unsigned resourceIndirect,unsigned resourceIndirectIndex,const struct tgsi_full_src_register * addr_reg)10247 emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
10248 enum tgsi_file_type resourceType,
10249 unsigned resourceIndex,
10250 unsigned resourceIndirect,
10251 unsigned resourceIndirectIndex,
10252 const struct tgsi_full_src_register *addr_reg)
10253 {
10254 unsigned addr_tmp;
10255 struct tgsi_full_dst_register addr_dst;
10256 struct tgsi_full_src_register addr_src;
10257 struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
10258 struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
10259
10260 addr_tmp = get_temp_index(emit);
10261 addr_dst = make_dst_temp_reg(addr_tmp);
10262 addr_src = make_src_temp_reg(addr_tmp);
10263
10264 /* specified address offset */
10265 if (addr_reg)
10266 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
10267 else
10268 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero);
10269
10270 /* For HW atomic counter, we need to find the index to the
10271 * HW atomic buffer.
10272 */
10273 if (resourceType == TGSI_FILE_HW_ATOMIC) {
10274 if (resourceIndirect) {
10275
10276 /**
10277 * uav addr offset = counter layout offset +
10278 * counter indirect index address + address offset
10279 */
10280
10281 /* counter layout offset */
10282 struct tgsi_full_src_register layout_offset;
10283 layout_offset =
10284 make_immediate_reg_int(emit, resourceIndex);
10285
10286 /* counter layout offset + address offset */
10287 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10288 &addr_src, &layout_offset);
10289
10290 /* counter indirect index address */
10291 unsigned indirect_addr =
10292 emit->address_reg_index[resourceIndirectIndex];
10293
10294 struct tgsi_full_src_register indirect_addr_src =
10295 make_src_temp_reg(indirect_addr);
10296
10297 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10298
10299 /* counter layout offset + address offset + counter indirect address */
10300 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10301 &addr_src, &indirect_addr_src);
10302
10303 } else {
10304 struct tgsi_full_src_register index_src;
10305
10306 index_src = make_immediate_reg_int(emit, resourceIndex);
10307
10308 /* uav addr offset = counter index address + address offset */
10309 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst,
10310 &addr_src, &index_src);
10311 }
10312
10313 /* HW atomic buffer is declared as raw buffer, so the buffer address is
10314 * the byte offset, so we need to multiple the counter addr offset by 4.
10315 */
10316 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
10317 &addr_src, &two);
10318 }
10319 else if (resourceType == TGSI_FILE_IMAGE) {
10320 if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
10321 && emit->key.images[resourceIndex].is_single_layer) {
10322
10323 struct tgsi_full_dst_register addr_dst_z =
10324 writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
10325
10326 /* For non-layered 3D texture image view, we have to make sure the z
10327 * component of the address offset is set to 0.
10328 */
10329 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
10330 &zero);
10331 }
10332 }
10333
10334 return addr_src;
10335 }
10336
10337
10338
/**
 * A helper function to expand indirect indexing to uav resource
 * by looping through the resource array, compare the indirect index and
 * emit the instruction for each resource in the array.
 *
 * The expansion is built recursively and has the shape:
 *    IF (addr_index == index)        fb(index)
 *    ELSE IF (addr_index == index+1) fb(index+1)
 *    ...
 *    ENDIF ... ENDIF
 * One ENDIF is emitted per recursion level on the way back out.
 *
 * \param index       first resource index to test at this level
 * \param count       number of resources remaining
 * \param addr_index  scalar temp register holding the runtime index
 * \param fb          callback that emits the instruction for one
 *                    concrete resource index
 */
static void
loop_instruction(unsigned index, unsigned count,
                 struct tgsi_full_src_register *addr_index,
                 void (*fb)(struct svga_shader_emitter_v10 *,
                            const struct tgsi_full_instruction *, unsigned),
                 struct svga_shader_emitter_v10 *emit,
                 const struct tgsi_full_instruction *inst)
{
   if (count == 0)
      return;

   if (index > 0) {
      /* ELSE */
      emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
   }

   struct tgsi_full_src_register index_src =
      make_immediate_reg_int(emit, index);

   unsigned tmp_index = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
   struct tgsi_full_src_register tmp_src_x =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);

   /* IEQ tmp, addr_tmp_index, index */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
                        addr_index, &index_src);

   /* IF tmp */
   emit_if(emit, &tmp_src_x);

   /* Release the comparison temp before recursing so the nested levels
    * can reuse it.
    */
   free_temp_indexes(emit);

   (*fb)(emit, inst, index);

   loop_instruction(index+1, count-1, addr_index, fb, emit, inst);

   /* ENDIF */
   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
}
10385
10386
10387 /**
10388 * A helper function to emit the load instruction.
10389 */
10390 static void
emit_load_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10391 emit_load_instruction(struct svga_shader_emitter_v10 *emit,
10392 const struct tgsi_full_instruction *inst,
10393 unsigned resourceIndex)
10394 {
10395 VGPU10OpcodeToken0 token0;
10396 struct tgsi_full_src_register addr_src;
10397 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10398
10399 /* Resolve the resource address for this resource first */
10400 addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
10401 inst->Src[0].Register.Indirect,
10402 inst->Src[0].Indirect.Index,
10403 &inst->Src[1]);
10404
10405 /* LOAD resource, address, src */
10406 begin_emit_instruction(emit);
10407
10408 token0.value = 0;
10409
10410 if (resourceType == TGSI_FILE_MEMORY ||
10411 resourceType == TGSI_FILE_BUFFER ||
10412 resourceType == TGSI_FILE_HW_ATOMIC) {
10413 token0.opcodeType = VGPU10_OPCODE_LD_RAW;
10414 addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10415 }
10416 else {
10417 token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10418 }
10419
10420 token0.saturate = inst->Instruction.Saturate,
10421 emit_dword(emit, token0.value);
10422
10423 emit_dst_register(emit, &inst->Dst[0]);
10424 emit_src_register(emit, &addr_src);
10425
10426 if (resourceType == TGSI_FILE_MEMORY) {
10427 emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
10428 } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
10429 emit_uav_register(emit, inst->Src[0].Dimension.Index,
10430 UAV_LOAD, inst->Src[0].Register.File, 0);
10431 } else if (resourceType == TGSI_FILE_BUFFER) {
10432 if (emit->raw_shaderbufs & (1 << resourceIndex))
10433 emit_resource_register(emit, resourceIndex +
10434 emit->raw_shaderbuf_srv_start_index);
10435 else
10436 emit_uav_register(emit, resourceIndex,
10437 UAV_LOAD, inst->Src[0].Register.File, 0);
10438 } else {
10439 emit_uav_register(emit, resourceIndex,
10440 UAV_LOAD, inst->Src[0].Register.File, 0);
10441 }
10442
10443 end_emit_instruction(emit);
10444
10445 free_temp_indexes(emit);
10446 }
10447
10448
10449 /**
10450 * Emit uav / memory load instruction
10451 */
10452 static bool
emit_load(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10453 emit_load(struct svga_shader_emitter_v10 *emit,
10454 const struct tgsi_full_instruction *inst)
10455 {
10456 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10457 unsigned resourceIndex = inst->Src[0].Register.Index;
10458
10459 /* If the resource register has indirect index, we will need
10460 * to expand it since SM5 device does not support indirect indexing
10461 * for uav.
10462 */
10463 if (inst->Src[0].Register.Indirect &&
10464 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10465
10466 unsigned indirect_index = inst->Src[0].Indirect.Index;
10467 unsigned num_resources =
10468 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10469 emit->num_images;
10470
10471 /* indirect index tmp register */
10472 unsigned indirect_addr = emit->address_reg_index[indirect_index];
10473 struct tgsi_full_src_register indirect_addr_src =
10474 make_src_temp_reg(indirect_addr);
10475 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10476
10477 /* Add offset to the indirect index */
10478 if (inst->Src[0].Register.Index != 0) {
10479 struct tgsi_full_src_register offset =
10480 make_immediate_reg_int(emit, inst->Src[0].Register.Index);
10481 struct tgsi_full_dst_register indirect_addr_dst =
10482 make_dst_temp_reg(indirect_addr);
10483 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10484 &indirect_addr_src, &offset);
10485 }
10486
10487 /* Loop through the resource array to find which resource to use.
10488 */
10489 loop_instruction(0, num_resources, &indirect_addr_src,
10490 emit_load_instruction, emit, inst);
10491 }
10492 else {
10493 emit_load_instruction(emit, inst, resourceIndex);
10494 }
10495
10496 free_temp_indexes(emit);
10497
10498 return true;
10499 }
10500
10501
/**
 * A helper function to emit a store instruction.
 *
 * Handles three cases, chosen from the destination writemask:
 *  - a writemask the device store opcode supports directly: one store;
 *  - a typed image store with a partial writemask: read-modify-write
 *    (load the texel, merge, store .xyzw);
 *  - a raw/memory store with a non-contiguous writemask: loop and store
 *    one component at a time, stepping the byte address.
 *
 * \param resourceIndex  concrete (already resolved) resource index
 */
static void
emit_store_instruction(struct svga_shader_emitter_v10 *emit,
                       const struct tgsi_full_instruction *inst,
                       unsigned resourceIndex)
{
   VGPU10OpcodeToken0 token0;
   enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
   unsigned writemask = inst->Dst[0].Register.WriteMask;
   struct tgsi_full_src_register addr_src;

   unsigned tmp_index = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
   struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
   struct tgsi_full_dst_register tmp_dst;

   struct tgsi_full_src_register src = inst->Src[1];
   struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);

   bool needLoad = false;
   bool needPerComponentStore = false;
   unsigned swizzles = 0;

   /* Resolve the resource address for this resource first */
   addr_src = emit_uav_addr_offset(emit, resourceType,
                                   inst->Dst[0].Register.Index,
                                   inst->Dst[0].Register.Indirect,
                                   inst->Dst[0].Indirect.Index,
                                   &inst->Src[0]);

   /* First check the writemask to see if it can be supported
    * by the store instruction.
    * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory,
    * we can adjust the address offset, and do a per-component store.
    * store_uav_typed only allows .xyzw. In this case, we need to
    * do a load first, update the temporary and then issue the
    * store. This does have a small risk that if different threads
    * update different components of the same address, data might not be
    * in sync.
    */
   if (resourceType == TGSI_FILE_IMAGE) {
      needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? false : true;
   }
   else if (resourceType == TGSI_FILE_BUFFER ||
            resourceType == TGSI_FILE_MEMORY) {
      if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
            writemask == TGSI_WRITEMASK_XYZ ||
            writemask == TGSI_WRITEMASK_XYZW)) {
         needPerComponentStore = true;
      }
   }

   if (needLoad) {
      assert(resourceType == TGSI_FILE_IMAGE);

      /* LOAD resource, address, src */
      begin_emit_instruction(emit);

      token0.value = 0;
      token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
      /* NOTE(review): trailing ',' is a comma operator joining this
       * assignment with the emit_dword() call below; a ';' was
       * presumably intended.  Behavior is identical either way.
       */
      token0.saturate = inst->Instruction.Saturate,
      emit_dword(emit, token0.value);

      emit_dst_register(emit, &tmp_dst_xyzw);
      emit_src_register(emit, &addr_src);
      emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);

      end_emit_instruction(emit);

      /* MOV tmp(writemask) src */
      tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);

      /* Now set the writemask to xyzw for the store_uav_typed instruction */
      writemask = TGSI_WRITEMASK_XYZW;
   }
   else if (needPerComponentStore) {
      /* Save the src swizzles (2 bits per component, .x lowest) */
      swizzles = src.Register.SwizzleX |
                 src.Register.SwizzleY << 2 |
                 src.Register.SwizzleZ << 4 |
                 src.Register.SwizzleW << 6;
   }

   bool storeDone = false;
   unsigned perComponentWritemask = writemask;
   unsigned shift = 0;
   struct tgsi_full_src_register shift_src;

   /* Emit one store per iteration; a single pass unless doing a
    * per-component store.
    */
   while (!storeDone) {

      if (needPerComponentStore) {
         assert(perComponentWritemask);
         /* Skip components that are not written. */
         while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
            shift++;
            perComponentWritemask >>= 1;
         }

         /* First adjust the addr_src to the next component:
          * addr = addr + shift * 4 bytes
          */
         if (shift != 0) {
            struct tgsi_full_dst_register addr_dst =
               make_dst_temp_reg(addr_src.Register.Index);
            shift_src = make_immediate_reg_int(emit, shift);
            emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
                                 &shift_src, &addr_src);

            /* Adjust the src swizzle as well */
            swizzles >>= (shift * 2);
         }

         /* Now the address offset is set to the next component,
          * we can set the writemask to .x and make sure to set
          * the src swizzle as well.
          */
         src.Register.SwizzleX = swizzles & 0x3;
         writemask = TGSI_WRITEMASK_X;

         /* Shift for the next component check */
         perComponentWritemask >>= 1;
         shift = 1;
      }

      /* STORE resource, address, src */
      begin_emit_instruction(emit);

      token0.value = 0;
      token0.saturate = inst->Instruction.Saturate;

      if (resourceType == TGSI_FILE_MEMORY) {
         token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
         addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
         emit_dword(emit, token0.value);
         emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
      }
      else if (resourceType == TGSI_FILE_BUFFER ||
               resourceType == TGSI_FILE_HW_ATOMIC) {
         token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
         addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
         emit_dword(emit, token0.value);
         emit_uav_register(emit, resourceIndex, UAV_STORE,
                           resourceType, writemask);
      }
      else {
         token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
         emit_dword(emit, token0.value);
         emit_uav_register(emit, resourceIndex, UAV_STORE,
                           resourceType, writemask);
      }

      emit_src_register(emit, &addr_src);

      /* The merged temp for read-modify-write, otherwise the TGSI src */
      if (needLoad)
         emit_src_register(emit, &tmp_src);
      else
         emit_src_register(emit, &src);

      end_emit_instruction(emit);

      if (!needPerComponentStore || !perComponentWritemask)
         storeDone = true;
   }

   free_temp_indexes(emit);
}
10668
10669
10670 /**
10671 * Emit uav / memory store instruction
10672 */
10673 static bool
emit_store(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10674 emit_store(struct svga_shader_emitter_v10 *emit,
10675 const struct tgsi_full_instruction *inst)
10676 {
10677 enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10678 unsigned resourceIndex = inst->Dst[0].Register.Index;
10679
10680 /* If the resource register has indirect index, we will need
10681 * to expand it since SM5 device does not support indirect indexing
10682 * for uav.
10683 */
10684 if (inst->Dst[0].Register.Indirect &&
10685 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10686
10687 unsigned indirect_index = inst->Dst[0].Indirect.Index;
10688 unsigned num_resources =
10689 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10690 emit->num_images;
10691
10692 /* Indirect index tmp register */
10693 unsigned indirect_addr = emit->address_reg_index[indirect_index];
10694 struct tgsi_full_src_register indirect_addr_src =
10695 make_src_temp_reg(indirect_addr);
10696 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10697
10698 /* Add offset to the indirect index */
10699 if (inst->Dst[0].Register.Index != 0) {
10700 struct tgsi_full_src_register offset =
10701 make_immediate_reg_int(emit, inst->Dst[0].Register.Index);
10702 struct tgsi_full_dst_register indirect_addr_dst =
10703 make_dst_temp_reg(indirect_addr);
10704 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10705 &indirect_addr_src, &offset);
10706 }
10707
10708 /* Loop through the resource array to find which resource to use.
10709 */
10710 loop_instruction(0, num_resources, &indirect_addr_src,
10711 emit_store_instruction, emit, inst);
10712 }
10713 else {
10714 emit_store_instruction(emit, inst, resourceIndex);
10715 }
10716
10717 free_temp_indexes(emit);
10718
10719 return true;
10720 }
10721
10722
10723 /**
10724 * A helper function to emit an atomic instruction.
10725 */
10726
10727 static void
emit_atomic_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10728 emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
10729 const struct tgsi_full_instruction *inst,
10730 unsigned resourceIndex)
10731 {
10732 VGPU10OpcodeToken0 token0;
10733 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10734 struct tgsi_full_src_register addr_src;
10735 VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
10736 const struct tgsi_full_src_register *offset;
10737
10738 /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */
10739 offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1];
10740
10741 /* Resolve the resource address */
10742 addr_src = emit_uav_addr_offset(emit, resourceType,
10743 inst->Src[0].Register.Index,
10744 inst->Src[0].Register.Indirect,
10745 inst->Src[0].Indirect.Index,
10746 offset);
10747
10748 /* Emit the atomic operation */
10749 begin_emit_instruction(emit);
10750
10751 token0.value = 0;
10752 token0.opcodeType = opcode;
10753 token0.saturate = inst->Instruction.Saturate,
10754 emit_dword(emit, token0.value);
10755
10756 emit_dst_register(emit, &inst->Dst[0]);
10757
10758 if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
10759 emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
10760 } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) {
10761 assert(inst->Src[0].Register.Dimension == 1);
10762 emit_uav_register(emit, inst->Src[0].Dimension.Index,
10763 UAV_ATOMIC, inst->Src[0].Register.File, 0);
10764 } else {
10765 emit_uav_register(emit, resourceIndex,
10766 UAV_ATOMIC, inst->Src[0].Register.File, 0);
10767 }
10768
10769 /* resource address offset */
10770 emit_src_register(emit, &addr_src);
10771
10772 struct tgsi_full_src_register src0_x =
10773 swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10774 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10775 emit_src_register(emit, &src0_x);
10776
10777 if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
10778 struct tgsi_full_src_register src1_x =
10779 swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10780 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10781
10782 emit_src_register(emit, &src1_x);
10783 }
10784
10785 end_emit_instruction(emit);
10786
10787 free_temp_indexes(emit);
10788 }
10789
10790
10791 /**
10792 * Emit atomic instruction
10793 */
10794 static bool
emit_atomic(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_OPCODE_TYPE opcode)10795 emit_atomic(struct svga_shader_emitter_v10 *emit,
10796 const struct tgsi_full_instruction *inst,
10797 VGPU10_OPCODE_TYPE opcode)
10798 {
10799 enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10800 unsigned resourceIndex = inst->Src[0].Register.Index;
10801
10802 emit->cur_atomic_opcode = opcode;
10803
10804 /* If the resource register has indirect index, we will need
10805 * to expand it since SM5 device does not support indirect indexing
10806 * for uav.
10807 */
10808 if (inst->Dst[0].Register.Indirect &&
10809 (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10810
10811 unsigned indirect_index = inst->Dst[0].Indirect.Index;
10812 unsigned num_resources =
10813 resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10814 emit->num_images;
10815
10816 /* indirect index tmp register */
10817 unsigned indirect_addr = emit->address_reg_index[indirect_index];
10818 struct tgsi_full_src_register indirect_addr_src =
10819 make_src_temp_reg(indirect_addr);
10820 indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10821
10822 /* Loop through the resource array to find which resource to use.
10823 */
10824 loop_instruction(0, num_resources, &indirect_addr_src,
10825 emit_atomic_instruction, emit, inst);
10826 }
10827 else {
10828 emit_atomic_instruction(emit, inst, resourceIndex);
10829 }
10830
10831 free_temp_indexes(emit);
10832
10833 return true;
10834 }
10835
10836
10837 /**
10838 * Emit barrier instruction
10839 */
10840 static bool
emit_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10841 emit_barrier(struct svga_shader_emitter_v10 *emit,
10842 const struct tgsi_full_instruction *inst)
10843 {
10844 VGPU10OpcodeToken0 token0;
10845
10846 assert(emit->version >= 50);
10847
10848 token0.value = 0;
10849 token0.opcodeType = VGPU10_OPCODE_SYNC;
10850
10851 if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
10852 /* SM5 device doesn't support BARRIER in tcs . If barrier is used
10853 * in shader, don't do anything for this opcode and continue rest
10854 * of shader translation
10855 */
10856 util_debug_message(&emit->svga_debug_callback, INFO,
10857 "barrier instruction is not supported in tessellation control shader\n");
10858 return true;
10859 }
10860 else if (emit->unit == PIPE_SHADER_COMPUTE) {
10861 if (emit->cs.shared_memory_declared)
10862 token0.syncThreadGroupShared = 1;
10863
10864 if (emit->uav_declared)
10865 token0.syncUAVMemoryGroup = 1;
10866
10867 token0.syncThreadsInGroup = 1;
10868 } else {
10869 token0.syncUAVMemoryGlobal = 1;
10870 }
10871
10872 assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10873 token0.syncThreadGroupShared);
10874
10875 begin_emit_instruction(emit);
10876 emit_dword(emit, token0.value);
10877 end_emit_instruction(emit);
10878
10879 return true;
10880 }
10881
10882 /**
10883 * Emit memory barrier instruction
10884 */
10885 static bool
emit_memory_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10886 emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
10887 const struct tgsi_full_instruction *inst)
10888 {
10889 unsigned index = inst->Src[0].Register.Index;
10890 unsigned swizzle = inst->Src[0].Register.SwizzleX;
10891 unsigned bartype = emit->immediates[index][swizzle].Int;
10892 VGPU10OpcodeToken0 token0;
10893
10894 token0.value = 0;
10895 token0.opcodeType = VGPU10_OPCODE_SYNC;
10896
10897 if (emit->unit == PIPE_SHADER_COMPUTE) {
10898
10899 /* For compute shader, issue sync opcode with different options
10900 * depending on the memory barrier type.
10901 *
10902 * Bit 0: Shader storage buffers
10903 * Bit 1: Atomic buffers
10904 * Bit 2: Images
10905 * Bit 3: Shared memory
10906 * Bit 4: Thread group
10907 */
10908
10909 if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10910 TGSI_MEMBAR_SHADER_IMAGE))
10911 token0.syncUAVMemoryGlobal = 1;
10912 else if (bartype & TGSI_MEMBAR_THREAD_GROUP)
10913 token0.syncUAVMemoryGroup = 1;
10914
10915 if (bartype & TGSI_MEMBAR_SHARED)
10916 token0.syncThreadGroupShared = 1;
10917 }
10918 else {
10919 /**
10920 * For graphics stages, only sync_uglobal is available.
10921 */
10922 if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10923 TGSI_MEMBAR_SHADER_IMAGE))
10924 token0.syncUAVMemoryGlobal = 1;
10925 }
10926
10927 assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10928 token0.syncThreadGroupShared);
10929
10930 begin_emit_instruction(emit);
10931 emit_dword(emit, token0.value);
10932 end_emit_instruction(emit);
10933
10934 return true;
10935 }
10936
10937
10938 /**
10939 * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
10940 */
10941 static bool
emit_resq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10942 emit_resq(struct svga_shader_emitter_v10 *emit,
10943 const struct tgsi_full_instruction *inst)
10944 {
10945 struct tgsi_full_src_register zero =
10946 make_immediate_reg_int(emit, 0);
10947
10948 unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource;
10949
10950 if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) {
10951 struct tgsi_full_src_register image_src;
10952
10953 image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index);
10954
10955 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src);
10956 return true;
10957 }
10958
10959 begin_emit_instruction(emit);
10960 if (uav_resource == TGSI_TEXTURE_BUFFER) {
10961 emit_opcode(emit, VGPU10_OPCODE_BUFINFO, false);
10962 emit_dst_register(emit, &inst->Dst[0]);
10963 }
10964 else {
10965 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
10966 emit_dst_register(emit, &inst->Dst[0]);
10967 emit_src_register(emit, &zero);
10968 }
10969 emit_uav_register(emit, inst->Src[0].Register.Index,
10970 UAV_RESQ, inst->Src[0].Register.File, 0);
10971 end_emit_instruction(emit);
10972
10973 return true;
10974 }
10975
10976
/**
 * Translate a single TGSI instruction to VGPU10 by dispatching on the
 * TGSI opcode.
 * \param inst_number  position of the instruction in the TGSI stream
 *                     (unused here, but part of the emit callback signature)
 * \return true for success, false for unsupported or unexpected opcodes
 */
static bool
emit_instruction(struct svga_shader_emitter_v10 *emit,
                 unsigned inst_number,
                 const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;

   switch (opcode) {
   /* Opcodes that map 1:1 to a VGPU10 instruction */
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_ROUND:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_XOR:
   /* Begin SM5 opcodes */
   case TGSI_OPCODE_F2D:
   case TGSI_OPCODE_D2F:
   case TGSI_OPCODE_DADD:
   case TGSI_OPCODE_DMUL:
   case TGSI_OPCODE_DMAX:
   case TGSI_OPCODE_DMIN:
   case TGSI_OPCODE_DSGE:
   case TGSI_OPCODE_DSLT:
   case TGSI_OPCODE_DSEQ:
   case TGSI_OPCODE_DSNE:
   case TGSI_OPCODE_BREV:
   case TGSI_OPCODE_POPC:
   case TGSI_OPCODE_LSB:
   case TGSI_OPCODE_INTERP_CENTROID:
   case TGSI_OPCODE_INTERP_SAMPLE:
      /* simple instructions */
      return emit_simple(emit, inst);
   case TGSI_OPCODE_RET:
      if (emit->unit == PIPE_SHADER_TESS_CTRL &&
          !emit->tcs.control_point_phase) {

         /* store the tessellation levels in the patch constant phase only */
         store_tesslevels(emit);
      }
      return emit_simple(emit, inst);

   /* Bit-scan and bitfield opcodes */
   case TGSI_OPCODE_IMSB:
   case TGSI_OPCODE_UMSB:
      return emit_msb(emit, inst);
   case TGSI_OPCODE_IBFE:
   case TGSI_OPCODE_UBFE:
      return emit_bfe(emit, inst);
   case TGSI_OPCODE_BFI:
      return emit_bfi(emit, inst);
   case TGSI_OPCODE_MOV:
      return emit_mov(emit, inst);
   case TGSI_OPCODE_EMIT:
      return emit_vertex(emit, inst);
   case TGSI_OPCODE_ENDPRIM:
      return emit_endprim(emit, inst);
   case TGSI_OPCODE_IABS:
      return emit_iabs(emit, inst);
   case TGSI_OPCODE_ARL:
      FALLTHROUGH;
   case TGSI_OPCODE_UARL:
      return emit_arl_uarl(emit, inst);
   case TGSI_OPCODE_BGNSUB:
      /* no-op */
      return true;
   case TGSI_OPCODE_CAL:
      return emit_cal(emit, inst);
   case TGSI_OPCODE_CMP:
      return emit_cmp(emit, inst);
   case TGSI_OPCODE_COS:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_DST:
      return emit_dst(emit, inst);
   case TGSI_OPCODE_EX2:
      return emit_ex2(emit, inst);
   case TGSI_OPCODE_EXP:
      return emit_exp(emit, inst);
   case TGSI_OPCODE_IF:
      return emit_if(emit, &inst->Src[0]);
   case TGSI_OPCODE_KILL:
      return emit_discard(emit, inst);
   case TGSI_OPCODE_KILL_IF:
      return emit_cond_discard(emit, inst);
   case TGSI_OPCODE_LG2:
      return emit_lg2(emit, inst);
   case TGSI_OPCODE_LIT:
      return emit_lit(emit, inst);
   case TGSI_OPCODE_LODQ:
      return emit_lodq(emit, inst);
   case TGSI_OPCODE_LOG:
      return emit_log(emit, inst);
   case TGSI_OPCODE_LRP:
      return emit_lrp(emit, inst);
   case TGSI_OPCODE_POW:
      return emit_pow(emit, inst);
   case TGSI_OPCODE_RCP:
      return emit_rcp(emit, inst);
   case TGSI_OPCODE_RSQ:
      return emit_rsq(emit, inst);
   case TGSI_OPCODE_SAMPLE:
      return emit_sample(emit, inst);
   /* Set-on-comparison opcodes (float 0.0/1.0 results) */
   case TGSI_OPCODE_SEQ:
      return emit_seq(emit, inst);
   case TGSI_OPCODE_SGE:
      return emit_sge(emit, inst);
   case TGSI_OPCODE_SGT:
      return emit_sgt(emit, inst);
   case TGSI_OPCODE_SIN:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_SLE:
      return emit_sle(emit, inst);
   case TGSI_OPCODE_SLT:
      return emit_slt(emit, inst);
   case TGSI_OPCODE_SNE:
      return emit_sne(emit, inst);
   case TGSI_OPCODE_SSG:
      return emit_ssg(emit, inst);
   case TGSI_OPCODE_ISSG:
      return emit_issg(emit, inst);
   /* Texture sampling opcodes */
   case TGSI_OPCODE_TEX:
      return emit_tex(emit, inst);
   case TGSI_OPCODE_TG4:
      return emit_tg4(emit, inst);
   case TGSI_OPCODE_TEX2:
      return emit_tex2(emit, inst);
   case TGSI_OPCODE_TXP:
      return emit_txp(emit, inst);
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL:
      return emit_txl_txb(emit, inst);
   case TGSI_OPCODE_TXD:
      return emit_txd(emit, inst);
   case TGSI_OPCODE_TXF:
      return emit_txf(emit, inst);
   case TGSI_OPCODE_TXL2:
      return emit_txl2(emit, inst);
   case TGSI_OPCODE_TXQ:
      return emit_txq(emit, inst);
   case TGSI_OPCODE_UIF:
      return emit_if(emit, &inst->Src[0]);
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UDIV:
      /* These cases use only the FIRST of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 0);
   case TGSI_OPCODE_IDIV:
      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* These cases use only the SECOND of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 1);

   /* Begin SM5 opcodes */
   case TGSI_OPCODE_DABS:
      return emit_dabs(emit, inst);
   case TGSI_OPCODE_DNEG:
      return emit_dneg(emit, inst);
   case TGSI_OPCODE_DRCP:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_DSQRT:
      return emit_dsqrt(emit, inst);
   case TGSI_OPCODE_DMAD:
      return emit_dmad(emit, inst);
   case TGSI_OPCODE_DFRAC:
      return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
   case TGSI_OPCODE_D2I:
   case TGSI_OPCODE_D2U:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_I2D:
   case TGSI_OPCODE_U2D:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_DRSQ:
      return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
   case TGSI_OPCODE_DDIV:
      return emit_simple(emit, inst);
   case TGSI_OPCODE_INTERP_OFFSET:
      return emit_interp_offset(emit, inst);
   case TGSI_OPCODE_FMA:
   case TGSI_OPCODE_DFMA:
      return emit_simple(emit, inst);

   case TGSI_OPCODE_DTRUNC:
      return emit_dtrunc(emit, inst);

   /* The following opcodes should never be seen here.  We return zero
    * for PIPE_CAP_TGSI_DROUND_SUPPORTED.
    */
   case TGSI_OPCODE_LDEXP:
   case TGSI_OPCODE_DSSG:
   case TGSI_OPCODE_DLDEXP:
   case TGSI_OPCODE_DCEIL:
   case TGSI_OPCODE_DFLR:
      debug_printf("Unexpected TGSI opcode %s.  "
                   "Should have been translated away by the GLSL compiler.\n",
                   tgsi_get_opcode_name(opcode));
      return false;

   /* Memory / image opcodes */
   case TGSI_OPCODE_LOAD:
      return emit_load(emit, inst);

   case TGSI_OPCODE_STORE:
      return emit_store(emit, inst);

   case TGSI_OPCODE_ATOMAND:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);

   case TGSI_OPCODE_ATOMCAS:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);

   case TGSI_OPCODE_ATOMIMAX:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);

   case TGSI_OPCODE_ATOMIMIN:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);

   case TGSI_OPCODE_ATOMOR:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);

   case TGSI_OPCODE_ATOMUADD:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);

   case TGSI_OPCODE_ATOMUMAX:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);

   case TGSI_OPCODE_ATOMUMIN:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);

   case TGSI_OPCODE_ATOMXCHG:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);

   case TGSI_OPCODE_ATOMXOR:
      return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);

   case TGSI_OPCODE_BARRIER:
      return emit_barrier(emit, inst);

   case TGSI_OPCODE_MEMBAR:
      return emit_memory_barrier(emit, inst);

   case TGSI_OPCODE_RESQ:
      return emit_resq(emit, inst);

   case TGSI_OPCODE_END:
      /* emit the extra epilog instructions before the final RET */
      if (!emit_post_helpers(emit))
         return false;
      return emit_simple(emit, inst);

   default:
      debug_printf("Unimplemented tgsi instruction %s\n",
                   tgsi_get_opcode_name(opcode));
      return false;
   }

   /* not reached: every switch case returns */
   return true;
}
11281
11282
11283 /**
11284 * Translate a single TGSI instruction to VGPU10.
11285 */
11286 static bool
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)11287 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
11288 unsigned inst_number,
11289 const struct tgsi_full_instruction *inst)
11290 {
11291 if (emit->skip_instruction)
11292 return true;
11293
11294 bool ret = true;
11295 unsigned start_token = emit_get_num_tokens(emit);
11296
11297 emit->reemit_tgsi_instruction = false;
11298
11299 ret = emit_instruction(emit, inst_number, inst);
11300
11301 if (emit->reemit_tgsi_instruction) {
11302 /**
11303 * Reset emit->ptr to where the translation of this tgsi instruction
11304 * started.
11305 */
11306 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
11307 emit->ptr = (char *) (tokens + start_token);
11308
11309 emit->reemit_tgsi_instruction = false;
11310 }
11311 return ret;
11312 }
11313
11314
11315 /**
11316 * Emit the extra instructions to adjust the vertex position.
11317 * There are two possible adjustments:
11318 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
11319 * "prescale" and "pretranslate" values.
11320 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
 * The temporary register holding the vertex position is taken from
 * emit->vposition.tmp_index (the function takes no parameters besides emit).
11322 */
11323 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)11324 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
11325 {
11326 struct tgsi_full_src_register tmp_pos_src;
11327 struct tgsi_full_dst_register pos_dst;
11328 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
11329
11330 /* Don't bother to emit any extra vertex instructions if vertex position is
11331 * not written out
11332 */
11333 if (emit->vposition.out_index == INVALID_INDEX)
11334 return;
11335
11336 /**
11337 * Reset the temporary vertex position register index
11338 * so that emit_dst_register() will use the real vertex position output
11339 */
11340 emit->vposition.tmp_index = INVALID_INDEX;
11341
11342 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
11343 pos_dst = make_dst_output_reg(emit->vposition.out_index);
11344
11345 /* If non-adjusted vertex position register index
11346 * is valid, copy the vertex position from the temporary
11347 * vertex position register before it is modified by the
11348 * prescale computation.
11349 */
11350 if (emit->vposition.so_index != INVALID_INDEX) {
11351 struct tgsi_full_dst_register pos_so_dst =
11352 make_dst_output_reg(emit->vposition.so_index);
11353
11354 /* MOV pos_so, tmp_pos */
11355 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
11356 }
11357
11358 if (emit->vposition.need_prescale) {
11359 /* This code adjusts the vertex position to match the VGPU10 convention.
11360 * If p is the position computed by the shader (usually by applying the
11361 * modelview and projection matrices), the new position q is computed by:
11362 *
11363 * q.x = p.w * trans.x + p.x * scale.x
11364 * q.y = p.w * trans.y + p.y * scale.y
11365 * q.z = p.w * trans.z + p.z * scale.z;
11366 * q.w = p.w * trans.w + p.w;
11367 */
11368 struct tgsi_full_src_register tmp_pos_src_w =
11369 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11370 struct tgsi_full_dst_register tmp_pos_dst =
11371 make_dst_temp_reg(vs_pos_tmp_index);
11372 struct tgsi_full_dst_register tmp_pos_dst_xyz =
11373 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
11374
11375 struct tgsi_full_src_register prescale_scale =
11376 make_src_temp_reg(emit->vposition.prescale_scale_index);
11377 struct tgsi_full_src_register prescale_trans =
11378 make_src_temp_reg(emit->vposition.prescale_trans_index);
11379
11380 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
11381 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
11382 &tmp_pos_src, &prescale_scale);
11383
11384 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
11385 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
11386 &prescale_trans, &tmp_pos_src);
11387 }
11388 else if (emit->key.vs.undo_viewport) {
11389 /* This code computes the final vertex position from the temporary
11390 * vertex position by undoing the viewport transformation and the
11391 * divide-by-W operation (we convert window coords back to clip coords).
11392 * This is needed when we use the 'draw' module for fallbacks.
11393 * If p is the temp pos in window coords, then the NDC coord q is:
11394 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
11395 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
11396 * q.z = p.z * p.w
11397 * q.w = p.w
11398 * CONST[vs_viewport_index] contains:
11399 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
11400 */
11401 struct tgsi_full_dst_register tmp_pos_dst =
11402 make_dst_temp_reg(vs_pos_tmp_index);
11403 struct tgsi_full_dst_register tmp_pos_dst_xy =
11404 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
11405 struct tgsi_full_src_register tmp_pos_src_wwww =
11406 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11407
11408 struct tgsi_full_dst_register pos_dst_xyz =
11409 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
11410 struct tgsi_full_dst_register pos_dst_w =
11411 writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
11412
11413 struct tgsi_full_src_register vp_xyzw =
11414 make_src_const_reg(emit->vs.viewport_index);
11415 struct tgsi_full_src_register vp_zwww =
11416 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
11417 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
11418
11419 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
11420 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
11421 &tmp_pos_src, &vp_zwww);
11422
11423 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
11424 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
11425 &tmp_pos_src, &vp_xyzw);
11426
11427 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
11428 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
11429 &tmp_pos_src, &tmp_pos_src_wwww);
11430
11431 /* MOV pos.w, tmp_pos.w */
11432 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
11433 }
11434 else if (vs_pos_tmp_index != INVALID_INDEX) {
11435 /* This code is to handle the case where the temporary vertex
11436 * position register is created when the vertex shader has stream
11437 * output and prescale is disabled because rasterization is to be
11438 * discarded.
11439 */
11440 struct tgsi_full_dst_register pos_dst =
11441 make_dst_output_reg(emit->vposition.out_index);
11442
11443 /* MOV pos, tmp_pos */
11444 begin_emit_instruction(emit);
11445 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11446 emit_dst_register(emit, &pos_dst);
11447 emit_src_register(emit, &tmp_pos_src);
11448 end_emit_instruction(emit);
11449 }
11450
11451 /* Restore original vposition.tmp_index value for the next GS vertex.
11452 * It doesn't matter for VS.
11453 */
11454 emit->vposition.tmp_index = vs_pos_tmp_index;
11455 }
11456
11457 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)11458 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
11459 {
11460 if (emit->clip_mode == CLIP_DISTANCE) {
11461 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
11462 emit_clip_distance_instructions(emit);
11463
11464 } else if (emit->clip_mode == CLIP_VERTEX &&
11465 emit->key.last_vertex_stage) {
11466 /* Convert TGSI CLIPVERTEX to CLIPDIST */
11467 emit_clip_vertex_instructions(emit);
11468 }
11469
11470 /**
11471 * Emit vertex position and take care of legacy user planes only if
11472 * there is a valid vertex position register index.
11473 * This is to take care of the case
11474 * where the shader doesn't output vertex position. Then in
11475 * this case, don't bother to emit more vertex instructions.
11476 */
11477 if (emit->vposition.out_index == INVALID_INDEX)
11478 return;
11479
11480 /**
11481 * Emit per-vertex clipping instructions for legacy user defined clip planes.
11482 * NOTE: we must emit the clip distance instructions before the
11483 * emit_vpos_instructions() call since the later function will change
11484 * the TEMP[vs_pos_tmp_index] value.
11485 */
11486 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
11487 /* Emit CLIPDIST for legacy user defined clip planes */
11488 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
11489 }
11490 }
11491
11492
11493 /**
11494 * Emit extra per-vertex instructions. This includes clip-coordinate
11495 * space conversion and computing clip distances. This is called for
11496 * each GS emit-vertex instruction and at the end of VS translation.
11497 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Emit clipping instructions based on clipping mode.
    * This must precede emit_vpos_instructions() since the latter
    * modifies the temp position register the clip code reads.
    */
   emit_clipping_instructions(emit);

   /* Emit vertex position instructions (prescale / undo-viewport) */
   emit_vpos_instructions(emit);
}
11507
11508
11509 /**
11510 * Translate the TGSI_OPCODE_EMIT GS instruction.
11511 */
11512 static bool
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)11513 emit_vertex(struct svga_shader_emitter_v10 *emit,
11514 const struct tgsi_full_instruction *inst)
11515 {
11516 unsigned ret = true;
11517
11518 assert(emit->unit == PIPE_SHADER_GEOMETRY);
11519
11520 /**
11521 * Emit the viewport array index for the first vertex.
11522 */
11523 if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
11524 struct tgsi_full_dst_register viewport_index_out =
11525 make_dst_output_reg(emit->gs.viewport_index_out_index);
11526 struct tgsi_full_dst_register viewport_index_out_x =
11527 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
11528 struct tgsi_full_src_register viewport_index_tmp =
11529 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11530
11531 /* Set the out index to INVALID_INDEX, so it will not
11532 * be assigned to a temp again in emit_dst_register, and
11533 * the viewport index will not be assigned again in the
11534 * subsequent vertices.
11535 */
11536 emit->gs.viewport_index_out_index = INVALID_INDEX;
11537 emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11538 &viewport_index_out_x, &viewport_index_tmp);
11539 }
11540
11541 /**
11542 * Find the stream index associated with this emit vertex instruction.
11543 */
11544 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
11545 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
11546
11547 /**
11548 * According to the ARB_gpu_shader5 spec, the built-in geometry shader
11549 * outputs are always associated with vertex stream zero.
11550 * So emit the extra vertex instructions for position or clip distance
11551 * for stream zero only.
11552 */
11553 if (streamIndex == 0) {
11554 /**
11555 * Before emitting vertex instructions, emit the temporaries for
11556 * the prescale constants based on the viewport index if needed.
11557 */
11558 if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
11559 emit_temp_prescale_instructions(emit);
11560
11561 emit_vertex_instructions(emit);
11562 }
11563
11564 begin_emit_instruction(emit);
11565 if (emit->version >= 50) {
11566 if (emit->info.num_stream_output_components[streamIndex] == 0) {
11567 /**
11568 * If there is no output for this stream, discard this instruction.
11569 */
11570 emit->discard_instruction = true;
11571 }
11572 else {
11573 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, false);
11574 emit_stream_register(emit, streamIndex);
11575 }
11576 }
11577 else {
11578 emit_opcode(emit, VGPU10_OPCODE_EMIT, false);
11579 }
11580 end_emit_instruction(emit);
11581
11582 return ret;
11583 }
11584
11585
11586 /**
11587 * Emit the extra code to convert from VGPU10's boolean front-face
11588 * register to TGSI's signed front-face register.
11589 *
11590 * TODO: Make temporary front-face register a scalar.
11591 */
11592 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)11593 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
11594 {
11595 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11596
11597 if (emit->fs.face_input_index != INVALID_INDEX) {
11598 /* convert vgpu10 boolean face register to gallium +/-1 value */
11599 struct tgsi_full_dst_register tmp_dst =
11600 make_dst_temp_reg(emit->fs.face_tmp_index);
11601 struct tgsi_full_src_register one =
11602 make_immediate_reg_float(emit, 1.0f);
11603 struct tgsi_full_src_register neg_one =
11604 make_immediate_reg_float(emit, -1.0f);
11605
11606 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
11607 begin_emit_instruction(emit);
11608 emit_opcode(emit, VGPU10_OPCODE_MOVC, false);
11609 emit_dst_register(emit, &tmp_dst);
11610 emit_face_register(emit);
11611 emit_src_register(emit, &one);
11612 emit_src_register(emit, &neg_one);
11613 end_emit_instruction(emit);
11614 }
11615 }
11616
11617
11618 /**
11619 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
11620 */
11621 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)11622 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
11623 {
11624 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11625
11626 if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
11627 struct tgsi_full_dst_register tmp_dst =
11628 make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
11629 struct tgsi_full_dst_register tmp_dst_xyz =
11630 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
11631 struct tgsi_full_dst_register tmp_dst_w =
11632 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11633 struct tgsi_full_src_register one =
11634 make_immediate_reg_float(emit, 1.0f);
11635 struct tgsi_full_src_register fragcoord =
11636 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
11637
11638 /* save the input index */
11639 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
11640 /* set to invalid to prevent substitution in emit_src_register() */
11641 emit->fs.fragcoord_input_index = INVALID_INDEX;
11642
11643 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
11644 begin_emit_instruction(emit);
11645 emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11646 emit_dst_register(emit, &tmp_dst_xyz);
11647 emit_src_register(emit, &fragcoord);
11648 end_emit_instruction(emit);
11649
11650 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
11651 begin_emit_instruction(emit);
11652 emit_opcode(emit, VGPU10_OPCODE_DIV, false);
11653 emit_dst_register(emit, &tmp_dst_w);
11654 emit_src_register(emit, &one);
11655 emit_src_register(emit, &fragcoord);
11656 end_emit_instruction(emit);
11657
11658 /* restore saved value */
11659 emit->fs.fragcoord_input_index = fragcoord_input_index;
11660 }
11661 }
11662
11663
11664 /**
11665 * Emit the extra code to get the current sample position value and
11666 * put it into a temp register.
11667 */
11668 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)11669 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
11670 {
11671 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11672
11673 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
11674 assert(emit->version >= 41);
11675
11676 struct tgsi_full_dst_register tmp_dst =
11677 make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
11678 struct tgsi_full_src_register half =
11679 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
11680
11681 struct tgsi_full_src_register tmp_src =
11682 make_src_temp_reg(emit->fs.sample_pos_tmp_index);
11683 struct tgsi_full_src_register sample_index_reg =
11684 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
11685 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
11686
11687 /* The first src register is a shader resource (if we want a
11688 * multisampled resource sample position) or the rasterizer register
11689 * (if we want the current sample position in the color buffer). We
11690 * want the later.
11691 */
11692
11693 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
11694 begin_emit_instruction(emit);
11695 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, false);
11696 emit_dst_register(emit, &tmp_dst);
11697 emit_rasterizer_register(emit);
11698 emit_src_register(emit, &sample_index_reg);
11699 end_emit_instruction(emit);
11700
11701 /* Convert from D3D coords to GL coords by adding 0.5 bias */
11702 /* ADD dst, dst, half */
11703 begin_emit_instruction(emit);
11704 emit_opcode(emit, VGPU10_OPCODE_ADD, false);
11705 emit_dst_register(emit, &tmp_dst);
11706 emit_src_register(emit, &tmp_src);
11707 emit_src_register(emit, &half);
11708 end_emit_instruction(emit);
11709 }
11710 }
11711
11712
11713 /**
11714 * Emit extra instructions to adjust VS inputs/attributes. This can
11715 * mean casting a vertex attribute from int to float or setting the
11716 * W component to 1, or both.
11717 */
static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Save the per-attribute adjustment bitmasks from the shader key.
    * Bit i of each mask corresponds to vertex input/attribute i.
    */
   const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
   const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
   const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
   const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
   const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
   const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
   const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;

   /* Union of all attributes that need any kind of adjustment */
   unsigned adjust_mask = (save_w_1_mask |
                           save_itof_mask |
                           save_utof_mask |
                           save_is_bgra_mask |
                           save_puint_to_snorm_mask |
                           save_puint_to_uscaled_mask |
                           save_puint_to_sscaled_mask);

   assert(emit->unit == PIPE_SHADER_VERTEX);

   if (adjust_mask) {
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      struct tgsi_full_src_register one_int =
         make_immediate_reg_int(emit, 1);

      /* We need to turn off these bitmasks while emitting the
       * instructions below, then restore them afterward.
       */
      emit->key.vs.adjust_attrib_w_1 = 0;
      emit->key.vs.adjust_attrib_itof = 0;
      emit->key.vs.adjust_attrib_utof = 0;
      emit->key.vs.attrib_is_bgra = 0;
      emit->key.vs.attrib_puint_to_snorm = 0;
      emit->key.vs.attrib_puint_to_uscaled = 0;
      emit->key.vs.attrib_puint_to_sscaled = 0;

      while (adjust_mask) {
         unsigned index = u_bit_scan(&adjust_mask);

         /* skip the instruction if this vertex attribute is not being used */
         if (emit->info.input_usage_mask[index] == 0)
            continue;

         unsigned tmp = emit->vs.adjusted_input[index];
         struct tgsi_full_src_register input_src =
            make_src_reg(TGSI_FILE_INPUT, index);

         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
         struct tgsi_full_dst_register tmp_dst_w =
            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);

         /* ITOF/UTOF/MOV tmp, input[index] */
         if (save_itof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
                                 &tmp_dst, &input_src);
         }
         else if (save_utof_mask & (1 << index)) {
            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
                                 &tmp_dst, &input_src);
         }
         else if (save_puint_to_snorm_mask & (1 << index)) {
            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_uscaled_mask & (1 << index)) {
            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_sscaled_mask & (1 << index)) {
            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
         }
         else {
            /* Only the w==1 and/or BGRA adjustments remain; start from
             * a plain copy of the input.
             */
            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                 &tmp_dst, &input_src);
         }

         if (save_is_bgra_mask & (1 << index)) {
            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
         }

         if (save_w_1_mask & (1 << index)) {
            /* MOV tmp.w, 1.0 */
            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one_int);
            }
            else {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one);
            }
         }
      }

      /* Restore the saved bitmasks in the shader key. */
      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
      emit->key.vs.adjust_attrib_itof = save_itof_mask;
      emit->key.vs.adjust_attrib_utof = save_utof_mask;
      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   }
}
11823
11824
/* Find the zero-value immediate to use as the default layer index */
11826 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)11827 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
11828 {
11829 assert(emit->unit == PIPE_SHADER_FRAGMENT);
11830
11831 /* immediate for default layer index 0 */
11832 if (emit->fs.layer_input_index != INVALID_INDEX) {
11833 union tgsi_immediate_data imm;
11834 imm.Int = 0;
11835 emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
11836 }
11837 }
11838
11839
11840 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)11841 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11842 unsigned cbuf_index,
11843 struct tgsi_full_dst_register *scale,
11844 struct tgsi_full_dst_register *translate)
11845 {
11846 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
11847 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
11848
11849 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
11850 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
11851 }
11852
11853
11854 /**
11855 * A recursive helper function to find the prescale from the constant buffer
11856 */
11857 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)11858 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11859 unsigned index, unsigned num_prescale,
11860 struct tgsi_full_src_register *vp_index,
11861 struct tgsi_full_dst_register *scale,
11862 struct tgsi_full_dst_register *translate,
11863 struct tgsi_full_src_register *tmp_src,
11864 struct tgsi_full_dst_register *tmp_dst)
11865 {
11866 if (num_prescale == 0)
11867 return;
11868
11869 if (index > 0) {
11870 /* ELSE */
11871 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
11872 }
11873
11874 struct tgsi_full_src_register index_src =
11875 make_immediate_reg_int(emit, index);
11876
11877 if (index == 0) {
11878 /* GE tmp, vp_index, index */
11879 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
11880 vp_index, &index_src);
11881 } else {
11882 /* EQ tmp, vp_index, index */
11883 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
11884 vp_index, &index_src);
11885 }
11886
11887 /* IF tmp */
11888 emit_if(emit, tmp_src);
11889 emit_temp_prescale_from_cbuf(emit,
11890 emit->vposition.prescale_cbuf_index + 2 * index,
11891 scale, translate);
11892
11893 find_prescale_from_cbuf(emit, index+1, num_prescale-1,
11894 vp_index, scale, translate,
11895 tmp_src, tmp_dst);
11896
11897 /* ENDIF */
11898 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
11899 }
11900
11901
11902 /**
11903 * This helper function emits instructions to set the prescale
11904 * and translate temporaries to the correct constants from the
11905 * constant buffer according to the designated viewport.
11906 */
11907 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)11908 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
11909 {
11910 struct tgsi_full_dst_register prescale_scale =
11911 make_dst_temp_reg(emit->vposition.prescale_scale_index);
11912 struct tgsi_full_dst_register prescale_translate =
11913 make_dst_temp_reg(emit->vposition.prescale_trans_index);
11914
11915 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
11916
11917 if (emit->vposition.num_prescale == 1) {
11918 emit_temp_prescale_from_cbuf(emit,
11919 prescale_cbuf_index,
11920 &prescale_scale, &prescale_translate);
11921 } else {
11922 /**
11923 * Since SM5 device does not support dynamic indexing, we need
11924 * to do the if-else to find the prescale constants for the
11925 * specified viewport.
11926 */
11927 struct tgsi_full_src_register vp_index_src =
11928 make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11929
11930 struct tgsi_full_src_register vp_index_src_x =
11931 scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
11932
11933 unsigned tmp = get_temp_index(emit);
11934 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11935 struct tgsi_full_src_register tmp_src_x =
11936 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
11937 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11938
11939 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
11940 &vp_index_src_x,
11941 &prescale_scale, &prescale_translate,
11942 &tmp_src_x, &tmp_dst);
11943 }
11944
11945 /* Mark prescale temporaries are emitted */
11946 emit->vposition.have_prescale = 1;
11947 }
11948
11949
/**
 * A helper function to emit an instruction in a vertex shader to add a bias
 * to the VertexID system value. This patches the VertexID in the SVGA vertex
 * shader to include the base vertex of an indexed primitive or the start index
 * of a non-indexed primitive.
 */
static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
{
   /* Constant register holding the vertex-id bias value */
   struct tgsi_full_src_register vertex_id_bias_index =
      make_src_const_reg(emit->vs.vertex_id_bias_index);
   /* The raw VertexID system-value input */
   struct tgsi_full_src_register vertex_id_sys_src =
      make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
   struct tgsi_full_src_register vertex_id_sys_src_x =
      scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
   /* Temp that will hold the biased VertexID */
   struct tgsi_full_dst_register vertex_id_tmp_dst =
      make_dst_temp_reg(emit->vs.vertex_id_tmp_index);

   /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
   /* NOTE(review): vs.vertex_id_tmp_index is temporarily set to
    * INVALID_INDEX around this one instruction -- presumably so that
    * register translation does not redirect the VertexID source operand
    * to the very temp being written here; confirm against
    * emit_src_register().
    */
   unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
   emit->vs.vertex_id_tmp_index = INVALID_INDEX;
   emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
                        &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, false,
                        false);
   emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
}
11976
/**
 * Hull Shader must have control point outputs. But tessellation
 * control shader can return without writing to control point output.
 * In this case, the control point output is assumed to be passthrough
 * from the control point input.
 * This helper function is to write out a control point output first in case
 * the tessellation control shader returns before writing a
 * control point output.
 */
static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->unit == PIPE_SHADER_TESS_CTRL);
   assert(emit->tcs.control_point_phase);
   assert(emit->tcs.control_point_out_index != INVALID_INDEX);
   assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);

   struct tgsi_full_dst_register output_control_point;
   output_control_point =
      make_dst_output_reg(emit->tcs.control_point_out_index);

   if (emit->tcs.control_point_input_index == INVALID_INDEX) {
      /* No control point input to pass through: default the output to 0.
       * MOV OUTPUT 0.0f
       */
      struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
      begin_emit_instruction(emit);
      emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
      emit_dst_register(emit, &output_control_point);
      emit_src_register(emit, &zero);
      end_emit_instruction(emit);
   }
   else {
      /* Copy the invocation id into the address register so it can be
       * used to index the control point input array.
       * UARL ADDR[INDEX].x INVOCATION.xxxx
       */
      struct tgsi_full_src_register invocation_src;
      struct tgsi_full_dst_register addr_dst;
      struct tgsi_full_dst_register addr_dst_x;
      unsigned addr_tmp;

      /* The temp register backing this address register */
      addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
      addr_dst = make_dst_temp_reg(addr_tmp);
      addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);

      invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
                                    emit->tcs.invocation_id_sys_index);

      begin_emit_instruction(emit);
      emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
      emit_dst_register(emit, &addr_dst_x);
      emit_src_register(emit, &invocation_src);
      end_emit_instruction(emit);


      /* Pass through the invocation's control point input:
       * MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION]
       */
      struct tgsi_full_src_register input_control_point;
      input_control_point = make_src_reg(TGSI_FILE_INPUT,
                                         emit->tcs.control_point_input_index);
      /* 2-D input indexed indirectly through the address register */
      input_control_point.Register.Dimension = 1;
      input_control_point.Dimension.Indirect = 1;
      input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
      input_control_point.DimIndirect.Index =
         emit->tcs.control_point_addr_index;

      begin_emit_instruction(emit);
      emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
      emit_dst_register(emit, &output_control_point);
      emit_src_register(emit, &input_control_point);
      end_emit_instruction(emit);
   }
}
12047
/**
 * This function constructs a temporary tessfactor from VGPU10*_TESSFACTOR
 * values in the domain shader. SM5 has tessfactors as scalar floating point
 * values whereas TGSI emits them as vectors. This function constructs a temp
 * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
 * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
 * TGSI_SEMANTIC_TESSINNER/OUTER is used in the shader.
 */
static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register src;
   struct tgsi_full_dst_register dst;

   if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
      dst = make_dst_temp_reg(emit->tes.inner.temp_index);

      /* Each case falls through to copy the components shared with the
       * simpler primitive modes (quads need .xy, triangles only .x).
       */
      switch (emit->tes.prim_mode) {
      case MESA_PRIM_QUADS:
         /* inner[1] -> temp.y */
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         FALLTHROUGH;
      case MESA_PRIM_TRIANGLES:
         /* inner[0] -> temp.x */
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.inner.in_index, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         break;
      case MESA_PRIM_LINES:
         /**
          * As per the SM5 spec, InsideTessFactor for isolines is unused.
          * In fact glsl tessInnerLevel for isolines doesn't mean anything, but
          * if any application tries to read tessInnerLevel in the TES when the
          * primitive type is isolines, then instead of the driver throwing a
          * segfault for accessing it, return at least vec(1.0f).
          */
         src = make_immediate_reg_float(emit, 1.0f);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         break;
      default:
         break;
      }
   }

   if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
      dst = make_dst_temp_reg(emit->tes.outer.temp_index);

      /* Same fallthrough scheme: quads need .xyzw, triangles .xyz,
       * isolines .xy.
       */
      switch (emit->tes.prim_mode) {
      case MESA_PRIM_QUADS:
         /* outer[3] -> temp.w */
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         FALLTHROUGH;
      case MESA_PRIM_TRIANGLES:
         /* outer[2] -> temp.z */
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         FALLTHROUGH;
      case MESA_PRIM_LINES:
         /* outer[1] -> temp.y */
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);

         /* outer[0] -> temp.x */
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index , TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);

         break;
      default:
         break;
      }
   }
}
12127
12128
12129 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)12130 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
12131 {
12132 struct tgsi_full_src_register src;
12133 struct tgsi_full_dst_register dst;
12134 unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
12135 emit->initialize_temp_index);
12136 src = make_immediate_reg_float(emit, 0.0f);
12137 dst = make_dst_temp_reg(vgpu10_temp_index);
12138 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12139 emit->temp_map[emit->initialize_temp_index].initialized = true;
12140 emit->initialize_temp_index = INVALID_INDEX;
12141 }
12142
12143
/**
 * Emit any extra/helper declarations/code that we might need between
 * the declaration section and code section.
 *
 * \return true on success (or when instruction emission should be skipped
 *         for the hull shader control point phase), false on failure from
 *         the input/output declaration emitters.
 */
static bool
emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
{
   /* Properties */
   if (emit->unit == PIPE_SHADER_GEOMETRY)
      emit_property_instructions(emit);
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      emit_hull_shader_declarations(emit);

      /* Save the position of the first instruction token so that we can
       * do a second pass of the instructions for the patch constant phase.
       */
      emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
      emit->tcs.fork_phase_add_signature = false;

      if (!emit_hull_shader_control_point_phase(emit)) {
         /* No explicit control point phase: skip instruction emission */
         emit->skip_instruction = true;
         return true;
      }

      /* Set the current tcs phase to control point phase */
      emit->tcs.control_point_phase = true;
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      emit_domain_shader_declarations(emit);
   }
   else if (emit->unit == PIPE_SHADER_COMPUTE) {
      emit_compute_shader_declarations(emit);
   }

   /* Declare inputs */
   if (!emit_input_declarations(emit))
      return false;

   /* Declare outputs */
   if (!emit_output_declarations(emit))
      return false;

   /* Declare temporary registers */
   emit_temporaries_declaration(emit);

   /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
    * will already be declared in hs_decls (emit_hull_shader_declarations)
    */
   if (emit->unit != PIPE_SHADER_TESS_CTRL) {

      alloc_common_immediates(emit);

      /* Declare constant registers */
      emit_constant_declaration(emit);

      /* Declare samplers and resources */
      emit_sampler_declarations(emit);
      emit_resource_declarations(emit);

      /* Declare images */
      emit_image_declarations(emit);

      /* Declare shader buffers */
      emit_shader_buf_declarations(emit);

      /* Declare atomic buffers */
      emit_atomic_buf_declarations(emit);
   }

   if (emit->unit != PIPE_SHADER_FRAGMENT &&
       emit->unit != PIPE_SHADER_COMPUTE) {
      /*
       * Declare clip distance output registers for ClipVertex or
       * user defined planes
       */
      emit_clip_distance_declarations(emit);
   }

   if (emit->unit == PIPE_SHADER_COMPUTE) {
      emit_memory_declarations(emit);

      /* Bake the grid size from the shader key into an int4 immediate */
      if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
         emit->cs.grid_size.imm_index =
            alloc_immediate_int4(emit,
                                 emit->key.cs.grid_size[0],
                                 emit->key.cs.grid_size[1],
                                 emit->key.cs.grid_size[2], 0);
      }
   }

   /* Allocate an immediate holding the alpha-test reference value
    * (replicated to all four components).
    */
   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
      float alpha = emit->key.fs.alpha_ref;
      emit->fs.alpha_ref_index =
         alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
   }

   if (emit->unit != PIPE_SHADER_TESS_CTRL) {
      /**
       * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
       * hs_decls
       */
      emit_vgpu10_immediates_block(emit);
   }
   else {
      emit_tcs_default_control_point_output(emit);
   }

   /* Per-stage helper instructions that must precede the shader body */
   if (emit->unit == PIPE_SHADER_FRAGMENT) {
      emit_frontface_instructions(emit);
      emit_fragcoord_instructions(emit);
      emit_sample_position_instructions(emit);
      emit_default_layer_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_attrib_instructions(emit);

      /* Patch VertexID with the base-vertex bias if the shader reads it */
      if (emit->info.uses_vertexid)
         emit_vertex_id_nobase_instruction(emit);
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      emit_temp_tessfactor_instructions(emit);
   }

   /**
    * For a geometry shader that writes to viewport index, the prescale
    * temporaries will be done at the first vertex emission.
    */
   if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
      emit_temp_prescale_instructions(emit);

   return true;
}
12277
12278
12279 /**
12280 * The device has no direct support for the pipe_blend_state::alpha_to_one
12281 * option so we implement it here with shader code.
12282 *
12283 * Note that this is kind of pointless, actually. Here we're clobbering
12284 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind
12285 * up with 100% coverage. That's almost certainly not what the user wants.
12286 * The work-around is to add extra shader code to compute coverage from alpha
12287 * and write it to the coverage output register (if the user's shader doesn't
12288 * do so already). We'll probably do that in the future.
12289 */
12290 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12291 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
12292 unsigned fs_color_tmp_index)
12293 {
12294 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
12295 unsigned i;
12296
12297 /* Note: it's not 100% clear from the spec if we're supposed to clobber
12298 * the alpha for all render targets. But that's what NVIDIA does and
12299 * that's what Piglit tests.
12300 */
12301 for (i = 0; i < emit->fs.num_color_outputs; i++) {
12302 struct tgsi_full_dst_register color_dst;
12303
12304 if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
12305 /* write to the temp color register */
12306 color_dst = make_dst_temp_reg(fs_color_tmp_index);
12307 }
12308 else {
12309 /* write directly to the color[i] output */
12310 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
12311 }
12312
12313 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
12314
12315 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
12316 }
12317 }
12318
12319
/**
 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w
 * against the alpha reference value and discards the fragment if the
 * comparison fails.
 */
static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
                             unsigned fs_color_tmp_index)
{
   /* compare output color's alpha to alpha ref and discard if comparison
    * fails.
    */
   /* Scratch temp holding the comparison result */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src_x =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   /* The fragment color currently staged in a temp register */
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register color_src_w =
      scalar_src(&color_src, TGSI_SWIZZLE_W);
   /* Immediate holding the alpha reference value (see emit_pre_helpers) */
   struct tgsi_full_src_register ref_src =
      make_src_immediate_reg(emit->fs.alpha_ref_index);
   struct tgsi_full_dst_register color_dst =
      make_dst_output_reg(emit->fs.color_out_index[0]);

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   /* dst = src0 'alpha_func' src1 */
   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
                   &color_src_w, &ref_src);

   /* DISCARD if dst.x == 0 */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, false); /* discard if src0.x is zero */
   emit_src_register(emit, &tmp_src_x);
   end_emit_instruction(emit);

   /* If we don't need to broadcast the color below, emit the final color here.
    */
   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
      /* MOV output.color, tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
   }

   free_temp_indexes(emit);
}
12367
12368
12369 /**
12370 * Emit instructions for writing a single color output to multiple
12371 * color buffers.
12372 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
12373 * when key.fs.white_fragments is true).
12374 * property is set and the number of render targets is greater than one.
12375 * \param fs_color_tmp_index index of the temp register that holds the
12376 * color to broadcast.
12377 */
12378 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12379 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
12380 unsigned fs_color_tmp_index)
12381 {
12382 const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
12383 unsigned i;
12384 struct tgsi_full_src_register color_src;
12385
12386 if (emit->key.fs.white_fragments) {
12387 /* set all color outputs to white */
12388 color_src = make_immediate_reg_float(emit, 1.0f);
12389 }
12390 else {
12391 /* set all color outputs to TEMP[fs_color_tmp_index] */
12392 assert(fs_color_tmp_index != INVALID_INDEX);
12393 color_src = make_src_temp_reg(fs_color_tmp_index);
12394 }
12395
12396 assert(emit->unit == PIPE_SHADER_FRAGMENT);
12397
12398 for (i = 0; i < n; i++) {
12399 unsigned output_reg = emit->fs.color_out_index[i];
12400 struct tgsi_full_dst_register color_dst =
12401 make_dst_output_reg(output_reg);
12402
12403 /* Fill in this semantic here since we'll use it later in
12404 * emit_dst_register().
12405 */
12406 emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
12407
12408 /* MOV output.color[i], tempcolor */
12409 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12410 }
12411 }
12412
12413
/**
 * Emit extra helper code after the original shader code, but before the
 * last END/RET instruction.
 * For vertex shaders this means emitting the extra code to apply the
 * prescale scale/translation.
 *
 * \return always true (kept boolean for symmetry with emit_pre_helpers)
 */
static bool
emit_post_helpers(struct svga_shader_emitter_v10 *emit)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;

      /* white_fragments implies at least one cbuf write */
      assert(!(emit->key.fs.white_fragments &&
               emit->key.fs.write_color0_to_n_cbufs == 0));

      /* We no longer want emit_dst_register() to substitute the
       * temporary fragment color register for the real color output.
       */
      emit->fs.color_tmp_index = INVALID_INDEX;

      /* Order matters: alpha-to-one before alpha test, both before the
       * final broadcast to multiple color buffers.
       */
      if (emit->key.fs.alpha_to_one) {
         emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
         emit_alpha_test_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
          emit->key.fs.white_fragments) {
         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      if (!emit->tcs.control_point_phase) {
         /* store the tessellation levels in the patch constant phase only */
         store_tesslevels(emit);
      }
      else {
         emit_clipping_instructions(emit);
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      emit_vertex_instructions(emit);
   }

   return true;
}
12463
12464
/**
 * Reemit rawbuf instruction
 *
 * For each raw-buffer reference recorded for this instruction, emit an
 * LD_RAW into the designated temporary, then reemit the instruction so it
 * reads from those temporaries instead.
 *
 * \return the result of reemitting the instruction
 */
static bool
emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst)
{
   bool ret;

   /* For all the rawbuf references in this instruction,
    * load the rawbuf reference and assign to the designated temporary.
    * Then reemit the instruction.
    */
   emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;

   /* Scratch temp used to compute each byte offset */
   unsigned offset_tmp = get_temp_index(emit);
   struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
   struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
   struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);

   for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
      struct tgsi_full_src_register element_src;

      /* First get the element index register. */

      if (emit->raw_buf_tmp[i].indirect) {
         /* Indirect reference: add the relative offset to the element
          * index taken from the indirect register.
          */
         unsigned tmp = get_temp_index(emit);
         struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
         struct tgsi_full_src_register element_index =
            make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
         struct tgsi_full_src_register element_rel =
            make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);

         element_src = make_src_temp_reg(tmp);
         element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
         element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);

         /* element index from the indirect register */
         element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
         element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);

         /* IADD element_src element_index element_index_relative */
         emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
                              &element_index, &element_rel);
      }
      else {
         /* Direct reference: use an immediate for the element index,
          * registering it in the immediate pool first if needed.
          */
         unsigned element_index = emit->raw_buf_tmp[i].element_index;
         union tgsi_immediate_data imm;
         imm.Int = element_index;
         int immpos = find_immediate(emit, imm, 0);
         if (immpos < 0) {
            /* Called only for its side effect of adding the immediate */
            UNUSED unsigned element_index_imm =
               add_immediate_int(emit, element_index);
         }
         element_src = make_immediate_reg_int(emit, element_index);
      }

      /* byte offset = element index << 4 (16 bytes per vec4 element) */
      emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
                           &element_src, &four);

      struct tgsi_full_dst_register dst_tmp =
         make_dst_temp_reg(i + emit->raw_buf_tmp_index);

      /* LD_RAW tmp, rawbuf byte offset, rawbuf */

      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_LD_RAW, false);
      emit_dst_register(emit, &dst_tmp);

      struct tgsi_full_src_register offset_x =
         scalar_src(&offset_src, TGSI_SWIZZLE_X);
      emit_src_register(emit, &offset_x);

      emit_resource_register(emit,
         emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
      end_emit_instruction(emit);
   }

   emit->raw_buf_cur_tmp_index = 0;

   ret = emit_vgpu10_instruction(emit, inst_number, inst);

   /* reset raw buf state */
   emit->raw_buf_cur_tmp_index = 0;
   emit->reemit_rawbuf_instruction = REEMIT_FALSE;

   free_temp_indexes(emit);

   return ret;
}
12557
12558
/**
 * Translate the TGSI tokens into VGPU10 tokens.
 *
 * Walks the token stream, dispatching immediates, declarations, properties
 * and instructions to the corresponding emitters.  Pre-helper code is
 * emitted just before the first instruction.
 *
 * \return true on success, false if any emitter failed
 */
static bool
emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_token *tokens)
{
   struct tgsi_parse_context parse;
   bool ret = true;
   bool pre_helpers_emitted = false;
   unsigned inst_number = 0;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {

      /* Save the current tgsi token starting position */
      emit->cur_tgsi_token = parse.Position;

      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* Emit pre-helpers once, before the first instruction */
         if (!pre_helpers_emitted) {
            ret = emit_pre_helpers(emit);
            if (!ret)
               goto done;
            pre_helpers_emitted = true;
         }
         ret = emit_vgpu10_instruction(emit, inst_number++,
                                       &parse.FullToken.FullInstruction);

         /* Usually this applies to TCS only. If shader is reading control
          * point outputs in control point phase, we should reemit all
          * instructions which are writing into control point output in
          * control phase to store results into temporaries.
          */
         if (emit->reemit_instruction) {
            assert(emit->unit == PIPE_SHADER_TESS_CTRL);
            ret = emit_vgpu10_instruction(emit, inst_number,
                                          &parse.FullToken.FullInstruction);
         }
         else if (emit->initialize_temp_index != INVALID_INDEX) {
            /* A temp needs zero-initialization before this instruction:
             * emit the init MOV, then reemit the instruction itself.
             */
            emit_initialize_temp_instruction(emit);
            emit->initialize_temp_index = INVALID_INDEX;
            ret = emit_vgpu10_instruction(emit, inst_number - 1,
                                          &parse.FullToken.FullInstruction);
         }
         else if (emit->reemit_rawbuf_instruction) {
            /* Instruction referenced raw buffers: load them into temps
             * and reemit (see emit_rawbuf_instruction).
             */
            ret = emit_rawbuf_instruction(emit, inst_number - 1,
                                          &parse.FullToken.FullInstruction);
         }

         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
         if (!ret)
            goto done;
         break;

      default:
         break;
      }
   }

   /* Second pass over the instructions for the hull shader's patch
    * constant phase (see emit->tcs.instruction_token_pos).
    */
   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      ret = emit_hull_shader_patch_constant_phase(emit, &parse);
   }

done:
   tgsi_parse_free(&parse);
   return ret;
}
12647
12648
/**
 * Emit the first VGPU10 shader tokens.
 *
 * Layout: version/program-type token, a length placeholder (patched later
 * by emit_vgpu10_tail), then for SM5 the global-flags declaration (preceded
 * by HS_DECLS for hull shaders) and for SM4+ a reserved NOP token that the
 * tail may rewrite into a global-flags declaration.
 *
 * \return true on success, false if a token could not be emitted
 */
static bool
emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken ptoken;

   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */

   /* Maximum supported shader version is 50 */
   unsigned version = MIN2(emit->version, 50);

   ptoken.value = 0; /* init whole token to zero */
   ptoken.majorVersion = version / 10;
   ptoken.minorVersion = version % 10;
   ptoken.programType = translate_shader_type(emit->unit);
   if (!emit_dword(emit, ptoken.value))
      return false;

   /* Second token: total length of shader, in tokens. We can't fill this
    * in until we're all done. Emit zero for now.
    */
   if (!emit_dword(emit, 0))
      return false;

   if (emit->version >= 50) {
      VGPU10OpcodeToken0 token;

      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         /* For hull shader, we need to start the declarations phase first before
          * emitting any declarations including the global flags.
          */
         token.value = 0;
         token.opcodeType = VGPU10_OPCODE_HS_DECLS;
         begin_emit_instruction(emit);
         emit_dword(emit, token.value);
         end_emit_instruction(emit);
      }

      /* Emit global flags */
      token.value = 0; /* init whole token to zero */
      token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
      token.enableDoublePrecisionFloatOps = 1; /* set bit */
      token.instructionLength = 1;
      if (!emit_dword(emit, token.value))
         return false;
   }

   if (emit->version >= 40) {
      VGPU10OpcodeToken0 token;

      /* Reserved for global flag such as refactoringAllowed.
       * If the shader does not use the precise qualifier, we will set the
       * refactoringAllowed global flag; otherwise, we will leave the reserved
       * token to NOP.
       */
      /* Remember the token offset so emit_vgpu10_tail() can patch it */
      emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
      token.value = 0;
      token.opcodeType = VGPU10_OPCODE_NOP;
      token.instructionLength = 1;
      if (!emit_dword(emit, token.value))
         return false;
   }

   return true;
}
12716
12717
12718 static bool
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)12719 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
12720 {
12721 VGPU10ProgramToken *tokens;
12722
12723 /* Replace the second token with total shader length */
12724 tokens = (VGPU10ProgramToken *) emit->buf;
12725 tokens[1].value = emit_get_num_tokens(emit);
12726
12727 if (emit->version >= 40 && !emit->uses_precise_qualifier) {
12728 /* Replace the reserved token with the RefactoringAllowed global flag */
12729 VGPU10OpcodeToken0 *ptoken;
12730
12731 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12732 assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
12733 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12734 ptoken->refactoringAllowed = 1;
12735 }
12736
12737 if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
12738 /* Replace the reserved token with the forceEarlyDepthStencil global flag */
12739 VGPU10OpcodeToken0 *ptoken;
12740
12741 ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12742 ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12743 ptoken->forceEarlyDepthStencil = 1;
12744 }
12745
12746 return true;
12747 }
12748
12749
/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const bool dump = false;   /* flip on to see before/after TGSI */

   if (dump) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }

   tokens = tgsi_add_two_side(tokens);

   if (dump) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}
12768
12769
12770 /**
12771 * Modify the FS to do polygon stipple.
12772 */
12773 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12774 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
12775 const struct tgsi_token *tokens)
12776 {
12777 const struct tgsi_token *new_tokens;
12778 unsigned unit;
12779
12780 if (0) {
12781 debug_printf("Before pstipple ------------------\n");
12782 tgsi_dump(tokens,0);
12783 }
12784
12785 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
12786 TGSI_FILE_INPUT);
12787
12788 emit->fs.pstipple_sampler_unit = unit;
12789
12790 /* The new sampler state is appended to the end of the samplers list */
12791 emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
12792
12793 /* Setup texture state for stipple */
12794 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
12795 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
12796 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
12797 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
12798 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
12799 emit->key.tex[unit].target = PIPE_TEXTURE_2D;
12800 emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
12801
12802 if (0) {
12803 debug_printf("After pstipple ------------------\n");
12804 tgsi_dump(new_tokens, 0);
12805 }
12806
12807 return new_tokens;
12808 }
12809
12810 /**
12811 * Modify the FS to support anti-aliasing point.
12812 */
12813 static const struct tgsi_token *
transform_fs_aapoint(struct svga_context * svga,const struct tgsi_token * tokens,int aa_coord_index)12814 transform_fs_aapoint(struct svga_context *svga,
12815 const struct tgsi_token *tokens,
12816 int aa_coord_index)
12817 {
12818 bool need_texcoord_semantic =
12819 svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD);
12820
12821 if (0) {
12822 debug_printf("Before tgsi_add_aa_point ------------------\n");
12823 tgsi_dump(tokens,0);
12824 }
12825 tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic);
12826 if (0) {
12827 debug_printf("After tgsi_add_aa_point ------------------\n");
12828 tgsi_dump(tokens, 0);
12829 }
12830 return tokens;
12831 }
12832
12833
12834 /**
12835 * A helper function to determine the shader in the previous stage and
12836 * then call the linker function to determine the input mapping for this
12837 * shader to match the output indices from the shader in the previous stage.
12838 */
12839 static void
compute_input_mapping(struct svga_context * svga,struct svga_shader_emitter_v10 * emit,enum pipe_shader_type unit)12840 compute_input_mapping(struct svga_context *svga,
12841 struct svga_shader_emitter_v10 *emit,
12842 enum pipe_shader_type unit)
12843 {
12844 struct svga_shader *prevShader = NULL; /* shader in the previous stage */
12845
12846 if (unit == PIPE_SHADER_FRAGMENT) {
12847 prevShader = svga->curr.gs ?
12848 &svga->curr.gs->base : (svga->curr.tes ?
12849 &svga->curr.tes->base : &svga->curr.vs->base);
12850 } else if (unit == PIPE_SHADER_GEOMETRY) {
12851 prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
12852 } else if (unit == PIPE_SHADER_TESS_EVAL) {
12853 assert(svga->curr.tcs);
12854 prevShader = &svga->curr.tcs->base;
12855 } else if (unit == PIPE_SHADER_TESS_CTRL) {
12856 assert(svga->curr.vs);
12857 prevShader = &svga->curr.vs->base;
12858 }
12859
12860 if (prevShader != NULL) {
12861 svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage);
12862 emit->prevShaderInfo = &prevShader->tgsi_info;
12863 }
12864 else {
12865 /**
12866 * Since vertex shader does not need to go through the linker to
12867 * establish the input map, we need to make sure the highest index
12868 * of input registers is set properly here.
12869 */
12870 emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
12871 emit->info.file_max[TGSI_FILE_INPUT]);
12872 }
12873 }
12874
12875
12876 /**
12877 * Copies the shader signature info to the shader variant
12878 */
12879 static void
copy_shader_signature(struct svga_shader_signature * sgn,struct svga_shader_variant * variant)12880 copy_shader_signature(struct svga_shader_signature *sgn,
12881 struct svga_shader_variant *variant)
12882 {
12883 SVGA3dDXShaderSignatureHeader *header = &sgn->header;
12884
12885 /* Calculate the signature length */
12886 variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
12887 (header->numInputSignatures +
12888 header->numOutputSignatures +
12889 header->numPatchConstantSignatures) *
12890 sizeof(SVGA3dDXShaderSignatureEntry);
12891
12892 /* Allocate buffer for the signature info */
12893 variant->signature =
12894 (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
12895
12896 char *sgnBuf = (char *)variant->signature;
12897 unsigned sgnLen;
12898
12899 /* Copy the signature info to the shader variant structure */
12900 memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
12901 sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
12902
12903 if (header->numInputSignatures) {
12904 sgnLen =
12905 header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12906 memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
12907 sgnBuf += sgnLen;
12908 }
12909
12910 if (header->numOutputSignatures) {
12911 sgnLen =
12912 header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12913 memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
12914 sgnBuf += sgnLen;
12915 }
12916
12917 if (header->numPatchConstantSignatures) {
12918 sgnLen =
12919 header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12920 memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
12921 }
12922 }
12923
12924
12925 /**
12926 * This is the main entrypoint for the TGSI -> VPGU10 translator.
12927 */
12928 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,enum pipe_shader_type unit)12929 svga_tgsi_vgpu10_translate(struct svga_context *svga,
12930 const struct svga_shader *shader,
12931 const struct svga_compile_key *key,
12932 enum pipe_shader_type unit)
12933 {
12934 struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
12935 struct svga_shader_variant *variant = NULL;
12936 struct svga_shader_emitter_v10 *emit;
12937 const struct tgsi_token *tokens = shader->tokens;
12938
12939 (void) make_immediate_reg_double; /* unused at this time */
12940
12941 assert(unit == PIPE_SHADER_VERTEX ||
12942 unit == PIPE_SHADER_GEOMETRY ||
12943 unit == PIPE_SHADER_FRAGMENT ||
12944 unit == PIPE_SHADER_TESS_CTRL ||
12945 unit == PIPE_SHADER_TESS_EVAL ||
12946 unit == PIPE_SHADER_COMPUTE);
12947
12948 /* These two flags cannot be used together */
12949 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
12950
12951 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
12952 /*
12953 * Setup the code emitter
12954 */
12955 emit = alloc_emitter();
12956 if (!emit)
12957 goto done;
12958
12959 emit->unit = unit;
12960 if (svga_have_gl43(svga)) {
12961 emit->version = 51;
12962 } else if (svga_have_sm5(svga)) {
12963 emit->version = 50;
12964 } else if (svga_have_sm4_1(svga)) {
12965 emit->version = 41;
12966 } else {
12967 emit->version = 40;
12968 }
12969
12970 emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
12971
12972 emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
12973
12974 emit->key = *key;
12975
12976 emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
12977 emit->key.gs.need_prescale ||
12978 emit->key.tes.need_prescale);
12979
12980 /* Determine how many prescale factors in the constant buffer */
12981 emit->vposition.num_prescale = 1;
12982 if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
12983 assert(emit->unit == PIPE_SHADER_GEOMETRY);
12984 emit->vposition.num_prescale = emit->key.gs.num_prescale;
12985 }
12986
12987 emit->vposition.tmp_index = INVALID_INDEX;
12988 emit->vposition.so_index = INVALID_INDEX;
12989 emit->vposition.out_index = INVALID_INDEX;
12990
12991 emit->vs.vertex_id_sys_index = INVALID_INDEX;
12992 emit->vs.vertex_id_tmp_index = INVALID_INDEX;
12993 emit->vs.vertex_id_bias_index = INVALID_INDEX;
12994
12995 emit->fs.color_tmp_index = INVALID_INDEX;
12996 emit->fs.face_input_index = INVALID_INDEX;
12997 emit->fs.fragcoord_input_index = INVALID_INDEX;
12998 emit->fs.sample_id_sys_index = INVALID_INDEX;
12999 emit->fs.sample_pos_sys_index = INVALID_INDEX;
13000 emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
13001 emit->fs.layer_input_index = INVALID_INDEX;
13002 emit->fs.layer_imm_index = INVALID_INDEX;
13003
13004 emit->gs.prim_id_index = INVALID_INDEX;
13005 emit->gs.invocation_id_sys_index = INVALID_INDEX;
13006 emit->gs.viewport_index_out_index = INVALID_INDEX;
13007 emit->gs.viewport_index_tmp_index = INVALID_INDEX;
13008
13009 emit->tcs.vertices_per_patch_index = INVALID_INDEX;
13010 emit->tcs.invocation_id_sys_index = INVALID_INDEX;
13011 emit->tcs.control_point_input_index = INVALID_INDEX;
13012 emit->tcs.control_point_addr_index = INVALID_INDEX;
13013 emit->tcs.control_point_out_index = INVALID_INDEX;
13014 emit->tcs.control_point_tmp_index = INVALID_INDEX;
13015 emit->tcs.control_point_out_count = 0;
13016 emit->tcs.inner.out_index = INVALID_INDEX;
13017 emit->tcs.inner.temp_index = INVALID_INDEX;
13018 emit->tcs.inner.tgsi_index = INVALID_INDEX;
13019 emit->tcs.outer.out_index = INVALID_INDEX;
13020 emit->tcs.outer.temp_index = INVALID_INDEX;
13021 emit->tcs.outer.tgsi_index = INVALID_INDEX;
13022 emit->tcs.patch_generic_out_count = 0;
13023 emit->tcs.patch_generic_out_index = INVALID_INDEX;
13024 emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
13025 emit->tcs.prim_id_index = INVALID_INDEX;
13026
13027 emit->tes.tesscoord_sys_index = INVALID_INDEX;
13028 emit->tes.inner.in_index = INVALID_INDEX;
13029 emit->tes.inner.temp_index = INVALID_INDEX;
13030 emit->tes.inner.tgsi_index = INVALID_INDEX;
13031 emit->tes.outer.in_index = INVALID_INDEX;
13032 emit->tes.outer.temp_index = INVALID_INDEX;
13033 emit->tes.outer.tgsi_index = INVALID_INDEX;
13034 emit->tes.prim_id_index = INVALID_INDEX;
13035
13036 emit->cs.thread_id_index = INVALID_INDEX;
13037 emit->cs.block_id_index = INVALID_INDEX;
13038 emit->cs.grid_size.tgsi_index = INVALID_INDEX;
13039 emit->cs.grid_size.imm_index = INVALID_INDEX;
13040 emit->cs.block_width = 1;
13041 emit->cs.block_height = 1;
13042 emit->cs.block_depth = 1;
13043
13044 emit->clip_dist_out_index = INVALID_INDEX;
13045 emit->clip_dist_tmp_index = INVALID_INDEX;
13046 emit->clip_dist_so_index = INVALID_INDEX;
13047 emit->clip_vertex_out_index = INVALID_INDEX;
13048 emit->clip_vertex_tmp_index = INVALID_INDEX;
13049 emit->svga_debug_callback = svga->debug.callback;
13050
13051 emit->index_range.start_index = INVALID_INDEX;
13052 emit->index_range.count = 0;
13053 emit->index_range.required = false;
13054 emit->index_range.operandType = VGPU10_NUM_OPERANDS;
13055 emit->index_range.dim = 0;
13056 emit->index_range.size = 0;
13057
13058 emit->current_loop_depth = 0;
13059
13060 emit->initialize_temp_index = INVALID_INDEX;
13061 emit->image_size_index = INVALID_INDEX;
13062
13063 emit->max_vs_inputs = svgascreen->max_vs_inputs;
13064 emit->max_vs_outputs = svgascreen->max_vs_outputs;
13065 emit->max_gs_inputs = svgascreen->max_gs_inputs;
13066
13067 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
13068 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
13069 }
13070
13071 if (unit == PIPE_SHADER_FRAGMENT) {
13072 if (key->fs.light_twoside) {
13073 tokens = transform_fs_twoside(tokens);
13074 }
13075 if (key->fs.pstipple) {
13076 const struct tgsi_token *new_tokens =
13077 transform_fs_pstipple(emit, tokens);
13078 if (tokens != shader->tokens) {
13079 /* free the two-sided shader tokens */
13080 tgsi_free_tokens(tokens);
13081 }
13082 tokens = new_tokens;
13083 }
13084 if (key->fs.aa_point) {
13085 tokens = transform_fs_aapoint(svga, tokens,
13086 key->fs.aa_point_coord_index);
13087 }
13088 }
13089
13090 if (SVGA_DEBUG & DEBUG_TGSI) {
13091 debug_printf("#####################################\n");
13092 debug_printf("### TGSI Shader %u\n", shader->id);
13093 tgsi_dump(tokens, 0);
13094 }
13095
13096 /**
13097 * Rescan the header if the token string is different from the one
13098 * included in the shader; otherwise, the header info is already up-to-date
13099 */
13100 if (tokens != shader->tokens) {
13101 tgsi_scan_shader(tokens, &emit->info);
13102 } else {
13103 emit->info = shader->tgsi_info;
13104 }
13105
13106 emit->num_outputs = emit->info.num_outputs;
13107
13108 /**
13109 * Compute input mapping to match the outputs from shader
13110 * in the previous stage
13111 */
13112 compute_input_mapping(svga, emit, unit);
13113
13114 determine_clipping_mode(emit);
13115
13116 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
13117 unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
13118 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
13119 /* if there is stream output declarations associated
13120 * with this shader or the shader writes to ClipDistance
13121 * then reserve extra registers for the non-adjusted vertex position
13122 * and the ClipDistance shadow copy.
13123 */
13124 emit->vposition.so_index = emit->num_outputs++;
13125
13126 if (emit->clip_mode == CLIP_DISTANCE) {
13127 emit->clip_dist_so_index = emit->num_outputs++;
13128 if (emit->info.num_written_clipdistance > 4)
13129 emit->num_outputs++;
13130 }
13131 }
13132 }
13133
13134 /* Determine if constbuf to rawbuf translation is needed */
13135 emit->raw_buf_srv_start_index = emit->key.srv_raw_constbuf_index;
13136 if (emit->info.const_buffers_declared)
13137 emit->raw_bufs = emit->key.raw_constbufs;
13138
13139 emit->raw_shaderbuf_srv_start_index = emit->key.srv_raw_shaderbuf_index;
13140 if (emit->info.shader_buffers_declared)
13141 emit->raw_shaderbufs = emit->key.raw_shaderbufs;
13142
13143 /*
13144 * Do actual shader translation.
13145 */
13146 if (!emit_vgpu10_header(emit)) {
13147 debug_printf("svga: emit VGPU10 header failed\n");
13148 goto cleanup;
13149 }
13150
13151 if (!emit_vgpu10_instructions(emit, tokens)) {
13152 debug_printf("svga: emit VGPU10 instructions failed\n");
13153 goto cleanup;
13154 }
13155
13156 if (emit->num_new_immediates > 0) {
13157 reemit_immediates_block(emit);
13158 }
13159
13160 if (!emit_vgpu10_tail(emit)) {
13161 debug_printf("svga: emit VGPU10 tail failed\n");
13162 goto cleanup;
13163 }
13164
13165 if (emit->register_overflow) {
13166 goto cleanup;
13167 }
13168
13169 /*
13170 * Create, initialize the 'variant' object.
13171 */
13172 variant = svga_new_shader_variant(svga, unit);
13173 if (!variant)
13174 goto cleanup;
13175
13176 variant->shader = shader;
13177 variant->nr_tokens = emit_get_num_tokens(emit);
13178 variant->tokens = (const unsigned *)emit->buf;
13179
13180 /* Copy shader signature info to the shader variant */
13181 if (svga_have_sm5(svga)) {
13182 copy_shader_signature(&emit->signature, variant);
13183 }
13184
13185 emit->buf = NULL; /* buffer is no longer owed by emitter context */
13186 memcpy(&variant->key, key, sizeof(*key));
13187 variant->id = UTIL_BITMASK_INVALID_INDEX;
13188
13189 /* The extra constant starting offset starts with the number of
13190 * shader constants declared in the shader.
13191 */
13192 variant->extra_const_start = emit->num_shader_consts[0];
13193 if (key->gs.wide_point) {
13194 /**
13195 * The extra constant added in the transformed shader
13196 * for inverse viewport scale is to be supplied by the driver.
13197 * So the extra constant starting offset needs to be reduced by 1.
13198 */
13199 assert(variant->extra_const_start > 0);
13200 variant->extra_const_start--;
13201 }
13202
13203 if (unit == PIPE_SHADER_FRAGMENT) {
13204 struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
13205
13206 fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
13207 fs_variant->pstipple_sampler_state_index =
13208 emit->fs.pstipple_sampler_state_index;
13209
13210 /* If there was exactly one write to a fragment shader output register
13211 * and it came from a constant buffer, we know all fragments will have
13212 * the same color (except for blending).
13213 */
13214 fs_variant->constant_color_output =
13215 emit->constant_color_output && emit->num_output_writes == 1;
13216
13217 /** keep track in the variant if flat interpolation is used
13218 * for any of the varyings.
13219 */
13220 fs_variant->uses_flat_interp = emit->uses_flat_interp;
13221
13222 fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
13223 }
13224 else if (unit == PIPE_SHADER_TESS_EVAL) {
13225 struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
13226
13227 /* Keep track in the tes variant some of the layout parameters.
13228 * These parameters will be referenced by the tcs to emit
13229 * the necessary declarations for the hull shader.
13230 */
13231 tes_variant->prim_mode = emit->tes.prim_mode;
13232 tes_variant->spacing = emit->tes.spacing;
13233 tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
13234 tes_variant->point_mode = emit->tes.point_mode;
13235 }
13236
13237
13238 if (tokens != shader->tokens) {
13239 tgsi_free_tokens(tokens);
13240 }
13241
13242 cleanup:
13243 free_emitter(emit);
13244
13245 done:
13246 SVGA_STATS_TIME_POP(svga_sws(svga));
13247 return variant;
13248 }
13249