/**********************************************************
 * Copyright 1998-2022 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

/**
 * @file svga_tgsi_vgpu10.c
 *
 * TGSI -> VGPU10 shader translation.
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

35 #include "util/compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "tgsi/tgsi_scan.h"
42 #include "tgsi/tgsi_strings.h"
43 #include "tgsi/tgsi_two_side.h"
44 #include "tgsi/tgsi_aa_point.h"
45 #include "tgsi/tgsi_util.h"
46 #include "util/u_math.h"
47 #include "util/u_memory.h"
48 #include "util/u_bitmask.h"
49 #include "util/u_debug.h"
50 #include "util/u_pstipple.h"
51 
52 #include "svga_context.h"
53 #include "svga_debug.h"
54 #include "svga_link.h"
55 #include "svga_shader.h"
56 #include "svga_tgsi.h"
57 
58 #include "VGPU10ShaderTokens.h"
59 
60 
61 #define INVALID_INDEX 99999
62 #define MAX_INTERNAL_TEMPS 4
63 #define MAX_SYSTEM_VALUES 4
64 #define MAX_IMMEDIATE_COUNT \
65         (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
66 #define MAX_TEMP_ARRAYS 64  /* Enough? */
67 
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};


89 /* Shader signature info */
90 struct svga_shader_signature
91 {
92    SVGA3dDXShaderSignatureHeader header;
93    SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
94    SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
95    SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
96 };
97 
98 static inline void
set_shader_signature_entry(SVGA3dDXShaderSignatureEntry * e,unsigned index,SVGA3dDXSignatureSemanticName sgnName,unsigned mask,SVGA3dDXSignatureRegisterComponentType compType,SVGA3dDXSignatureMinPrecision minPrecision)99 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
100                            unsigned index,
101                            SVGA3dDXSignatureSemanticName sgnName,
102                            unsigned mask,
103                            SVGA3dDXSignatureRegisterComponentType compType,
104                            SVGA3dDXSignatureMinPrecision minPrecision)
105 {
106    e->registerIndex = index;
107    e->semanticName = sgnName;
108    e->mask = mask;
109    e->componentType = compType;
110    e->minPrecision = minPrecision;
111 };
112 
113 static const SVGA3dDXSignatureSemanticName
114 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
115    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
116    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
117    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
118    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122    SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
123    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124    SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
125    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
126    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
127    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
128    SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
129    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
131    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136    SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
137    SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
138    SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
139    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
140    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
141    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
142    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
143    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
144    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
145    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
160 };
161 
162 
163 /**
164  * Map tgsi semantic name to SVGA signature semantic name
165  */
166 static inline SVGA3dDXSignatureSemanticName
map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)167 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
168 {
169    assert(name < TGSI_SEMANTIC_COUNT);
170 
171    /* Do a few asserts here to spot check the mapping */
172    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
173           SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
174    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
175           SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
176    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
177           SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
178 
179    return tgsi_semantic_to_sgn_name[name];
180 }
181 
/**
 * Tri-state flag; it is the type of the emitter's
 * reemit_rawbuf_instruction field.
 */
enum reemit_mode {
   REEMIT_FALSE = 0,
   REEMIT_TRUE = 1,
   REEMIT_IN_PROGRESS = 2
};

/**
 * One entry of the emitter's raw_buf_tmp[] array ("temporaries for raw
 * buf source").  The three index fields are 8-bit bitfields.
 */
struct svga_raw_buf_tmp {
   bool indirect;
   unsigned buffer_index:8;
   unsigned element_index:8;
   unsigned element_rel:8;
};

195 struct svga_shader_emitter_v10
196 {
197    /* The token output buffer */
198    unsigned size;
199    char *buf;
200    char *ptr;
201 
202    /* Information about the shader and state (does not change) */
203    struct svga_compile_key key;
204    struct tgsi_shader_info info;
205    unsigned unit;
206    unsigned version; /**< Either 40, 41, 50 or 51 at this time */
207 
208    unsigned cur_tgsi_token;     /**< current tgsi token position */
209    unsigned inst_start_token;
210    bool discard_instruction; /**< throw away current instruction? */
211    bool reemit_instruction;  /**< reemit current instruction */
212    bool reemit_tgsi_instruction;  /**< reemit current tgsi instruction */
213    bool skip_instruction;    /**< skip current instruction */
214    bool use_sampler_state_mapping; /* use sampler state mapping */
215    enum reemit_mode reemit_rawbuf_instruction;
216 
217    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
218    double (*immediates_dbl)[2];
219    unsigned num_immediates;      /**< Number of immediates emitted */
220    unsigned common_immediate_pos[20];  /**< literals for common immediates */
221    unsigned num_common_immediates;
222    unsigned num_immediates_emitted;
223    unsigned num_new_immediates;        /** pending immediates to be declared */
224    unsigned immediates_block_start_token;
225    unsigned immediates_block_next_token;
226 
227    unsigned num_outputs;      /**< include any extra outputs */
228                               /**  The first extra output is reserved for
229                                *   non-adjusted vertex position for
230                                *   stream output purpose
231                                */
232 
233    /* Temporary Registers */
234    unsigned num_shader_temps; /**< num of temps used by original shader */
235    unsigned internal_temp_count;  /**< currently allocated internal temps */
236    struct {
237       unsigned start, size;
238    } temp_arrays[MAX_TEMP_ARRAYS];
239    unsigned num_temp_arrays;
240 
241    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
242    struct {
243       unsigned arrayId, index;
244       bool initialized;
245    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
246 
247    unsigned initialize_temp_index;
248 
249    /** Number of constants used by original shader for each constant buffer.
250     * The size should probably always match with that of svga_state.constbufs.
251     */
252    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
253 
254    /* Raw constant buffers */
255    unsigned raw_buf_srv_start_index;  /* starting srv index for raw buffers */
256    unsigned raw_bufs;                 /* raw buffers bitmask */
257    unsigned raw_buf_tmp_index;        /* starting temp index for raw buffers */
258    unsigned raw_buf_cur_tmp_index;    /* current temp index for raw buffers */
259    struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */
260 
261    /* Samplers */
262    unsigned num_samplers;
263    bool sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
264    uint8_t sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
265    uint8_t sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
266 
267    /* Images */
268    unsigned num_images;
269    unsigned image_mask;
270    struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
271    unsigned image_size_index;  /* starting index to cbuf for image size */
272 
273    /* Shader buffers */
274    unsigned num_shader_bufs;
275    unsigned raw_shaderbuf_srv_start_index;  /* starting srv index for raw shaderbuf */
276    uint64_t raw_shaderbufs;                 /* raw shader buffers bitmask */
277 
278    /* HW atomic buffers */
279    unsigned num_atomic_bufs;
280    unsigned atomic_bufs_mask;
281    unsigned max_atomic_counter_index;
282    VGPU10_OPCODE_TYPE cur_atomic_opcode;    /* current atomic opcode */
283 
284    bool uav_declared;  /* True if uav is declared */
285 
286    /* Index Range declaration */
287    struct {
288       unsigned start_index;
289       unsigned count;
290       bool required;
291       unsigned operandType;
292       unsigned size;
293       unsigned dim;
294    } index_range;
295 
296    /* Address regs (really implemented with temps) */
297    unsigned num_address_regs;
298    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
299 
300    /* Output register usage masks */
301    uint8_t output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
302 
303    /* To map TGSI system value index to VGPU shader input indexes */
304    uint8_t system_value_indexes[MAX_SYSTEM_VALUES];
305 
306    struct {
307       /* vertex position scale/translation */
308       unsigned out_index;  /**< the real position output reg */
309       unsigned tmp_index;  /**< the fake/temp position output reg */
310       unsigned so_index;   /**< the non-adjusted position output reg */
311       unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
312       unsigned prescale_scale_index, prescale_trans_index;
313       unsigned num_prescale;      /* number of prescale factor in const buf */
314       unsigned viewport_index;
315       unsigned need_prescale:1;
316       unsigned have_prescale:1;
317    } vposition;
318 
319    /* Shader limits */
320    unsigned max_vs_inputs;
321    unsigned max_vs_outputs;
322    unsigned max_gs_inputs;
323 
324    /* For vertex shaders only */
325    struct {
326       /* viewport constant */
327       unsigned viewport_index;
328 
329       unsigned vertex_id_bias_index;
330       unsigned vertex_id_sys_index;
331       unsigned vertex_id_tmp_index;
332 
333       /* temp index of adjusted vertex attributes */
334       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
335    } vs;
336 
337    /* For fragment shaders only */
338    struct {
339       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
340       unsigned num_color_outputs;
341       unsigned color_tmp_index;  /**< fake/temp color output reg */
342       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
343 
344       /* front-face */
345       unsigned face_input_index; /**< real fragment shader face reg (bool) */
346       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
347 
348       unsigned pstipple_sampler_unit;
349       unsigned pstipple_sampler_state_index;
350 
351       unsigned fragcoord_input_index;  /**< real fragment position input reg */
352       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
353 
354       unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
355 
356       unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
357       unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
358 
359       /** TGSI index of sample mask input sys value */
360       unsigned sample_mask_in_sys_index;
361 
362       /* layer */
363       unsigned layer_input_index;    /**< TGSI index of layer */
364       unsigned layer_imm_index;      /**< immediate for default layer 0 */
365 
366       bool forceEarlyDepthStencil;  /**< true if Early Depth stencil test is enabled */
367    } fs;
368 
369    /* For geometry shaders only */
370    struct {
371       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
372       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
373       unsigned input_size;       /**< size of input arrays */
374       unsigned prim_id_index;    /**< primitive id register index */
375       unsigned max_out_vertices; /**< maximum number of output vertices */
376       unsigned invocations;
377       unsigned invocation_id_sys_index;
378 
379       unsigned viewport_index_out_index;
380       unsigned viewport_index_tmp_index;
381    } gs;
382 
383    /* For tessellation control shaders only */
384    struct {
385       unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
386       unsigned imm_index;                    /**< immediate for tcs */
387       unsigned invocation_id_sys_index;      /**< invocation id */
388       unsigned invocation_id_tmp_index;
389       unsigned instruction_token_pos;        /* token pos for the first instruction */
390       unsigned control_point_input_index;    /* control point input register index */
391       unsigned control_point_addr_index;     /* control point input address register */
392       unsigned control_point_out_index;      /* control point output register index */
393       unsigned control_point_tmp_index;      /* control point temporary register */
394       unsigned control_point_out_count;      /* control point output count */
395       bool  control_point_phase;          /* true if in control point phase */
396       bool  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
397       unsigned patch_generic_out_count;      /* per-patch generic output count */
398       unsigned patch_generic_out_index;      /* per-patch generic output register index*/
399       unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
400       unsigned prim_id_index;                /* primitive id */
401       struct {
402          unsigned out_index;      /* real tessinner output register */
403          unsigned temp_index;     /* tessinner temp register */
404          unsigned tgsi_index;     /* tgsi tessinner output register */
405       } inner;
406       struct {
407          unsigned out_index;      /* real tessouter output register */
408          unsigned temp_index;     /* tessouter temp register */
409          unsigned tgsi_index;     /* tgsi tessouter output register */
410       } outer;
411    } tcs;
412 
413    /* For tessellation evaluation shaders only */
414    struct {
415       enum mesa_prim prim_mode;
416       enum pipe_tess_spacing spacing;
417       bool vertices_order_cw;
418       bool point_mode;
419       unsigned tesscoord_sys_index;
420       unsigned swizzle_max;
421       unsigned prim_id_index;                /* primitive id */
422       struct {
423          unsigned in_index;       /* real tessinner input register */
424          unsigned temp_index;     /* tessinner temp register */
425          unsigned tgsi_index;     /* tgsi tessinner input register */
426       } inner;
427       struct {
428          unsigned in_index;       /* real tessouter input register */
429          unsigned temp_index;     /* tessouter temp register */
430          unsigned tgsi_index;     /* tgsi tessouter input register */
431       } outer;
432    } tes;
433 
434    struct {
435       unsigned block_width;       /* thread group size in x dimension */
436       unsigned block_height;      /* thread group size in y dimension */
437       unsigned block_depth;       /* thread group size in z dimension */
438       unsigned thread_id_index;   /* thread id tgsi index */
439       unsigned block_id_index;    /* block id tgsi index */
440       bool shared_memory_declared;    /* set if shared memory is declared */
441       struct {
442          unsigned tgsi_index;   /* grid size tgsi index */
443          unsigned imm_index;    /* grid size imm index */
444       } grid_size;
445    } cs;
446 
447    /* For vertex or geometry shaders */
448    enum clipping_mode clip_mode;
449    unsigned clip_dist_out_index; /**< clip distance output register index */
450    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
451    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
452 
453    /** Index of temporary holding the clipvertex coordinate */
454    unsigned clip_vertex_out_index; /**< clip vertex output register index */
455    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
456 
457    /* user clip plane constant slot indexes */
458    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
459 
460    unsigned num_output_writes;
461    bool constant_color_output;
462 
463    bool uses_flat_interp;
464 
465    unsigned reserved_token;        /* index to the reserved token */
466    bool uses_precise_qualifier;
467 
468    /* For all shaders: const reg index for RECT coord scaling */
469    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
470 
471    /* For all shaders: const reg index for texture buffer size */
472    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
473 
474    /** Which texture units are doing shadow comparison in the shader code */
475    unsigned shadow_compare_units;
476 
477    /* VS/TCS/TES/GS/FS Linkage info */
478    struct shader_linkage linkage;
479    struct tgsi_shader_info *prevShaderInfo;
480 
481    /* Shader signature */
482    struct svga_shader_signature signature;
483 
484    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
485 
486    /* For util_debug_message */
487    struct util_debug_callback svga_debug_callback;
488 
489    /* current loop depth in shader */
490    unsigned current_loop_depth;
491 };
492 
493 
494 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
495 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
496 static bool emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
497 static bool emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
498 static bool emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
499 static bool emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
500 static bool emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
501 static bool emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
502 static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
503 static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
504 static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
505 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
506 
507 static bool
508 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
509 
510 static bool
511 emit_vertex(struct svga_shader_emitter_v10 *emit,
512             const struct tgsi_full_instruction *inst);
513 
514 static bool
515 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
516                         unsigned inst_number,
517                         const struct tgsi_full_instruction *inst);
518 
519 static void
520 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
521                        VGPU10_OPCODE_TYPE opcodeType,
522                        VGPU10_OPERAND_TYPE operandType,
523                        VGPU10_OPERAND_INDEX_DIMENSION dim,
524                        unsigned index, unsigned size,
525                        VGPU10_SYSTEM_NAME name,
526                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
527                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
528                        unsigned usageMask,
529                        VGPU10_INTERPOLATION_MODE interpMode,
530                        bool addSignature,
531                        SVGA3dDXSignatureSemanticName sgnName);
532 
533 static bool
534 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
535                         unsigned inst_number,
536                         const struct tgsi_full_instruction *inst);
537 
538 static void
539 create_temp_array(struct svga_shader_emitter_v10 *emit,
540                   unsigned arrayID, unsigned first, unsigned count,
541                   unsigned startIndex);
542 
543 static char err_buf[128];
544 
545 static bool
expand(struct svga_shader_emitter_v10 * emit)546 expand(struct svga_shader_emitter_v10 *emit)
547 {
548    char *new_buf;
549    unsigned newsize = emit->size * 2;
550 
551    if (emit->buf != err_buf)
552       new_buf = REALLOC(emit->buf, emit->size, newsize);
553    else
554       new_buf = NULL;
555 
556    if (!new_buf) {
557       emit->ptr = err_buf;
558       emit->buf = err_buf;
559       emit->size = sizeof(err_buf);
560       return false;
561    }
562 
563    emit->size = newsize;
564    emit->ptr = new_buf + (emit->ptr - emit->buf);
565    emit->buf = new_buf;
566    return true;
567 }
568 
569 /**
570  * Create and initialize a new svga_shader_emitter_v10 object.
571  */
572 static struct svga_shader_emitter_v10 *
alloc_emitter(void)573 alloc_emitter(void)
574 {
575    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
576 
577    if (!emit)
578       return NULL;
579 
580    /* to initialize the output buffer */
581    emit->size = 512;
582    if (!expand(emit)) {
583       FREE(emit);
584       return NULL;
585    }
586    return emit;
587 }
588 
589 /**
590  * Free an svga_shader_emitter_v10 object.
591  */
592 static void
free_emitter(struct svga_shader_emitter_v10 * emit)593 free_emitter(struct svga_shader_emitter_v10 *emit)
594 {
595    assert(emit);
596    FREE(emit->buf);    /* will be NULL if translation succeeded */
597    FREE(emit);
598 }
599 
600 static inline bool
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)601 reserve(struct svga_shader_emitter_v10 *emit,
602         unsigned nr_dwords)
603 {
604    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
605       if (!expand(emit))
606          return false;
607    }
608 
609    return true;
610 }
611 
612 static bool
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)613 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
614 {
615    if (!reserve(emit, 1))
616       return false;
617 
618    *(uint32 *)emit->ptr = dword;
619    emit->ptr += sizeof dword;
620    return true;
621 }
622 
623 static bool
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)624 emit_dwords(struct svga_shader_emitter_v10 *emit,
625             const uint32 *dwords,
626             unsigned nr)
627 {
628    if (!reserve(emit, nr))
629       return false;
630 
631    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
632    emit->ptr += nr * sizeof *dwords;
633    return true;
634 }
635 
636 /** Return the number of tokens in the emitter's buffer */
637 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)638 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
639 {
640    return (emit->ptr - emit->buf) / sizeof(unsigned);
641 }
642 
643 
644 /**
645  * Check for register overflow.  If we overflow we'll set an
646  * error flag.  This function can be called for register declarations
647  * or use as src/dst instruction operands.
648  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
649                 or VGPU10_OPCODE_DCL_x
650  * \param index  the register index
651  */
652 static void
check_register_index(struct svga_shader_emitter_v10 * emit,unsigned operandType,unsigned index)653 check_register_index(struct svga_shader_emitter_v10 *emit,
654                      unsigned operandType, unsigned index)
655 {
656    bool overflow_before = emit->register_overflow;
657 
658    switch (operandType) {
659    case VGPU10_OPERAND_TYPE_TEMP:
660    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
661    case VGPU10_OPCODE_DCL_TEMPS:
662       if (index >= VGPU10_MAX_TEMPS) {
663          emit->register_overflow = true;
664       }
665       break;
666    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
667    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
668       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
669          emit->register_overflow = true;
670       }
671       break;
672    case VGPU10_OPERAND_TYPE_INPUT:
673    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
674    case VGPU10_OPCODE_DCL_INPUT:
675    case VGPU10_OPCODE_DCL_INPUT_SGV:
676    case VGPU10_OPCODE_DCL_INPUT_SIV:
677    case VGPU10_OPCODE_DCL_INPUT_PS:
678    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
679    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
680       if ((emit->unit == PIPE_SHADER_VERTEX &&
681            index >= emit->max_vs_inputs) ||
682           (emit->unit == PIPE_SHADER_GEOMETRY &&
683            index >= emit->max_gs_inputs) ||
684           (emit->unit == PIPE_SHADER_FRAGMENT &&
685            index >= VGPU10_MAX_FS_INPUTS) ||
686           (emit->unit == PIPE_SHADER_TESS_CTRL &&
687            index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
688           (emit->unit == PIPE_SHADER_TESS_EVAL &&
689            index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
690          emit->register_overflow = true;
691       }
692       break;
693    case VGPU10_OPERAND_TYPE_OUTPUT:
694    case VGPU10_OPCODE_DCL_OUTPUT:
695    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
696    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
697       /* Note: we are skipping two output indices in tcs for
698        * tessinner/outer levels. Implementation will not exceed
699        * number of output count but it allows index to go beyond
700        * VGPU11_MAX_HS_OUTPUTS.
701        * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
702        */
703       if ((emit->unit == PIPE_SHADER_VERTEX &&
704            index >= emit->max_vs_outputs) ||
705           (emit->unit == PIPE_SHADER_GEOMETRY &&
706            index >= VGPU10_MAX_GS_OUTPUTS) ||
707           (emit->unit == PIPE_SHADER_FRAGMENT &&
708            index >= VGPU10_MAX_FS_OUTPUTS) ||
709           (emit->unit == PIPE_SHADER_TESS_CTRL &&
710            index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
711           (emit->unit == PIPE_SHADER_TESS_EVAL &&
712            index >= VGPU11_MAX_DS_OUTPUTS)) {
713          emit->register_overflow = true;
714       }
715       break;
716    case VGPU10_OPERAND_TYPE_SAMPLER:
717    case VGPU10_OPCODE_DCL_SAMPLER:
718       if (index >= VGPU10_MAX_SAMPLERS) {
719          emit->register_overflow = true;
720       }
721       break;
722    case VGPU10_OPERAND_TYPE_RESOURCE:
723    case VGPU10_OPCODE_DCL_RESOURCE:
724       if (index >= VGPU10_MAX_RESOURCES) {
725          emit->register_overflow = true;
726       }
727       break;
728    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
729       if (index >= MAX_IMMEDIATE_COUNT) {
730          emit->register_overflow = true;
731       }
732       break;
733    case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
734    case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
735    case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
736    case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
737    case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
738    case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
739    case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
740    case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
741       /* nothing */
742       break;
743    default:
744       assert(0);
745       ; /* nothing */
746    }
747 
748    if (emit->register_overflow && !overflow_before) {
749       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
750                    operandType, index);
751    }
752 }
753 
754 
755 /**
756  * Examine misc state to determine the clipping mode.
757  */
758 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)759 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
760 {
761    /* num_written_clipdistance in the shader info for tessellation
762     * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
763     * is not defined for this shader. So we go through all the output declarations
764     * to set the num_written_clipdistance. This is just to determine the
765     * clipping mode.
766     */
767    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
768       unsigned i;
769       for (i = 0; i < emit->info.num_outputs; i++) {
770          if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
771             emit->info.num_written_clipdistance =
772                4 * (emit->info.output_semantic_index[i] + 1);
773          }
774       }
775    }
776 
777    if (emit->info.num_written_clipdistance > 0) {
778       emit->clip_mode = CLIP_DISTANCE;
779    }
780    else if (emit->info.writes_clipvertex) {
781       emit->clip_mode = CLIP_VERTEX;
782    }
783    else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
784       /*
785        * Only the last shader in the vertex processing stage needs to
786        * handle the legacy clip mode.
787        */
788       emit->clip_mode = CLIP_LEGACY;
789    }
790    else {
791       emit->clip_mode = CLIP_NONE;
792    }
793 }
794 
795 
796 /**
797  * For clip distance register declarations and clip distance register
798  * writes we need to mask the declaration usage or instruction writemask
799  * (respectively) against the set of the really-enabled clipping planes.
800  *
801  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
802  * has a VS that writes to all 8 clip distance registers, but the plane enable
803  * flags are a subset of that.
804  *
805  * This function is used to apply the plane enable flags to the register
806  * declaration or instruction writemask.
807  *
808  * \param writemask  the declaration usage mask or instruction writemask
809  * \param clip_reg_index  which clip plane register is being declared/written.
810  *                        The legal values are 0 and 1 (two clip planes per
811  *                        register, for a total of 8 clip planes)
812  */
813 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)814 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
815                       unsigned writemask, unsigned clip_reg_index)
816 {
817    unsigned shift;
818 
819    assert(clip_reg_index < 2);
820 
821    /* four clip planes per clip register: */
822    shift = clip_reg_index * 4;
823    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
824 
825    return writemask;
826 }
827 
828 
829 /**
830  * Translate gallium shader type into VGPU10 type.
831  */
832 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)833 translate_shader_type(unsigned type)
834 {
835    switch (type) {
836    case PIPE_SHADER_VERTEX:
837       return VGPU10_VERTEX_SHADER;
838    case PIPE_SHADER_GEOMETRY:
839       return VGPU10_GEOMETRY_SHADER;
840    case PIPE_SHADER_FRAGMENT:
841       return VGPU10_PIXEL_SHADER;
842    case PIPE_SHADER_TESS_CTRL:
843       return VGPU10_HULL_SHADER;
844    case PIPE_SHADER_TESS_EVAL:
845       return VGPU10_DOMAIN_SHADER;
846    case PIPE_SHADER_COMPUTE:
847       return VGPU10_COMPUTE_SHADER;
848    default:
849       assert(!"Unexpected shader type");
850       return VGPU10_VERTEX_SHADER;
851    }
852 }
853 
854 
855 /**
856  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
857  * Note: we only need to translate the opcodes for "simple" instructions,
858  * as seen below.  All other opcodes are handled/translated specially.
859  */
860 static VGPU10_OPCODE_TYPE
translate_opcode(enum tgsi_opcode opcode)861 translate_opcode(enum tgsi_opcode opcode)
862 {
863    switch (opcode) {
864    case TGSI_OPCODE_MOV:
865       return VGPU10_OPCODE_MOV;
866    case TGSI_OPCODE_MUL:
867       return VGPU10_OPCODE_MUL;
868    case TGSI_OPCODE_ADD:
869       return VGPU10_OPCODE_ADD;
870    case TGSI_OPCODE_DP3:
871       return VGPU10_OPCODE_DP3;
872    case TGSI_OPCODE_DP4:
873       return VGPU10_OPCODE_DP4;
874    case TGSI_OPCODE_MIN:
875       return VGPU10_OPCODE_MIN;
876    case TGSI_OPCODE_MAX:
877       return VGPU10_OPCODE_MAX;
878    case TGSI_OPCODE_MAD:
879       return VGPU10_OPCODE_MAD;
880    case TGSI_OPCODE_SQRT:
881       return VGPU10_OPCODE_SQRT;
882    case TGSI_OPCODE_FRC:
883       return VGPU10_OPCODE_FRC;
884    case TGSI_OPCODE_FLR:
885       return VGPU10_OPCODE_ROUND_NI;
886    case TGSI_OPCODE_FSEQ:
887       return VGPU10_OPCODE_EQ;
888    case TGSI_OPCODE_FSGE:
889       return VGPU10_OPCODE_GE;
890    case TGSI_OPCODE_FSNE:
891       return VGPU10_OPCODE_NE;
892    case TGSI_OPCODE_DDX:
893       return VGPU10_OPCODE_DERIV_RTX;
894    case TGSI_OPCODE_DDY:
895       return VGPU10_OPCODE_DERIV_RTY;
896    case TGSI_OPCODE_RET:
897       return VGPU10_OPCODE_RET;
898    case TGSI_OPCODE_DIV:
899       return VGPU10_OPCODE_DIV;
900    case TGSI_OPCODE_IDIV:
901       return VGPU10_OPCODE_VMWARE;
902    case TGSI_OPCODE_DP2:
903       return VGPU10_OPCODE_DP2;
904    case TGSI_OPCODE_BRK:
905       return VGPU10_OPCODE_BREAK;
906    case TGSI_OPCODE_IF:
907       return VGPU10_OPCODE_IF;
908    case TGSI_OPCODE_ELSE:
909       return VGPU10_OPCODE_ELSE;
910    case TGSI_OPCODE_ENDIF:
911       return VGPU10_OPCODE_ENDIF;
912    case TGSI_OPCODE_CEIL:
913       return VGPU10_OPCODE_ROUND_PI;
914    case TGSI_OPCODE_I2F:
915       return VGPU10_OPCODE_ITOF;
916    case TGSI_OPCODE_NOT:
917       return VGPU10_OPCODE_NOT;
918    case TGSI_OPCODE_TRUNC:
919       return VGPU10_OPCODE_ROUND_Z;
920    case TGSI_OPCODE_SHL:
921       return VGPU10_OPCODE_ISHL;
922    case TGSI_OPCODE_AND:
923       return VGPU10_OPCODE_AND;
924    case TGSI_OPCODE_OR:
925       return VGPU10_OPCODE_OR;
926    case TGSI_OPCODE_XOR:
927       return VGPU10_OPCODE_XOR;
928    case TGSI_OPCODE_CONT:
929       return VGPU10_OPCODE_CONTINUE;
930    case TGSI_OPCODE_EMIT:
931       return VGPU10_OPCODE_EMIT;
932    case TGSI_OPCODE_ENDPRIM:
933       return VGPU10_OPCODE_CUT;
934    case TGSI_OPCODE_BGNLOOP:
935       return VGPU10_OPCODE_LOOP;
936    case TGSI_OPCODE_ENDLOOP:
937       return VGPU10_OPCODE_ENDLOOP;
938    case TGSI_OPCODE_ENDSUB:
939       return VGPU10_OPCODE_RET;
940    case TGSI_OPCODE_NOP:
941       return VGPU10_OPCODE_NOP;
942    case TGSI_OPCODE_END:
943       return VGPU10_OPCODE_RET;
944    case TGSI_OPCODE_F2I:
945       return VGPU10_OPCODE_FTOI;
946    case TGSI_OPCODE_IMAX:
947       return VGPU10_OPCODE_IMAX;
948    case TGSI_OPCODE_IMIN:
949       return VGPU10_OPCODE_IMIN;
950    case TGSI_OPCODE_UDIV:
951    case TGSI_OPCODE_UMOD:
952    case TGSI_OPCODE_MOD:
953       return VGPU10_OPCODE_UDIV;
954    case TGSI_OPCODE_IMUL_HI:
955       return VGPU10_OPCODE_IMUL;
956    case TGSI_OPCODE_INEG:
957       return VGPU10_OPCODE_INEG;
958    case TGSI_OPCODE_ISHR:
959       return VGPU10_OPCODE_ISHR;
960    case TGSI_OPCODE_ISGE:
961       return VGPU10_OPCODE_IGE;
962    case TGSI_OPCODE_ISLT:
963       return VGPU10_OPCODE_ILT;
964    case TGSI_OPCODE_F2U:
965       return VGPU10_OPCODE_FTOU;
966    case TGSI_OPCODE_UADD:
967       return VGPU10_OPCODE_IADD;
968    case TGSI_OPCODE_U2F:
969       return VGPU10_OPCODE_UTOF;
970    case TGSI_OPCODE_UCMP:
971       return VGPU10_OPCODE_MOVC;
972    case TGSI_OPCODE_UMAD:
973       return VGPU10_OPCODE_UMAD;
974    case TGSI_OPCODE_UMAX:
975       return VGPU10_OPCODE_UMAX;
976    case TGSI_OPCODE_UMIN:
977       return VGPU10_OPCODE_UMIN;
978    case TGSI_OPCODE_UMUL:
979    case TGSI_OPCODE_UMUL_HI:
980       return VGPU10_OPCODE_UMUL;
981    case TGSI_OPCODE_USEQ:
982       return VGPU10_OPCODE_IEQ;
983    case TGSI_OPCODE_USGE:
984       return VGPU10_OPCODE_UGE;
985    case TGSI_OPCODE_USHR:
986       return VGPU10_OPCODE_USHR;
987    case TGSI_OPCODE_USLT:
988       return VGPU10_OPCODE_ULT;
989    case TGSI_OPCODE_USNE:
990       return VGPU10_OPCODE_INE;
991    case TGSI_OPCODE_SWITCH:
992       return VGPU10_OPCODE_SWITCH;
993    case TGSI_OPCODE_CASE:
994       return VGPU10_OPCODE_CASE;
995    case TGSI_OPCODE_DEFAULT:
996       return VGPU10_OPCODE_DEFAULT;
997    case TGSI_OPCODE_ENDSWITCH:
998       return VGPU10_OPCODE_ENDSWITCH;
999    case TGSI_OPCODE_FSLT:
1000       return VGPU10_OPCODE_LT;
1001    case TGSI_OPCODE_ROUND:
1002       return VGPU10_OPCODE_ROUND_NE;
1003    /* Begin SM5 opcodes */
1004    case TGSI_OPCODE_F2D:
1005       return VGPU10_OPCODE_FTOD;
1006    case TGSI_OPCODE_D2F:
1007       return VGPU10_OPCODE_DTOF;
1008    case TGSI_OPCODE_DMUL:
1009       return VGPU10_OPCODE_DMUL;
1010    case TGSI_OPCODE_DADD:
1011       return VGPU10_OPCODE_DADD;
1012    case TGSI_OPCODE_DMAX:
1013       return VGPU10_OPCODE_DMAX;
1014    case TGSI_OPCODE_DMIN:
1015       return VGPU10_OPCODE_DMIN;
1016    case TGSI_OPCODE_DSEQ:
1017       return VGPU10_OPCODE_DEQ;
1018    case TGSI_OPCODE_DSGE:
1019       return VGPU10_OPCODE_DGE;
1020    case TGSI_OPCODE_DSLT:
1021       return VGPU10_OPCODE_DLT;
1022    case TGSI_OPCODE_DSNE:
1023       return VGPU10_OPCODE_DNE;
1024    case TGSI_OPCODE_IBFE:
1025       return VGPU10_OPCODE_IBFE;
1026    case TGSI_OPCODE_UBFE:
1027       return VGPU10_OPCODE_UBFE;
1028    case TGSI_OPCODE_BFI:
1029       return VGPU10_OPCODE_BFI;
1030    case TGSI_OPCODE_BREV:
1031       return VGPU10_OPCODE_BFREV;
1032    case TGSI_OPCODE_POPC:
1033       return VGPU10_OPCODE_COUNTBITS;
1034    case TGSI_OPCODE_LSB:
1035       return VGPU10_OPCODE_FIRSTBIT_LO;
1036    case TGSI_OPCODE_IMSB:
1037       return VGPU10_OPCODE_FIRSTBIT_SHI;
1038    case TGSI_OPCODE_UMSB:
1039       return VGPU10_OPCODE_FIRSTBIT_HI;
1040    case TGSI_OPCODE_INTERP_CENTROID:
1041       return VGPU10_OPCODE_EVAL_CENTROID;
1042    case TGSI_OPCODE_INTERP_SAMPLE:
1043       return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
1044    case TGSI_OPCODE_BARRIER:
1045       return VGPU10_OPCODE_SYNC;
1046    case TGSI_OPCODE_DFMA:
1047       return VGPU10_OPCODE_DFMA;
1048    case TGSI_OPCODE_FMA:
1049       return VGPU10_OPCODE_MAD;
1050 
1051    /* DX11.1 Opcodes */
1052    case TGSI_OPCODE_DDIV:
1053       return VGPU10_OPCODE_DDIV;
1054    case TGSI_OPCODE_DRCP:
1055       return VGPU10_OPCODE_DRCP;
1056    case TGSI_OPCODE_D2I:
1057       return VGPU10_OPCODE_DTOI;
1058    case TGSI_OPCODE_D2U:
1059       return VGPU10_OPCODE_DTOU;
1060    case TGSI_OPCODE_I2D:
1061       return VGPU10_OPCODE_ITOD;
1062    case TGSI_OPCODE_U2D:
1063       return VGPU10_OPCODE_UTOD;
1064 
1065    case TGSI_OPCODE_SAMPLE_POS:
1066       /* Note: we never actually get this opcode because there's no GLSL
1067        * function to query multisample resource sample positions.  There's
1068        * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
1069        * position of the current sample in the render target.
1070        */
1071       FALLTHROUGH;
1072    case TGSI_OPCODE_SAMPLE_INFO:
1073       /* NOTE: we never actually get this opcode because the GLSL compiler
1074        * implements the gl_NumSamples variable with a simple constant in the
1075        * constant buffer.
1076        */
1077       FALLTHROUGH;
1078    default:
1079       assert(!"Unexpected TGSI opcode in translate_opcode()");
1080       return VGPU10_OPCODE_NOP;
1081    }
1082 }
1083 
1084 
1085 /**
1086  * Translate a TGSI register file type into a VGPU10 operand type.
1087  * \param array  is the TGSI_FILE_TEMPORARY register an array?
1088  */
1089 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,bool array)1090 translate_register_file(enum tgsi_file_type file, bool array)
1091 {
1092    switch (file) {
1093    case TGSI_FILE_CONSTANT:
1094       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1095    case TGSI_FILE_INPUT:
1096       return VGPU10_OPERAND_TYPE_INPUT;
1097    case TGSI_FILE_OUTPUT:
1098       return VGPU10_OPERAND_TYPE_OUTPUT;
1099    case TGSI_FILE_TEMPORARY:
1100       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1101                    : VGPU10_OPERAND_TYPE_TEMP;
1102    case TGSI_FILE_IMMEDIATE:
1103       /* all immediates are 32-bit values at this time so
1104        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1105        */
1106       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1107    case TGSI_FILE_SAMPLER:
1108       return VGPU10_OPERAND_TYPE_SAMPLER;
1109    case TGSI_FILE_SYSTEM_VALUE:
1110       return VGPU10_OPERAND_TYPE_INPUT;
1111 
1112    /* XXX TODO more cases to finish */
1113 
1114    default:
1115       assert(!"Bad tgsi register file!");
1116       return VGPU10_OPERAND_TYPE_NULL;
1117    }
1118 }
1119 
1120 
1121 /**
1122  * Emit a null dst register
1123  */
1124 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)1125 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1126 {
1127    VGPU10OperandToken0 operand;
1128 
1129    operand.value = 0;
1130    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1131    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1132 
1133    emit_dword(emit, operand.value);
1134 }
1135 
1136 
1137 /**
1138  * If the given register is a temporary, return the array ID.
1139  * Else return zero.
1140  */
1141 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1142 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1143                   enum tgsi_file_type file, unsigned index)
1144 {
1145    if (file == TGSI_FILE_TEMPORARY) {
1146       return emit->temp_map[index].arrayId;
1147    }
1148    else {
1149       return 0;
1150    }
1151 }
1152 
1153 
1154 /**
1155  * If the given register is a temporary, convert the index from a TGSI
1156  * TEMPORARY index to a VGPU10 temp index.
1157  */
1158 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1159 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1160                  enum tgsi_file_type file, unsigned index)
1161 {
1162    if (file == TGSI_FILE_TEMPORARY) {
1163       return emit->temp_map[index].index;
1164    }
1165    else {
1166       return index;
1167    }
1168 }
1169 
1170 
1171 /**
1172  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1173  * Note: the operandType field must already be initialized.
1174  * \param file  the register file being accessed
1175  * \param indirect  using indirect addressing of the register file?
1176  * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1177  * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1178  */
1179 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,bool indirect,bool index2D,bool indirect2D)1180 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1181                         VGPU10OperandToken0 operand0,
1182                         enum tgsi_file_type file,
1183                         bool indirect,
1184                         bool index2D, bool indirect2D)
1185 {
1186    VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1187    VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1188 
1189    /*
1190     * Compute index dimensions
1191     */
1192    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1193        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1194        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1195        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1196        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1197        operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1198       /* there's no swizzle for in-line immediates */
1199       indexDim = VGPU10_OPERAND_INDEX_0D;
1200       assert(operand0.selectionMode == 0);
1201    }
1202    else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1203       indexDim = VGPU10_OPERAND_INDEX_0D;
1204    }
1205    else {
1206       indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1207    }
1208 
1209    /*
1210     * Compute index representation(s) (immediate vs relative).
1211     */
1212    if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1213       index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1214          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1215 
1216       index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1217          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1218    }
1219    else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1220       index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1221          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1222 
1223       index1Rep = 0;
1224    }
1225    else {
1226       index0Rep = 0;
1227       index1Rep = 0;
1228    }
1229 
1230    operand0.indexDimension = indexDim;
1231    operand0.index0Representation = index0Rep;
1232    operand0.index1Representation = index1Rep;
1233 
1234    return operand0;
1235 }
1236 
1237 
1238 /**
1239  * Emit the operand for expressing an address register for indirect indexing.
1240  * Note that the address register is really just a temp register.
1241  * \param addr_reg_index  which address register to use
1242  */
1243 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)1244 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1245                        unsigned addr_reg_index)
1246 {
1247    unsigned tmp_reg_index;
1248    VGPU10OperandToken0 operand0;
1249 
1250    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1251 
1252    tmp_reg_index = emit->address_reg_index[addr_reg_index];
1253 
1254    /* operand0 is a simple temporary register, selecting one component */
1255    operand0.value = 0;
1256    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1257    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1258    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1259    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1260    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1261    operand0.swizzleX = 0;
1262    operand0.swizzleY = 1;
1263    operand0.swizzleZ = 2;
1264    operand0.swizzleW = 3;
1265 
1266    emit_dword(emit, operand0.value);
1267    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1268 }
1269 
1270 
1271 /**
1272  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1273  * \param emit  the emitter context
1274  * \param reg  the TGSI dst register to translate
1275  */
1276 static void
emit_dst_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * reg)1277 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1278                   const struct tgsi_full_dst_register *reg)
1279 {
1280    enum tgsi_file_type file = reg->Register.File;
1281    unsigned index = reg->Register.Index;
1282    const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1283    const unsigned sem_index = emit->info.output_semantic_index[index];
1284    unsigned writemask = reg->Register.WriteMask;
1285    const bool indirect = reg->Register.Indirect;
1286    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1287    bool index2d = reg->Register.Dimension || tempArrayId > 0;
1288    VGPU10OperandToken0 operand0;
1289 
1290    if (file == TGSI_FILE_TEMPORARY) {
1291       emit->temp_map[index].initialized = true;
1292    }
1293 
1294    if (file == TGSI_FILE_OUTPUT) {
1295       if (emit->unit == PIPE_SHADER_VERTEX ||
1296           emit->unit == PIPE_SHADER_GEOMETRY ||
1297           emit->unit == PIPE_SHADER_TESS_EVAL) {
1298          if (index == emit->vposition.out_index &&
1299              emit->vposition.tmp_index != INVALID_INDEX) {
1300             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
1301              * vertex position result in a temporary so that we can modify
1302              * it in the post_helper() code.
1303              */
1304             file = TGSI_FILE_TEMPORARY;
1305             index = emit->vposition.tmp_index;
1306          }
1307          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1308                   emit->clip_dist_tmp_index != INVALID_INDEX) {
1309             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1310              * We store the clip distance in a temporary first, then
1311              * we'll copy it to the shadow copy and to CLIPDIST with the
1312              * enabled planes mask in emit_clip_distance_instructions().
1313              */
1314             file = TGSI_FILE_TEMPORARY;
1315             index = emit->clip_dist_tmp_index + sem_index;
1316          }
1317          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1318                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
1319             /* replace the CLIPVERTEX output register with a temporary */
1320             assert(emit->clip_mode == CLIP_VERTEX);
1321             assert(sem_index == 0);
1322             file = TGSI_FILE_TEMPORARY;
1323             index = emit->clip_vertex_tmp_index;
1324          }
1325          else if (sem_name == TGSI_SEMANTIC_COLOR &&
1326                   emit->key.clamp_vertex_color) {
1327 
1328             /* set the saturate modifier of the instruction
1329              * to clamp the vertex color.
1330              */
1331             VGPU10OpcodeToken0 *token =
1332                (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1333             token->saturate = true;
1334          }
1335          else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1336                   emit->gs.viewport_index_out_index != INVALID_INDEX) {
1337             file = TGSI_FILE_TEMPORARY;
1338             index = emit->gs.viewport_index_tmp_index;
1339          }
1340       }
1341       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1342          if (sem_name == TGSI_SEMANTIC_POSITION) {
1343             /* Fragment depth output register */
1344             operand0.value = 0;
1345             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1346             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1347             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1348             emit_dword(emit, operand0.value);
1349             return;
1350          }
1351          else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1352             /* Fragment sample mask output */
1353             operand0.value = 0;
1354             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1355             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1356             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1357             emit_dword(emit, operand0.value);
1358             return;
1359          }
1360          else if (index == emit->fs.color_out_index[0] &&
1361              emit->fs.color_tmp_index != INVALID_INDEX) {
1362             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
1363              * fragment color result in a temporary so that we can read it
1364              * it in the post_helper() code.
1365              */
1366             file = TGSI_FILE_TEMPORARY;
1367             index = emit->fs.color_tmp_index;
1368          }
1369          else {
1370             /* Typically, for fragment shaders, the output register index
1371              * matches the color semantic index.  But not when we write to
1372              * the fragment depth register.  In that case, OUT[0] will be
1373              * fragdepth and OUT[1] will be the 0th color output.  We need
1374              * to use the semantic index for color outputs.
1375              */
1376             assert(sem_name == TGSI_SEMANTIC_COLOR);
1377             index = emit->info.output_semantic_index[index];
1378 
1379             emit->num_output_writes++;
1380          }
1381       }
1382       else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1383          if (index == emit->tcs.inner.tgsi_index) {
1384             /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1385              * in temporary for now so that will be store into appropriate
1386              * registers in post_helper() in patch constant phase.
1387              */
1388             if (emit->tcs.control_point_phase) {
1389                /* Discard writing into tessfactor in control point phase */
1390                emit->discard_instruction =  true;
1391             }
1392             else {
1393                file = TGSI_FILE_TEMPORARY;
1394                index = emit->tcs.inner.temp_index;
1395             }
1396          }
1397          else if (index == emit->tcs.outer.tgsi_index) {
1398             /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1399              * in temporary for now so that will be store into appropriate
1400              * registers in post_helper().
1401              */
1402             if (emit->tcs.control_point_phase) {
1403                /* Discard writing into tessfactor in control point phase */
1404                emit->discard_instruction =  true;
1405             }
1406             else {
1407                file = TGSI_FILE_TEMPORARY;
1408                index = emit->tcs.outer.temp_index;
1409             }
1410          }
1411          else if (index >= emit->tcs.patch_generic_out_index &&
1412                   index < (emit->tcs.patch_generic_out_index +
1413                           emit->tcs.patch_generic_out_count)) {
1414             if (emit->tcs.control_point_phase) {
1415                /* Discard writing into generic patch constant outputs in
1416                   control point phase */
1417                emit->discard_instruction =  true;
1418             }
1419             else {
1420                if (emit->reemit_instruction) {
1421                   /* Store results of reemitted instruction in temporary register. */
1422                   file = TGSI_FILE_TEMPORARY;
1423                   index = emit->tcs.patch_generic_tmp_index +
1424                           (index - emit->tcs.patch_generic_out_index);
1425                   /**
1426                    * Temporaries for patch constant data can be done
1427                    * as indexable temporaries.
1428                    */
1429                   tempArrayId = get_temp_array_id(emit, file, index);
1430                   index2d = tempArrayId > 0;
1431 
1432                   emit->reemit_instruction = false;
1433                }
1434                else {
1435                   /* If per-patch outputs is been read in shader, we
1436                    * reemit instruction and store results in temporaries in
1437                    * patch constant phase. */
1438                   if (emit->info.reads_perpatch_outputs) {
1439                      emit->reemit_instruction = true;
1440                   }
1441                }
1442             }
1443          }
1444          else if (reg->Register.Dimension) {
1445             /* Only control point outputs are declared 2D in tgsi */
1446             if (emit->tcs.control_point_phase) {
1447                if (emit->reemit_instruction) {
1448                   /* Store results of reemitted instruction in temporary register. */
1449                   index2d = false;
1450                   file = TGSI_FILE_TEMPORARY;
1451                   index = emit->tcs.control_point_tmp_index +
1452                           (index - emit->tcs.control_point_out_index);
1453                   emit->reemit_instruction = false;
1454                }
1455                else {
1456                   /* The mapped control point outputs are 1-D */
1457                   index2d = false;
1458                   if (emit->info.reads_pervertex_outputs) {
1459                      /* If per-vertex outputs is been read in shader, we
1460                       * reemit instruction and store results in temporaries
1461                       * control point phase. */
1462                      emit->reemit_instruction = true;
1463                   }
1464                }
1465 
1466                if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1467                    emit->clip_dist_tmp_index != INVALID_INDEX) {
1468                   /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1469                    * We store the clip distance in a temporary first, then
1470                    * we'll copy it to the shadow copy and to CLIPDIST with the
1471                    * enabled planes mask in emit_clip_distance_instructions().
1472                    */
1473                   file = TGSI_FILE_TEMPORARY;
1474                   index = emit->clip_dist_tmp_index + sem_index;
1475                }
1476                else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1477                         emit->clip_vertex_tmp_index != INVALID_INDEX) {
1478                   /* replace the CLIPVERTEX output register with a temporary */
1479                   assert(emit->clip_mode == CLIP_VERTEX);
1480                   assert(sem_index == 0);
1481                   file = TGSI_FILE_TEMPORARY;
1482                   index = emit->clip_vertex_tmp_index;
1483                }
1484             }
1485             else {
1486                /* Discard writing into control point outputs in
1487                   patch constant phase */
1488                emit->discard_instruction =  true;
1489             }
1490          }
1491       }
1492    }
1493 
1494    /* init operand tokens to all zero */
1495    operand0.value = 0;
1496 
1497    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1498 
1499    /* the operand has a writemask */
1500    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1501 
1502    /* Which of the four dest components to write to. Note that we can use a
1503     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1504     */
1505    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1506    operand0.mask = writemask;
1507 
1508    /* translate TGSI register file type to VGPU10 operand type */
1509    operand0.operandType = translate_register_file(file, tempArrayId > 0);
1510 
1511    check_register_index(emit, operand0.operandType, index);
1512 
1513    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1514                                       index2d, false);
1515 
1516    /* Emit tokens */
1517    emit_dword(emit, operand0.value);
1518    if (tempArrayId > 0) {
1519       emit_dword(emit, tempArrayId);
1520    }
1521 
1522    emit_dword(emit, remap_temp_index(emit, file, index));
1523 
1524    if (indirect) {
1525       emit_indirect_register(emit, reg->Indirect.Index);
1526    }
1527 }
1528 
1529 
1530 /**
1531  * Check if temporary register needs to be initialize when
1532  * shader is not using indirect addressing for temporary and uninitialized
1533  * temporary is not used in loop. In these two scenarios, we cannot
1534  * determine if temporary is initialized or not.
1535  */
1536 static bool
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1537 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1538                              unsigned index)
1539 {
1540    if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1541        && emit->current_loop_depth == 0) {
1542       if (!emit->temp_map[index].initialized &&
1543           emit->temp_map[index].index < emit->num_shader_temps) {
1544          return true;
1545       }
1546    }
1547 
1548    return false;
1549 }
1550 
1551 
1552 /**
1553  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1554  * In quite a few cases, we do register substitution.  For example, if
1555  * the TGSI register is the front/back-face register, we replace that with
1556  * a temp register containing a value we computed earlier.
 *
 * The function works in three steps:
 *   1. Per shader stage (emit->unit), rewrite the register file / index /
 *      swizzle, or pre-fill operand0 for registers that map to a special
 *      VGPU10 operand type.  A few special operands are emitted as a
 *      single token and the function returns immediately.
 *   2. Apply stage-independent substitutions (address registers, constant
 *      buffers bound as raw buffers, uninitialized temporaries).  Several
 *      of these set emit->discard_instruction.
 *   3. Emit operand token 0, the extended modifier token when abs/negate is
 *      requested, and then either the four in-line immediate values or the
 *      register index(es).
 *
 * \param emit  the shader emitter state; tokens are appended via emit_dword()
 * \param reg   the TGSI source register to translate
1557  */
1558 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1559 emit_src_register(struct svga_shader_emitter_v10 *emit,
1560                   const struct tgsi_full_src_register *reg)
1561 {
1562    enum tgsi_file_type file = reg->Register.File;
1563    unsigned index = reg->Register.Index;
1564    bool indirect = reg->Register.Indirect;
1565    unsigned tempArrayId = get_temp_array_id(emit, file, index);
        /* Constants and array temporaries are always emitted with two
         * indexes (see the index emission at the end of this function).
         */
1566    bool index2d = (reg->Register.Dimension ||
1567                             tempArrayId > 0 ||
1568                             file == TGSI_FILE_CONSTANT);
1569    unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1570    bool indirect2d = reg->Dimension.Indirect;
1571    unsigned swizzleX = reg->Register.SwizzleX;
1572    unsigned swizzleY = reg->Register.SwizzleY;
1573    unsigned swizzleZ = reg->Register.SwizzleZ;
1574    unsigned swizzleW = reg->Register.SwizzleW;
1575    const bool absolute = reg->Register.Absolute;
1576    const bool negate = reg->Register.Negate;
1577    VGPU10OperandToken0 operand0;
1578    VGPU10OperandToken1 operand1;
1579 
        /* operand0.value == 0 is used further down as the "no special case
         * filled in the operand yet" marker.
         */
1580    operand0.value = operand1.value = 0;
1581 
        /* Step 1: per-stage register substitution. */
1582    if (emit->unit == PIPE_SHADER_FRAGMENT){
1583       if (file == TGSI_FILE_INPUT) {
1584          if (index == emit->fs.face_input_index) {
1585             /* Replace INPUT[FACE] with TEMP[FACE] */
1586             file = TGSI_FILE_TEMPORARY;
1587             index = emit->fs.face_tmp_index;
1588          }
1589          else if (index == emit->fs.fragcoord_input_index) {
1590             /* Replace INPUT[POSITION] with TEMP[POSITION] */
1591             file = TGSI_FILE_TEMPORARY;
1592             index = emit->fs.fragcoord_tmp_index;
1593          }
1594          else if (index == emit->fs.layer_input_index) {
1595             /* Replace INPUT[LAYER] with zero.x */
1596             file = TGSI_FILE_IMMEDIATE;
1597             index = emit->fs.layer_imm_index;
1598             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1599          }
1600          else {
1601             /* We remap fragment shader inputs so that FS input indexes
1602              * match up with VS/GS output indexes.
1603              */
1604             index = emit->linkage.input_map[index];
1605          }
1606       }
1607       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1608          if (index == emit->fs.sample_pos_sys_index) {
1609             assert(emit->version >= 41);
1610             /* Current sample position is in a temp register */
1611             file = TGSI_FILE_TEMPORARY;
1612             index = emit->fs.sample_pos_tmp_index;
1613          }
1614          else if (index == emit->fs.sample_mask_in_sys_index) {
1615             /* Emitted as vCoverage0.x */
1616             /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1617              * elements where s is the maximum number of color samples supported
1618              * by the implementation.
1619              */
1620             operand0.value = 0;
1621             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1622             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1623             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1624             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
                  /* 0D operand: a single token, no index follows. */
1625             emit_dword(emit, operand0.value);
1626             return;
1627          }
1628          else {
1629             /* Map the TGSI system value to a VGPU10 input register */
1630             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1631             file = TGSI_FILE_INPUT;
1632             index = emit->system_value_indexes[index];
1633          }
1634       }
1635    }
1636    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1637       if (file == TGSI_FILE_INPUT) {
1638          if (index == emit->gs.prim_id_index) {
1639             operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1640             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1641          }
1642          index = emit->linkage.input_map[index];
1643       }
1644       else if (file == TGSI_FILE_SYSTEM_VALUE &&
1645                index == emit->gs.invocation_id_sys_index) {
1646          /* Emitted as vGSInstanceID0.x */
1647          operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1648          operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1649          index = 0;
1650       }
1651    }
1652    else if (emit->unit == PIPE_SHADER_VERTEX) {
1653       if (file == TGSI_FILE_INPUT) {
1654          /* if input is adjusted... */
              /* NOTE(review): `1 << index` shifts a signed int, whereas
               * need_temp_reg_initialization() builds its mask with 1u.
               * Confirm index can never reach 31 here.
               */
1655          if ((emit->key.vs.adjust_attrib_w_1 |
1656               emit->key.vs.adjust_attrib_itof |
1657               emit->key.vs.adjust_attrib_utof |
1658               emit->key.vs.attrib_is_bgra |
1659               emit->key.vs.attrib_puint_to_snorm |
1660               emit->key.vs.attrib_puint_to_uscaled |
1661               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
                 /* Read the adjusted copy of the attribute from a temp. */
1662             file = TGSI_FILE_TEMPORARY;
1663             index = emit->vs.adjusted_input[index];
1664          }
1665       }
1666       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1667          if (index == emit->vs.vertex_id_sys_index &&
1668              emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
                 /* Read the vertex id from its temp register, .x only. */
1669             file = TGSI_FILE_TEMPORARY;
1670             index = emit->vs.vertex_id_tmp_index;
1671             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1672          }
1673          else {
1674             /* Map the TGSI system value to a VGPU10 input register */
1675             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1676             file = TGSI_FILE_INPUT;
1677             index = emit->system_value_indexes[index];
1678          }
1679       }
1680    }
1681    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1682 
1683       if (file == TGSI_FILE_SYSTEM_VALUE) {
1684          if (index == emit->tcs.vertices_per_patch_index) {
1685             /**
1686              * if source register is the system value for vertices_per_patch,
1687              * replace it with the immediate.
1688              */
1689             file = TGSI_FILE_IMMEDIATE;
1690             index = emit->tcs.imm_index;
1691             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1692          }
1693          else if (index == emit->tcs.invocation_id_sys_index) {
1694             if (emit->tcs.control_point_phase) {
1695                /**
1696                 * Emitted as vOutputControlPointID.x
1697                 */
1698                operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1699                operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1700                operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1701                operand0.mask = 0;
                     /* Single-token operand; nothing else to emit. */
1702                emit_dword(emit, operand0.value);
1703                return;
1704             }
1705             else {
1706                /* There is no control point ID input declaration in
1707                 * the patch constant phase in hull shader.
1708                 * Since for now we are emitting all instructions in
1709                 * the patch constant phase, we are replacing the
1710                 * control point ID reference with the immediate 0.
1711                 */
1712                file = TGSI_FILE_IMMEDIATE;
1713                index = emit->tcs.imm_index;
1714                swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1715             }
1716          }
1717          else if (index == emit->tcs.prim_id_index) {
1718             /**
1719              * Emitted as vPrim.x
1720              */
1721             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723             index = 0;
1724          }
1725       }
1726       else if (file == TGSI_FILE_INPUT) {
1727          index = emit->linkage.input_map[index];
1728          if (!emit->tcs.control_point_phase) {
1729             /* Emitted as vicp */
1730             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1731             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1732             assert(reg->Register.Dimension);
1733          }
1734       }
1735       else if (file == TGSI_FILE_OUTPUT) {
              /* Reading back a patch-constant output (generic patch output
               * or inner/outer tess factor).
               */
1736          if ((index >= emit->tcs.patch_generic_out_index &&
1737              index < (emit->tcs.patch_generic_out_index +
1738                       emit->tcs.patch_generic_out_count)) ||
1739              index == emit->tcs.inner.tgsi_index ||
1740              index == emit->tcs.outer.tgsi_index) {
1741             if (emit->tcs.control_point_phase) {
1742                emit->discard_instruction = true;
1743             }
1744             else {
1745                /* Device doesn't allow reading from output so
1746                 * use corresponding temporary register as source */
1747                file = TGSI_FILE_TEMPORARY;
1748                if (index == emit->tcs.inner.tgsi_index) {
1749                   index = emit->tcs.inner.temp_index;
1750                }
1751                else if (index == emit->tcs.outer.tgsi_index) {
1752                   index = emit->tcs.outer.temp_index;
1753                }
1754                else {
1755                   index = emit->tcs.patch_generic_tmp_index +
1756                           (index - emit->tcs.patch_generic_out_index);
1757                }
1758 
1759                /**
1760                 * Temporaries for patch constant data can be done
1761                 * as indexable temporaries.
1762                 */
1763                tempArrayId = get_temp_array_id(emit, file, index);
1764                index2d = tempArrayId > 0;
1765                index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1766             }
1767          }
1768          else if (index2d) {
                 /* Any other 2D-indexed output read. */
1769             if (emit->tcs.control_point_phase) {
1770                /* Device doesn't allow reading from output so
1771                 * use corresponding temporary register as source */
1772                file = TGSI_FILE_TEMPORARY;
1773                index2d = false;
1774                index = emit->tcs.control_point_tmp_index +
1775                        (index - emit->tcs.control_point_out_index);
1776             }
1777             else {
1778                emit->discard_instruction = true;
1779             }
1780          }
1781       }
1782    }
1783    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1784       if (file == TGSI_FILE_SYSTEM_VALUE) {
1785          if (index == emit->tes.tesscoord_sys_index) {
1786             /**
1787              * Emitted as vDomain
1788              */
1789             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1790             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1791             index = 0;
1792 
1793             /* Make sure swizzles are of those components allowed according
1794              * to the tessellator domain.
1795              */
1796             swizzleX = MIN2(swizzleX, emit->tes.swizzle_max);
1797             swizzleY = MIN2(swizzleY, emit->tes.swizzle_max);
1798             swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max);
1799             swizzleW = MIN2(swizzleW, emit->tes.swizzle_max);
1800          }
1801          else if (index == emit->tes.inner.tgsi_index) {
                 /* Inner tess factor is read from its temp register. */
1802             file = TGSI_FILE_TEMPORARY;
1803             index = emit->tes.inner.temp_index;
1804          }
1805          else if (index == emit->tes.outer.tgsi_index) {
                 /* Outer tess factor is read from its temp register. */
1806             file = TGSI_FILE_TEMPORARY;
1807             index = emit->tes.outer.temp_index;
1808          }
1809          else if (index == emit->tes.prim_id_index) {
1810             /**
1811              * Emitted as vPrim.x
1812              */
1813             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1814             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1815             index = 0;
1816          }
1817 
1818       }
1819       else if (file == TGSI_FILE_INPUT) {
1820          if (index2d) {
1821             /* 2D input is emitted as vcp (input control point). */
1822             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1823             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1824 
1825             /* index specifies the element index and is remapped
1826              * to align with the tcs output index.
1827              */
1828             index = emit->linkage.input_map[index];
1829 
1830             assert(index2 < emit->key.tes.vertices_per_patch);
1831          }
1832          else {
1833             if (index < emit->key.tes.tessfactor_index)
1834                /* index specifies the generic patch index.
1835                 * Remapped to match up with the tcs output index.
1836                 */
1837                index = emit->linkage.input_map[index];
1838 
1839             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1840             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1841          }
1842       }
1843    }
1844    else if (emit->unit == PIPE_SHADER_COMPUTE) {
1845       if (file == TGSI_FILE_SYSTEM_VALUE) {
1846          if (index == emit->cs.thread_id_index) {
1847             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1848             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
1849             index = 0;
1850          } else if (index == emit->cs.block_id_index) {
                 /* Emitted right here as a single 0D token carrying the
                  * caller's swizzle; abs/negate modifiers are not applied
                  * on this path.
                  */
1851             operand0.value = 0;
1852             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1853             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
1854             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1855             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1856             operand0.swizzleX = swizzleX;
1857             operand0.swizzleY = swizzleY;
1858             operand0.swizzleZ = swizzleZ;
1859             operand0.swizzleW = swizzleW;
1860             emit_dword(emit, operand0.value);
1861             return;
1862          } else if (index == emit->cs.grid_size.tgsi_index) {
                 /* Grid size is read from an immediate. */
1863             file = TGSI_FILE_IMMEDIATE;
1864             index = emit->cs.grid_size.imm_index;
1865          }
1866       }
1867    }
1868 
        /* Step 2: stage-independent substitutions. */

        /* Address registers are read from temporaries. */
1869    if (file == TGSI_FILE_ADDRESS) {
1870       index = emit->address_reg_index[index];
1871       file = TGSI_FILE_TEMPORARY;
1872    }
1873 
1874    if (file == TGSI_FILE_CONSTANT) {
1875       /**
1876        * If this constant buffer is to be bound as srv raw buffer,
1877        * then we have to load the constant to a temp first before
1878        * it can be used as a source in the instruction.
1879        * This is accomplished in two passes. The first pass is to
1880        * identify if there is any constbuf to rawbuf translation.
1881        * If there isn't, emit the instruction as usual.
1882        * If there is, then we save the constant buffer reference info,
1883        * and then instead of emitting the instruction at the end
1884        * of the instruction, it will trigger a second pass of parsing
1885        * this instruction. Before it starts the parsing, it will
1886        * load the referenced raw buffer elements to temporaries.
1887        * Then it will emit the instruction that replaces the
1888        * constant buffer references with the corresponding temporaries.
1889        */
           /* NOTE(review): `1 << index2` is a signed shift; confirm the
            * constant buffer index is always small enough for that.
            */
1890       if (emit->raw_bufs & (1 << index2)) {
1891          if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
                 /* First pass: record the reference and discard. */
1892             unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
1893 
1894             emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
1895 
1896             /* Save whether the element index is indirect indexing */
1897             emit->raw_buf_tmp[tmpIdx].indirect = indirect;
1898 
1899             /* If it is indirect index, save the temporary
1900              * address index, otherwise, save the immediate index.
1901              */
1902             if (indirect) {
1903                emit->raw_buf_tmp[tmpIdx].element_index =
1904                   emit->address_reg_index[reg->Indirect.Index];
1905                emit->raw_buf_tmp[tmpIdx].element_rel =
1906                   reg->Register.Index;
1907             }
1908             else {
1909                emit->raw_buf_tmp[tmpIdx].element_index = index;
1910                emit->raw_buf_tmp[tmpIdx].element_rel = 0;
1911             }
1912 
1913             emit->raw_buf_cur_tmp_index++;
1914             emit->reemit_rawbuf_instruction = REEMIT_TRUE;
1915             emit->discard_instruction = true;
1916             emit->reemit_tgsi_instruction = true;
1917          }
1918          else {
1919             /* In the reemitting process, replace the constant buffer
1920              * reference with temporary.
1921              */
1922             file = TGSI_FILE_TEMPORARY;
1923             index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
1924             index2d = false;
1925             indirect = false;
1926             emit->raw_buf_cur_tmp_index++;
1927          }
1928       }
1929    }
1930 
        /* A temporary that still needs initializing: remember which one
         * and discard the instruction being emitted.
         */
1931    if (file == TGSI_FILE_TEMPORARY) {
1932       if (need_temp_reg_initialization(emit, index)) {
1933          emit->initialize_temp_index = index;
1934          emit->discard_instruction = true;
1935       }
1936    }
1937 
        /* Step 3: build and emit the operand tokens. */
1938    if (operand0.value == 0) {
1939       /* if operand0 was not set above for a special case, do the general
1940        * case now.
1941        */
1942       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1943       operand0.operandType = translate_register_file(file, tempArrayId > 0);
1944    }
1945    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1946                                       index2d, indirect2d);
1947 
1948    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1949        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1950       /* there's no swizzle for in-line immediates */
1951       if (swizzleX == swizzleY &&
1952           swizzleX == swizzleZ &&
1953           swizzleX == swizzleW) {
              /* All four components select the same source component. */
1954          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1955       }
1956       else {
1957          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1958       }
1959 
1960       operand0.swizzleX = swizzleX;
1961       operand0.swizzleY = swizzleY;
1962       operand0.swizzleZ = swizzleZ;
1963       operand0.swizzleW = swizzleW;
1964 
1965       if (absolute || negate) {
              /* Modifiers go into the extended operand token; exactly one
               * of the three tests below is true.
               */
1966          operand0.extended = 1;
1967          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1968          if (absolute && !negate)
1969             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1970          if (!absolute && negate)
1971             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1972          if (absolute && negate)
1973             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1974       }
1975    }
1976 
1977    check_register_index(emit, operand0.operandType, index);
1978 
1979    /* Emit the operand tokens */
1980    emit_dword(emit, operand0.value);
1981    if (operand0.extended)
1982       emit_dword(emit, operand1.value);
1983 
1984    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1985       /* Emit the four float/int in-line immediate values */
           /* The swizzle is applied here by picking the values directly. */
1986       unsigned *c;
1987       assert(index < ARRAY_SIZE(emit->immediates));
1988       assert(file == TGSI_FILE_IMMEDIATE);
1989       assert(swizzleX < 4);
1990       assert(swizzleY < 4);
1991       assert(swizzleZ < 4);
1992       assert(swizzleW < 4);
1993       c = (unsigned *) emit->immediates[index];
1994       emit_dword(emit, c[swizzleX]);
1995       emit_dword(emit, c[swizzleY]);
1996       emit_dword(emit, c[swizzleZ]);
1997       emit_dword(emit, c[swizzleW]);
1998    }
1999    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
2000       /* Emit the register index(es) */
           /* For 2D operands index2 (and its indirect register, if any)
            * is emitted before the element index.
            */
2001       if (index2d) {
2002          emit_dword(emit, index2);
2003 
2004          if (indirect2d) {
2005             emit_indirect_register(emit, reg->DimIndirect.Index);
2006          }
2007       }
2008 
2009       emit_dword(emit, remap_temp_index(emit, file, index));
2010 
2011       if (indirect) {
2012          assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
2013          emit_indirect_register(emit, reg->Indirect.Index);
2014       }
2015    }
2016 }
2017 
2018 
2019 /**
2020  * Emit a resource operand (for use with a SAMPLE instruction).
2021  */
2022 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)2023 emit_resource_register(struct svga_shader_emitter_v10 *emit,
2024                        unsigned resource_number)
2025 {
2026    VGPU10OperandToken0 operand0;
2027 
2028    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
2029 
2030    /* init */
2031    operand0.value = 0;
2032 
2033    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
2034    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2035    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2036    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2037    operand0.swizzleX = VGPU10_COMPONENT_X;
2038    operand0.swizzleY = VGPU10_COMPONENT_Y;
2039    operand0.swizzleZ = VGPU10_COMPONENT_Z;
2040    operand0.swizzleW = VGPU10_COMPONENT_W;
2041 
2042    emit_dword(emit, operand0.value);
2043    emit_dword(emit, resource_number);
2044 }
2045 
2046 
2047 /**
2048  * Emit a sampler operand (for use with a SAMPLE instruction).
2049  */
2050 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned unit)2051 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
2052                       unsigned unit)
2053 {
2054    VGPU10OperandToken0 operand0;
2055    unsigned sampler_number;
2056 
2057    sampler_number = emit->key.tex[unit].sampler_index;
2058 
2059    if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
2060       sampler_number++;
2061 
2062    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
2063 
2064    /* init */
2065    operand0.value = 0;
2066 
2067    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2068    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2069 
2070    emit_dword(emit, operand0.value);
2071    emit_dword(emit, sampler_number);
2072 }
2073 
2074 
2075 /**
2076  * Emit an operand which reads the IS_FRONT_FACING register.
2077  */
2078 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)2079 emit_face_register(struct svga_shader_emitter_v10 *emit)
2080 {
2081    VGPU10OperandToken0 operand0;
2082    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
2083 
2084    /* init */
2085    operand0.value = 0;
2086 
2087    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
2088    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2089    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
2090    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2091 
2092    operand0.swizzleX = VGPU10_COMPONENT_X;
2093    operand0.swizzleY = VGPU10_COMPONENT_X;
2094    operand0.swizzleZ = VGPU10_COMPONENT_X;
2095    operand0.swizzleW = VGPU10_COMPONENT_X;
2096 
2097    emit_dword(emit, operand0.value);
2098    emit_dword(emit, index);
2099 }
2100 
2101 
2102 /**
2103  * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
2104  * instruction.
2105  */
2106 static void
emit_rasterizer_register(struct svga_shader_emitter_v10 * emit)2107 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
2108 {
2109    VGPU10OperandToken0 operand0;
2110 
2111    /* init */
2112    operand0.value = 0;
2113 
2114    /* No register index for rasterizer index (there's only one) */
2115    operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
2116    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2117    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2118    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2119    operand0.swizzleX = VGPU10_COMPONENT_X;
2120    operand0.swizzleY = VGPU10_COMPONENT_Y;
2121    operand0.swizzleZ = VGPU10_COMPONENT_Z;
2122    operand0.swizzleW = VGPU10_COMPONENT_W;
2123 
2124    emit_dword(emit, operand0.value);
2125 }
2126 
2127 
2128 /**
2129  * Emit tokens for the "stream" register used by the
2130  * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
2131  */
2132 static void
emit_stream_register(struct svga_shader_emitter_v10 * emit,unsigned index)2133 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
2134 {
2135    VGPU10OperandToken0 operand0;
2136 
2137    /* init */
2138    operand0.value = 0;
2139 
2140    /* No register index for rasterizer index (there's only one) */
2141    operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
2142    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2143    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2144 
2145    emit_dword(emit, operand0.value);
2146    emit_dword(emit, index);
2147 }
2148 
2149 
2150 /**
2151  * Emit the token for a VGPU10 opcode, with precise parameter.
2152  * \param saturate   clamp result to [0,1]?
2153  */
2154 static void
emit_opcode_precise(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,bool precise)2155 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
2156                     unsigned vgpu10_opcode, bool saturate, bool precise)
2157 {
2158    VGPU10OpcodeToken0 token0;
2159 
2160    token0.value = 0;  /* init all fields to zero */
2161    token0.opcodeType = vgpu10_opcode;
2162    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2163    token0.saturate = saturate;
2164 
2165    /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
2166     * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
2167     */
2168    token0.preciseValues = precise && emit->version >= 50;
2169 
2170    emit_dword(emit, token0.value);
2171 
2172    emit->uses_precise_qualifier |= token0.preciseValues;
2173 }
2174 
2175 
/**
 * Emit the token for a VGPU10 opcode without the precise qualifier.
 *
 * \param emit           the shader emitter state
 * \param vgpu10_opcode  the VGPU10 opcode to emit
 * \param saturate       clamp result to [0,1]?
 */
static void
emit_opcode(struct svga_shader_emitter_v10 *emit,
            unsigned vgpu10_opcode, bool saturate)
{
   const bool precise = false;

   emit_opcode_precise(emit, vgpu10_opcode, saturate, precise);
}
2186 
2187 
2188 /**
2189  * Emit the token for a VGPU10 resinfo instruction.
2190  * \param modifier   return type modifier, _uint or _rcpFloat.
2191  *                   TODO: We may want to remove this parameter if it will
2192  *                   only ever be used as _uint.
2193  */
2194 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)2195 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2196                     VGPU10_RESINFO_RETURN_TYPE modifier)
2197 {
2198    VGPU10OpcodeToken0 token0;
2199 
2200    token0.value = 0;  /* init all fields to zero */
2201    token0.opcodeType = VGPU10_OPCODE_RESINFO;
2202    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2203    token0.resinfoReturnType = modifier;
2204 
2205    emit_dword(emit, token0.value);
2206 }
2207 
2208 
2209 /**
2210  * Emit opcode tokens for a texture sample instruction.  Texture instructions
2211  * can be rather complicated (texel offsets, etc) so we have this specialized
2212  * function.
2213  */
2214 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,bool saturate,const int offsets[3])2215 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2216                    unsigned vgpu10_opcode, bool saturate,
2217                    const int offsets[3])
2218 {
2219    VGPU10OpcodeToken0 token0;
2220    VGPU10OpcodeToken1 token1;
2221 
2222    token0.value = 0;  /* init all fields to zero */
2223    token0.opcodeType = vgpu10_opcode;
2224    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2225    token0.saturate = saturate;
2226 
2227    if (offsets[0] || offsets[1] || offsets[2]) {
2228       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2229       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2230       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2231       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2232       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2233       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2234 
2235       token0.extended = 1;
2236       token1.value = 0;
2237       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2238       token1.offsetU = offsets[0];
2239       token1.offsetV = offsets[1];
2240       token1.offsetW = offsets[2];
2241    }
2242 
2243    emit_dword(emit, token0.value);
2244    if (token0.extended) {
2245       emit_dword(emit, token1.value);
2246    }
2247 }
2248 
2249 
2250 /**
2251  * Emit a DISCARD opcode token.
2252  * If nonzero is set, we'll discard the fragment if the X component is not 0.
2253  * Otherwise, we'll discard the fragment if the X component is 0.
2254  */
2255 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,bool nonzero)2256 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, bool nonzero)
2257 {
2258    VGPU10OpcodeToken0 opcode0;
2259 
2260    opcode0.value = 0;
2261    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2262    if (nonzero)
2263       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2264 
2265    emit_dword(emit, opcode0.value);
2266 }
2267 
2268 
2269 /**
2270  * We need to call this before we begin emitting a VGPU10 instruction.
2271  */
2272 static void
begin_emit_instruction(struct svga_shader_emitter_v10 * emit)2273 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2274 {
2275    assert(emit->inst_start_token == 0);
2276    /* Save location of the instruction's VGPU10OpcodeToken0 token.
2277     * Note, we can't save a pointer because it would become invalid if
2278     * we have to realloc the output buffer.
2279     */
2280    emit->inst_start_token = emit_get_num_tokens(emit);
2281 }
2282 
2283 
2284 /**
2285  * We need to call this after we emit the last token of a VGPU10 instruction.
2286  * This function patches in the opcode token's instructionLength field.
2287  */
2288 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)2289 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2290 {
2291    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2292    unsigned inst_length;
2293 
2294    assert(emit->inst_start_token > 0);
2295 
2296    if (emit->discard_instruction) {
2297       /* Back up the emit->ptr to where this instruction started so
2298        * that we discard the current instruction.
2299        */
2300       emit->ptr = (char *) (tokens + emit->inst_start_token);
2301    }
2302    else {
2303       /* Compute instruction length and patch that into the start of
2304        * the instruction.
2305        */
2306       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2307 
2308       assert(inst_length > 0);
2309 
2310       tokens[emit->inst_start_token].instructionLength = inst_length;
2311    }
2312 
2313    emit->inst_start_token = 0; /* reset to zero for error checking */
2314    emit->discard_instruction = false;
2315 }
2316 
2317 
2318 /**
2319  * Return index for a free temporary register.
2320  */
2321 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)2322 get_temp_index(struct svga_shader_emitter_v10 *emit)
2323 {
2324    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2325    return emit->num_shader_temps + emit->internal_temp_count++;
2326 }
2327 
2328 
2329 /**
2330  * Release the temporaries which were generated by get_temp_index().
2331  */
2332 static void
free_temp_indexes(struct svga_shader_emitter_v10 * emit)2333 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2334 {
2335    emit->internal_temp_count = 0;
2336 }
2337 
2338 
2339 /**
2340  * Create a tgsi_full_src_register.
2341  */
2342 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)2343 make_src_reg(enum tgsi_file_type file, unsigned index)
2344 {
2345    struct tgsi_full_src_register reg;
2346 
2347    memset(&reg, 0, sizeof(reg));
2348    reg.Register.File = file;
2349    reg.Register.Index = index;
2350    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2351    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2352    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2353    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2354    return reg;
2355 }
2356 
2357 
2358 /**
2359  * Create a tgsi_full_src_register with a swizzle such that all four
2360  * vector components have the same scalar value.
2361  */
2362 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2363 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2364 {
2365    struct tgsi_full_src_register reg;
2366 
2367    assert(component >= TGSI_SWIZZLE_X);
2368    assert(component <= TGSI_SWIZZLE_W);
2369 
2370    memset(&reg, 0, sizeof(reg));
2371    reg.Register.File = file;
2372    reg.Register.Index = index;
2373    reg.Register.SwizzleX =
2374    reg.Register.SwizzleY =
2375    reg.Register.SwizzleZ =
2376    reg.Register.SwizzleW = component;
2377    return reg;
2378 }
2379 
2380 
2381 /**
2382  * Create a tgsi_full_src_register for a temporary.
2383  */
2384 static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)2385 make_src_temp_reg(unsigned index)
2386 {
2387    return make_src_reg(TGSI_FILE_TEMPORARY, index);
2388 }
2389 
2390 
2391 /**
2392  * Create a tgsi_full_src_register for a constant.
2393  */
2394 static struct tgsi_full_src_register
make_src_const_reg(unsigned index)2395 make_src_const_reg(unsigned index)
2396 {
2397    return make_src_reg(TGSI_FILE_CONSTANT, index);
2398 }
2399 
2400 
2401 /**
2402  * Create a tgsi_full_src_register for an immediate constant.
2403  */
2404 static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)2405 make_src_immediate_reg(unsigned index)
2406 {
2407    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2408 }
2409 
2410 
2411 /**
2412  * Create a tgsi_full_dst_register.
2413  */
2414 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)2415 make_dst_reg(enum tgsi_file_type file, unsigned index)
2416 {
2417    struct tgsi_full_dst_register reg;
2418 
2419    memset(&reg, 0, sizeof(reg));
2420    reg.Register.File = file;
2421    reg.Register.Index = index;
2422    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2423    return reg;
2424 }
2425 
2426 
2427 /**
2428  * Create a tgsi_full_dst_register for a temporary.
2429  */
2430 static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)2431 make_dst_temp_reg(unsigned index)
2432 {
2433    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2434 }
2435 
2436 
2437 /**
2438  * Create a tgsi_full_dst_register for an output.
2439  */
2440 static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)2441 make_dst_output_reg(unsigned index)
2442 {
2443    return make_dst_reg(TGSI_FILE_OUTPUT, index);
2444 }
2445 
2446 
2447 /**
2448  * Create negated tgsi_full_src_register.
2449  */
2450 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2451 negate_src(const struct tgsi_full_src_register *reg)
2452 {
2453    struct tgsi_full_src_register neg = *reg;
2454    neg.Register.Negate = !reg->Register.Negate;
2455    return neg;
2456 }
2457 
2458 /**
2459  * Create absolute value of a tgsi_full_src_register.
2460  */
2461 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2462 absolute_src(const struct tgsi_full_src_register *reg)
2463 {
2464    struct tgsi_full_src_register absolute = *reg;
2465    absolute.Register.Absolute = 1;
2466    return absolute;
2467 }
2468 
2469 
2470 /** Return the named swizzle term from the src register */
2471 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2472 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2473 {
2474    switch (term) {
2475    case TGSI_SWIZZLE_X:
2476       return reg->Register.SwizzleX;
2477    case TGSI_SWIZZLE_Y:
2478       return reg->Register.SwizzleY;
2479    case TGSI_SWIZZLE_Z:
2480       return reg->Register.SwizzleZ;
2481    case TGSI_SWIZZLE_W:
2482       return reg->Register.SwizzleW;
2483    default:
2484       assert(!"Bad swizzle");
2485       return TGSI_SWIZZLE_X;
2486    }
2487 }
2488 
2489 
2490 /**
2491  * Create swizzled tgsi_full_src_register.
2492  */
2493 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2494 swizzle_src(const struct tgsi_full_src_register *reg,
2495             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2496             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2497 {
2498    struct tgsi_full_src_register swizzled = *reg;
2499    /* Note: we swizzle the current swizzle */
2500    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2501    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2502    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2503    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2504    return swizzled;
2505 }
2506 
2507 
2508 /**
2509  * Create swizzled tgsi_full_src_register where all the swizzle
2510  * terms are the same.
2511  */
2512 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2513 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2514 {
2515    struct tgsi_full_src_register swizzled = *reg;
2516    /* Note: we swizzle the current swizzle */
2517    swizzled.Register.SwizzleX =
2518    swizzled.Register.SwizzleY =
2519    swizzled.Register.SwizzleZ =
2520    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2521    return swizzled;
2522 }
2523 
2524 
2525 /**
2526  * Create new tgsi_full_dst_register with writemask.
2527  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2528  */
2529 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2530 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2531 {
2532    struct tgsi_full_dst_register masked = *reg;
2533    masked.Register.WriteMask = mask;
2534    return masked;
2535 }
2536 
2537 
2538 /**
2539  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2540  */
2541 static bool
same_swizzle_terms(const struct tgsi_full_src_register * reg)2542 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2543 {
2544    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2545            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2546            reg->Register.SwizzleZ == reg->Register.SwizzleW);
2547 }
2548 
2549 
2550 /**
2551  * Search the vector for the value 'x' and return its position.
2552  */
2553 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2554 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2555                  union tgsi_immediate_data x)
2556 {
2557    unsigned i;
2558    for (i = 0; i < 4; i++) {
2559       if (vec[i].Int == x.Int)
2560          return i;
2561    }
2562    return -1;
2563 }
2564 
2565 
2566 /**
2567  * Helper used by make_immediate_reg(), make_immediate_reg_4().
2568  */
2569 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2570 find_immediate(struct svga_shader_emitter_v10 *emit,
2571                union tgsi_immediate_data x, unsigned startIndex)
2572 {
2573    const unsigned endIndex = emit->num_immediates;
2574    unsigned i;
2575 
2576    assert(emit->num_immediates_emitted > 0);
2577 
2578    /* Search immediates for x, y, z, w */
2579    for (i = startIndex; i < endIndex; i++) {
2580       if (x.Int == emit->immediates[i][0].Int ||
2581           x.Int == emit->immediates[i][1].Int ||
2582           x.Int == emit->immediates[i][2].Int ||
2583           x.Int == emit->immediates[i][3].Int) {
2584          return i;
2585       }
2586    }
2587    /* immediate not declared yet */
2588    return -1;
2589 }
2590 
2591 
2592 /**
2593  * As above, but search for a double[2] pair.
2594  */
2595 static int
find_immediate_dbl(struct svga_shader_emitter_v10 * emit,double x,double y)2596 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2597                    double x, double y)
2598 {
2599    const unsigned endIndex = emit->num_immediates;
2600    unsigned i;
2601 
2602    assert(emit->num_immediates_emitted > 0);
2603 
2604    /* Search immediates for x, y, z, w */
2605    for (i = 0; i < endIndex; i++) {
2606       if (x == emit->immediates_dbl[i][0] &&
2607           y == emit->immediates_dbl[i][1]) {
2608          return i;
2609       }
2610    }
2611    /* Should never try to use an immediate value that wasn't pre-declared */
2612    assert(!"find_immediate_dbl() failed!");
2613    return -1;
2614 }
2615 
2616 
2617 
2618 /**
2619  * Return a tgsi_full_src_register for an immediate/literal
2620  * union tgsi_immediate_data[4] value.
2621  * Note: the values must have been previously declared/allocated in
2622  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2623  * vec4 immediate.
2624  */
2625 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2626 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2627                      const union tgsi_immediate_data imm[4])
2628 {
2629    struct tgsi_full_src_register reg;
2630    unsigned i;
2631 
2632    for (i = 0; i < emit->num_common_immediates; i++) {
2633       /* search for first component value */
2634       int immpos = find_immediate(emit, imm[0], i);
2635       int x, y, z, w;
2636 
2637       assert(immpos >= 0);
2638 
2639       /* find remaining components within the immediate vector */
2640       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2641       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2642       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2643       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2644 
2645       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
2646          /* found them all */
2647          memset(&reg, 0, sizeof(reg));
2648          reg.Register.File = TGSI_FILE_IMMEDIATE;
2649          reg.Register.Index = immpos;
2650          reg.Register.SwizzleX = x;
2651          reg.Register.SwizzleY = y;
2652          reg.Register.SwizzleZ = z;
2653          reg.Register.SwizzleW = w;
2654          return reg;
2655       }
2656       /* else, keep searching */
2657    }
2658 
2659    assert(!"Failed to find immediate register!");
2660 
2661    /* Just return IMM[0].xxxx */
2662    memset(&reg, 0, sizeof(reg));
2663    reg.Register.File = TGSI_FILE_IMMEDIATE;
2664    return reg;
2665 }
2666 
2667 
2668 /**
2669  * Return a tgsi_full_src_register for an immediate/literal
2670  * union tgsi_immediate_data value of the form {value, value, value, value}.
2671  * \sa make_immediate_reg_4() regarding allowed values.
2672  */
2673 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2674 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2675                    union tgsi_immediate_data value)
2676 {
2677    struct tgsi_full_src_register reg;
2678    int immpos = find_immediate(emit, value, 0);
2679 
2680    assert(immpos >= 0);
2681 
2682    memset(&reg, 0, sizeof(reg));
2683    reg.Register.File = TGSI_FILE_IMMEDIATE;
2684    reg.Register.Index = immpos;
2685    reg.Register.SwizzleX =
2686    reg.Register.SwizzleY =
2687    reg.Register.SwizzleZ =
2688    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2689 
2690    return reg;
2691 }
2692 
2693 
2694 /**
2695  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2696  * \sa make_immediate_reg_4() regarding allowed values.
2697  */
2698 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2699 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2700                           float x, float y, float z, float w)
2701 {
2702    union tgsi_immediate_data imm[4];
2703    imm[0].Float = x;
2704    imm[1].Float = y;
2705    imm[2].Float = z;
2706    imm[3].Float = w;
2707    return make_immediate_reg_4(emit, imm);
2708 }
2709 
2710 
2711 /**
2712  * Return a tgsi_full_src_register for an immediate/literal float value
2713  * of the form {value, value, value, value}.
2714  * \sa make_immediate_reg_4() regarding allowed values.
2715  */
2716 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)2717 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2718 {
2719    union tgsi_immediate_data imm;
2720    imm.Float = value;
2721    return make_immediate_reg(emit, imm);
2722 }
2723 
2724 
2725 /**
2726  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2727  */
2728 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2729 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2730                         int x, int y, int z, int w)
2731 {
2732    union tgsi_immediate_data imm[4];
2733    imm[0].Int = x;
2734    imm[1].Int = y;
2735    imm[2].Int = z;
2736    imm[3].Int = w;
2737    return make_immediate_reg_4(emit, imm);
2738 }
2739 
2740 
2741 /**
2742  * Return a tgsi_full_src_register for an immediate/literal int value
2743  * of the form {value, value, value, value}.
2744  * \sa make_immediate_reg_4() regarding allowed values.
2745  */
2746 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)2747 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2748 {
2749    union tgsi_immediate_data imm;
2750    imm.Int = value;
2751    return make_immediate_reg(emit, imm);
2752 }
2753 
2754 
2755 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2756 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2757 {
2758    struct tgsi_full_src_register reg;
2759    int immpos = find_immediate_dbl(emit, value, value);
2760 
2761    assert(immpos >= 0);
2762 
2763    memset(&reg, 0, sizeof(reg));
2764    reg.Register.File = TGSI_FILE_IMMEDIATE;
2765    reg.Register.Index = immpos;
2766    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2767    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2768    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2769    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2770 
2771    return reg;
2772 }
2773 
2774 
2775 /**
2776  * Allocate space for a union tgsi_immediate_data[4] immediate.
2777  * \return  the index/position of the immediate.
2778  */
2779 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2780 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2781                   const union tgsi_immediate_data imm[4])
2782 {
2783    unsigned n = emit->num_immediates++;
2784    assert(n < ARRAY_SIZE(emit->immediates));
2785    emit->immediates[n][0] = imm[0];
2786    emit->immediates[n][1] = imm[1];
2787    emit->immediates[n][2] = imm[2];
2788    emit->immediates[n][3] = imm[3];
2789    return n;
2790 }
2791 
2792 
2793 /**
2794  * Allocate space for a float[4] immediate.
2795  * \return  the index/position of the immediate.
2796  */
2797 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2798 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2799                        float x, float y, float z, float w)
2800 {
2801    union tgsi_immediate_data imm[4];
2802    imm[0].Float = x;
2803    imm[1].Float = y;
2804    imm[2].Float = z;
2805    imm[3].Float = w;
2806    return alloc_immediate_4(emit, imm);
2807 }
2808 
2809 
2810 /**
2811  * Allocate space for an int[4] immediate.
2812  * \return  the index/position of the immediate.
2813  */
2814 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2815 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2816                        int x, int y, int z, int w)
2817 {
2818    union tgsi_immediate_data imm[4];
2819    imm[0].Int = x;
2820    imm[1].Int = y;
2821    imm[2].Int = z;
2822    imm[3].Int = w;
2823    return alloc_immediate_4(emit, imm);
2824 }
2825 
2826 
2827 /**
2828  * Add a new immediate after the immediate block has been declared.
2829  * Any new immediates will be appended to the immediate block after the
2830  * shader has been parsed.
2831  * \return  the index/position of the immediate.
2832  */
2833 static unsigned
add_immediate_int(struct svga_shader_emitter_v10 * emit,int x)2834 add_immediate_int(struct svga_shader_emitter_v10 *emit, int x)
2835 {
2836    union tgsi_immediate_data imm[4];
2837    imm[0].Int = x;
2838    imm[1].Int = x+1;
2839    imm[2].Int = x+2;
2840    imm[3].Int = x+3;
2841 
2842    unsigned immpos = alloc_immediate_4(emit, imm);
2843    emit->num_new_immediates++;
2844 
2845    return immpos;
2846 }
2847 
2848 
2849 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2850 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2851                         double x, double y)
2852 {
2853    unsigned n = emit->num_immediates++;
2854    assert(!emit->num_immediates_emitted);
2855    assert(n < ARRAY_SIZE(emit->immediates));
2856    emit->immediates_dbl[n][0] = x;
2857    emit->immediates_dbl[n][1] = y;
2858    return n;
2859 
2860 }
2861 
2862 
2863 /**
2864  * Allocate a shader input to store a system value.
2865  */
2866 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2867 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2868 {
2869    const unsigned n = emit->linkage.input_map_max + 1 + index;
2870    assert(index < ARRAY_SIZE(emit->system_value_indexes));
2871    emit->system_value_indexes[index] = n;
2872    return n;
2873 }
2874 
2875 
2876 /**
2877  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2878  */
2879 static bool
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2880 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2881                       const struct tgsi_full_immediate *imm)
2882 {
2883    /* We don't actually emit any code here.  We just save the
2884     * immediate values and emit them later.
2885     */
2886    alloc_immediate_4(emit, imm->u);
2887    return true;
2888 }
2889 
2890 
2891 /**
2892  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2893  * containing all the immediate values previously allocated
2894  * with alloc_immediate_4().
2895  */
2896 static bool
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)2897 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2898 {
2899    VGPU10OpcodeToken0 token;
2900 
2901    assert(!emit->num_immediates_emitted);
2902 
2903    token.value = 0;
2904    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2905    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2906 
2907    emit->immediates_block_start_token =
2908       (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2909 
2910    /* Note: no begin/end_emit_instruction() calls */
2911    emit_dword(emit, token.value);
2912    emit_dword(emit, 2 + 4 * emit->num_immediates);
2913    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2914 
2915    emit->num_immediates_emitted = emit->num_immediates;
2916 
2917    emit->immediates_block_next_token =
2918       (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
2919 
2920    return true;
2921 }
2922 
2923 
2924 /**
2925  * Reemit the immediate constant buffer block to include the new
2926  * immediates that are allocated after the block is declared. Those
2927  * immediates are used as constant indices to constant buffers.
2928  */
2929 static bool
reemit_immediates_block(struct svga_shader_emitter_v10 * emit)2930 reemit_immediates_block(struct svga_shader_emitter_v10 *emit)
2931 {
2932    unsigned num_tokens = emit_get_num_tokens(emit);
2933    unsigned num_new_immediates = emit->num_new_immediates;
2934 
2935    /* Reserve room for the new immediates */
2936    if (!reserve(emit, 4 * num_new_immediates))
2937       return false;
2938 
2939    /* Move the tokens after the immediates block to make room for the
2940     * new immediates.
2941     */
2942    VGPU10ProgramToken *tokens = (VGPU10ProgramToken *)emit->buf;
2943    char *next = (char *) (tokens + emit->immediates_block_next_token);
2944    char *new_next = (char *) (tokens + emit->immediates_block_next_token +
2945                                  num_new_immediates * 4);
2946 
2947    char *end = emit->ptr;
2948    unsigned len = end - next;
2949    memmove(new_next, next, len);
2950 
2951    /* Append the new immediates to the end of the immediates block */
2952    char *start = (char *) (tokens + emit->immediates_block_start_token+1);
2953    unsigned immediates_block_size = *(uint32 *)start;
2954 
2955    char *new_immediates = (char *)&emit->immediates[emit->num_immediates_emitted][0];
2956    *(uint32 *)start = immediates_block_size + 4 * num_new_immediates;
2957    memcpy(next, new_immediates, 4 * num_new_immediates * sizeof(uint32));
2958 
2959    emit->ptr = (char *) (tokens + num_tokens + 4 * num_new_immediates);
2960 
2961    return true;
2962 }
2963 
2964 
2965 
2966 /**
2967  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2968  * interpolation mode.
2969  * \return a VGPU10_INTERPOLATION_x value
2970  */
2971 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)2972 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2973                         enum tgsi_interpolate_mode interp,
2974                         enum tgsi_interpolate_loc interpolate_loc)
2975 {
2976    if (interp == TGSI_INTERPOLATE_COLOR) {
2977       interp = emit->key.fs.flatshade ?
2978          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2979    }
2980 
2981    switch (interp) {
2982    case TGSI_INTERPOLATE_CONSTANT:
2983       return VGPU10_INTERPOLATION_CONSTANT;
2984    case TGSI_INTERPOLATE_LINEAR:
2985       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2986          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2987       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2988                  emit->version >= 41) {
2989          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2990       } else {
2991          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2992       }
2993       break;
2994    case TGSI_INTERPOLATE_PERSPECTIVE:
2995       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2996          return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2997       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2998                  emit->version >= 41) {
2999          return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
3000       } else {
3001          return VGPU10_INTERPOLATION_LINEAR;
3002       }
3003       break;
3004    default:
3005       assert(!"Unexpected interpolation mode");
3006       return VGPU10_INTERPOLATION_CONSTANT;
3007    }
3008 }
3009 
3010 
3011 /**
3012  * Translate a TGSI property to VGPU10.
3013  * Don't emit any instructions yet, only need to gather the primitive property
3014  * information.  The output primitive topology might be changed later. The
3015  * final property instructions will be emitted as part of the pre-helper code.
3016  */
3017 static bool
emit_vgpu10_property(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_property * prop)3018 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
3019                      const struct tgsi_full_property *prop)
3020 {
3021    static const VGPU10_PRIMITIVE primType[] = {
3022       VGPU10_PRIMITIVE_POINT,           /* MESA_PRIM_POINTS */
3023       VGPU10_PRIMITIVE_LINE,            /* MESA_PRIM_LINES */
3024       VGPU10_PRIMITIVE_LINE,            /* MESA_PRIM_LINE_LOOP */
3025       VGPU10_PRIMITIVE_LINE,            /* MESA_PRIM_LINE_STRIP */
3026       VGPU10_PRIMITIVE_TRIANGLE,        /* MESA_PRIM_TRIANGLES */
3027       VGPU10_PRIMITIVE_TRIANGLE,        /* MESA_PRIM_TRIANGLE_STRIP */
3028       VGPU10_PRIMITIVE_TRIANGLE,        /* MESA_PRIM_TRIANGLE_FAN */
3029       VGPU10_PRIMITIVE_UNDEFINED,       /* MESA_PRIM_QUADS */
3030       VGPU10_PRIMITIVE_UNDEFINED,       /* MESA_PRIM_QUAD_STRIP */
3031       VGPU10_PRIMITIVE_UNDEFINED,       /* MESA_PRIM_POLYGON */
3032       VGPU10_PRIMITIVE_LINE_ADJ,        /* MESA_PRIM_LINES_ADJACENCY */
3033       VGPU10_PRIMITIVE_LINE_ADJ,        /* MESA_PRIM_LINE_STRIP_ADJACENCY */
3034       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* MESA_PRIM_TRIANGLES_ADJACENCY */
3035       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
3036    };
3037 
3038    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
3039       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* MESA_PRIM_POINTS */
3040       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* MESA_PRIM_LINES */
3041       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* MESA_PRIM_LINE_LOOP */
3042       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* MESA_PRIM_LINE_STRIP */
3043       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* MESA_PRIM_TRIANGLES */
3044       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_STRIP */
3045       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* MESA_PRIM_TRIANGLE_FAN */
3046       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* MESA_PRIM_QUADS */
3047       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* MESA_PRIM_QUAD_STRIP */
3048       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* MESA_PRIM_POLYGON */
3049       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* MESA_PRIM_LINES_ADJACENCY */
3050       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* MESA_PRIM_LINE_STRIP_ADJACENCY */
3051       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* MESA_PRIM_TRIANGLES_ADJACENCY */
3052       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* MESA_PRIM_TRIANGLE_STRIP_ADJACENCY */
3053    };
3054 
3055    static const unsigned inputArraySize[] = {
3056       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
3057       1,       /* VGPU10_PRIMITIVE_POINT */
3058       2,       /* VGPU10_PRIMITIVE_LINE */
3059       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
3060       0,
3061       0,
3062       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
3063       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
3064    };
3065 
3066    switch (prop->Property.PropertyName) {
3067    case TGSI_PROPERTY_GS_INPUT_PRIM:
3068       assert(prop->u[0].Data < ARRAY_SIZE(primType));
3069       emit->gs.prim_type = primType[prop->u[0].Data];
3070       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
3071       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
3072       break;
3073 
3074    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
3075       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
3076       emit->gs.prim_topology = primTopology[prop->u[0].Data];
3077       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
3078       break;
3079 
3080    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
3081       emit->gs.max_out_vertices = prop->u[0].Data;
3082       break;
3083 
3084    case TGSI_PROPERTY_GS_INVOCATIONS:
3085       emit->gs.invocations = prop->u[0].Data;
3086       break;
3087 
3088    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
3089    case TGSI_PROPERTY_NEXT_SHADER:
3090    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
3091       /* no-op */
3092       break;
3093 
3094    case TGSI_PROPERTY_TCS_VERTICES_OUT:
3095       /* This info is already captured in the shader key */
3096       break;
3097 
3098    case TGSI_PROPERTY_TES_PRIM_MODE:
3099       emit->tes.prim_mode = prop->u[0].Data;
3100       break;
3101 
3102    case TGSI_PROPERTY_TES_SPACING:
3103       emit->tes.spacing = prop->u[0].Data;
3104       break;
3105 
3106    case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
3107       emit->tes.vertices_order_cw = prop->u[0].Data;
3108       break;
3109 
3110    case TGSI_PROPERTY_TES_POINT_MODE:
3111       emit->tes.point_mode = prop->u[0].Data;
3112       break;
3113 
3114    case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
3115       emit->cs.block_width = prop->u[0].Data;
3116       break;
3117 
3118    case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
3119       emit->cs.block_height = prop->u[0].Data;
3120       break;
3121 
3122    case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
3123       emit->cs.block_depth = prop->u[0].Data;
3124       break;
3125 
3126    case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
3127       emit->fs.forceEarlyDepthStencil = true;
3128       break;
3129 
3130    default:
3131       debug_printf("Unexpected TGSI property %s\n",
3132                    tgsi_property_names[prop->Property.PropertyName]);
3133    }
3134 
3135    return true;
3136 }
3137 
3138 
3139 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)3140 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
3141                           VGPU10OpcodeToken0 opcode0, unsigned nData,
3142                           unsigned data)
3143 {
3144    begin_emit_instruction(emit);
3145    emit_dword(emit, opcode0.value);
3146    if (nData)
3147       emit_dword(emit, data);
3148    end_emit_instruction(emit);
3149 }
3150 
3151 
3152 /**
3153  * Emit property instructions
3154  */
3155 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)3156 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
3157 {
3158    VGPU10OpcodeToken0 opcode0;
3159 
3160    assert(emit->unit == PIPE_SHADER_GEOMETRY);
3161 
3162    /* emit input primitive type declaration */
3163    opcode0.value = 0;
3164    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
3165    opcode0.primitive = emit->gs.prim_type;
3166    emit_property_instruction(emit, opcode0, 0, 0);
3167 
3168    /* emit max output vertices */
3169    opcode0.value = 0;
3170    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
3171    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
3172 
3173    if (emit->version >= 50 && emit->gs.invocations > 0) {
3174       opcode0.value = 0;
3175       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
3176       emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
3177    }
3178 }
3179 
3180 
3181 /**
3182  * A helper function to declare tessellator domain in a hull shader or
3183  * in the domain shader.
3184  */
3185 static void
emit_tessellator_domain(struct svga_shader_emitter_v10 * emit,enum mesa_prim prim_mode)3186 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
3187                         enum mesa_prim prim_mode)
3188 {
3189    VGPU10OpcodeToken0 opcode0;
3190 
3191    opcode0.value = 0;
3192    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
3193    switch (prim_mode) {
3194    case MESA_PRIM_QUADS:
3195    case MESA_PRIM_LINES:
3196       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
3197       break;
3198    case MESA_PRIM_TRIANGLES:
3199       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
3200       break;
3201    default:
3202       debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
3203       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
3204    }
3205    begin_emit_instruction(emit);
3206    emit_dword(emit, opcode0.value);
3207    end_emit_instruction(emit);
3208 }
3209 
3210 
3211 /**
3212  * Emit domain shader declarations.
3213  */
3214 static void
emit_domain_shader_declarations(struct svga_shader_emitter_v10 * emit)3215 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
3216 {
3217    VGPU10OpcodeToken0 opcode0;
3218 
3219    assert(emit->unit == PIPE_SHADER_TESS_EVAL);
3220 
3221    /* Emit the input control point count */
3222    assert(emit->key.tes.vertices_per_patch >= 0 &&
3223           emit->key.tes.vertices_per_patch <= 32);
3224 
3225    opcode0.value = 0;
3226    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3227    opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
3228    begin_emit_instruction(emit);
3229    emit_dword(emit, opcode0.value);
3230    end_emit_instruction(emit);
3231 
3232    emit_tessellator_domain(emit, emit->tes.prim_mode);
3233 
3234    /* Specify a max for swizzles of the domain point according to the
3235     * tessellator domain type.
3236     */
3237    emit->tes.swizzle_max = emit->tes.prim_mode == MESA_PRIM_TRIANGLES ?
3238                               TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y;
3239 }
3240 
3241 
3242 /**
3243  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
3244  * to implement some instructions.  We pre-allocate those values here
3245  * in the immediate constant buffer.
3246  */
3247 static void
alloc_common_immediates(struct svga_shader_emitter_v10 * emit)3248 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
3249 {
3250    unsigned n = 0;
3251 
3252    emit->common_immediate_pos[n++] =
3253       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
3254 
3255    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
3256       emit->common_immediate_pos[n++] =
3257          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
3258    }
3259 
3260    emit->common_immediate_pos[n++] =
3261       alloc_immediate_int4(emit, 0, 1, 2, -1);
3262 
3263    emit->common_immediate_pos[n++] =
3264       alloc_immediate_int4(emit, 3, 4, 5, 6);
3265 
3266    if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
3267        emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
3268       emit->common_immediate_pos[n++] =
3269          alloc_immediate_int4(emit, 31, 0, 0, 0);
3270    }
3271 
3272    if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
3273        emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
3274        emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3275       emit->common_immediate_pos[n++] =
3276          alloc_immediate_int4(emit, 32, 0, 0, 0);
3277    }
3278 
3279    if (emit->key.vs.attrib_puint_to_snorm) {
3280       emit->common_immediate_pos[n++] =
3281          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3282    }
3283 
3284    if (emit->key.vs.attrib_puint_to_uscaled) {
3285       emit->common_immediate_pos[n++] =
3286          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3287    }
3288 
3289    if (emit->key.vs.attrib_puint_to_sscaled) {
3290       emit->common_immediate_pos[n++] =
3291          alloc_immediate_int4(emit, 22, 12, 2, 0);
3292 
3293       emit->common_immediate_pos[n++] =
3294          alloc_immediate_int4(emit, 22, 30, 0, 0);
3295    }
3296 
3297    if (emit->vposition.num_prescale > 1) {
3298       unsigned i;
3299       for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3300          emit->common_immediate_pos[n++] =
3301             alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3302       }
3303    }
3304 
3305    emit->immediates_dbl = (double (*)[2]) emit->immediates;
3306 
3307    if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3308       emit->common_immediate_pos[n++] =
3309          alloc_immediate_double2(emit, -1.0, -1.0);
3310    }
3311 
3312    if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
3313        emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
3314       emit->common_immediate_pos[n++] =
3315          alloc_immediate_double2(emit, 0.0, 0.0);
3316       emit->common_immediate_pos[n++] =
3317          alloc_immediate_double2(emit, 1.0, 1.0);
3318    }
3319 
3320    if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3321       emit->common_immediate_pos[n++] =
3322          alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3323    }
3324 
3325    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3326 
3327    unsigned i;
3328 
3329    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3330       if (emit->key.tex[i].texel_bias) {
3331          /* Replace 0.0f if more immediate float value is needed */
3332          emit->common_immediate_pos[n++] =
3333             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3334          break;
3335       }
3336    }
3337 
3338    /** TODO: allocate immediates for all possible element byte offset?
3339     */
3340    if (emit->raw_bufs) {
3341       unsigned i;
3342       for (i = 7; i < 12; i+=4) {
3343          emit->common_immediate_pos[n++] =
3344             alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3345       }
3346    }
3347 
3348    if (emit->info.indirect_files &
3349        (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
3350       unsigned i;
3351       for (i = 7; i < 8; i+=4) {
3352          emit->common_immediate_pos[n++] =
3353             alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3354       }
3355    }
3356 
3357    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3358    emit->num_common_immediates = n;
3359 }
3360 
3361 
3362 /**
3363  * Emit hull shader declarations.
3364 */
3365 static void
emit_hull_shader_declarations(struct svga_shader_emitter_v10 * emit)3366 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3367 {
3368    VGPU10OpcodeToken0 opcode0;
3369 
3370    /* Emit the input control point count */
3371    assert(emit->key.tcs.vertices_per_patch > 0 &&
3372           emit->key.tcs.vertices_per_patch <= 32);
3373 
3374    opcode0.value = 0;
3375    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3376    opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3377    begin_emit_instruction(emit);
3378    emit_dword(emit, opcode0.value);
3379    end_emit_instruction(emit);
3380 
3381    /* Emit the output control point count */
3382    assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3383 
3384    opcode0.value = 0;
3385    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3386    opcode0.controlPointCount = emit->key.tcs.vertices_out;
3387    begin_emit_instruction(emit);
3388    emit_dword(emit, opcode0.value);
3389    end_emit_instruction(emit);
3390 
3391    /* Emit tessellator domain */
3392    emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3393 
3394    /* Emit tessellator output primitive */
3395    opcode0.value = 0;
3396    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3397    if (emit->key.tcs.point_mode) {
3398       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3399    }
3400    else if (emit->key.tcs.prim_mode == MESA_PRIM_LINES) {
3401       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3402    }
3403    else {
3404       assert(emit->key.tcs.prim_mode == MESA_PRIM_QUADS ||
3405              emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES);
3406 
3407       if (emit->key.tcs.vertices_order_cw)
3408          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3409       else
3410          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3411    }
3412    begin_emit_instruction(emit);
3413    emit_dword(emit, opcode0.value);
3414    end_emit_instruction(emit);
3415 
3416    /* Emit tessellator partitioning */
3417    opcode0.value = 0;
3418    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3419    switch (emit->key.tcs.spacing) {
3420    case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3421       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3422       break;
3423    case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3424       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3425       break;
3426    case PIPE_TESS_SPACING_EQUAL:
3427       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3428       break;
3429    default:
3430       debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3431       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3432    }
3433    begin_emit_instruction(emit);
3434    emit_dword(emit, opcode0.value);
3435    end_emit_instruction(emit);
3436 
3437    alloc_common_immediates(emit);
3438 
3439    /* Declare constant registers */
3440    emit_constant_declaration(emit);
3441 
3442    /* Declare samplers and resources */
3443    emit_sampler_declarations(emit);
3444    emit_resource_declarations(emit);
3445 
3446    /* Declare images */
3447    emit_image_declarations(emit);
3448 
3449    /* Declare shader buffers */
3450    emit_shader_buf_declarations(emit);
3451 
3452    /* Declare atomic buffers */
3453    emit_atomic_buf_declarations(emit);
3454 
3455    int nVertices = emit->key.tcs.vertices_per_patch;
3456    emit->tcs.imm_index =
3457       alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3458 
3459    /* Now, emit the constant block containing all the immediates
3460     * declared by shader, as well as the extra ones seen above.
3461     */
3462    emit_vgpu10_immediates_block(emit);
3463 
3464 }
3465 
3466 
3467 /**
3468  * A helper function to determine if control point phase is needed.
3469  * Returns TRUE if there is control point output.
3470  */
3471 static bool
needs_control_point_phase(struct svga_shader_emitter_v10 * emit)3472 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3473 {
3474    unsigned i;
3475 
3476    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3477 
3478    /* If output control point count does not match the input count,
3479     * we need a control point phase to explicitly set the output control
3480     * points.
3481     */
3482    if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3483        emit->key.tcs.vertices_out)
3484       return true;
3485 
3486    for (i = 0; i < emit->info.num_outputs; i++) {
3487       switch (emit->info.output_semantic_name[i]) {
3488       case TGSI_SEMANTIC_PATCH:
3489       case TGSI_SEMANTIC_TESSOUTER:
3490       case TGSI_SEMANTIC_TESSINNER:
3491          break;
3492       default:
3493          return true;
3494       }
3495    }
3496    return false;
3497 }
3498 
3499 
3500 /**
3501  * A helper function to add shader signature for passthrough control point
3502  * phase. This signature is also generated for passthrough control point
3503  * phase from HLSL compiler and is needed by Metal Renderer.
3504  */
3505 static void
emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 * emit)3506 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3507 {
3508    struct svga_shader_signature *sgn = &emit->signature;
3509    SVGA3dDXShaderSignatureEntry *sgnEntry;
3510    unsigned i;
3511 
3512    for (i = 0; i < emit->info.num_inputs; i++) {
3513       unsigned index = emit->linkage.input_map[i];
3514       enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3515 
3516       sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3517 
3518       set_shader_signature_entry(sgnEntry, index,
3519                                  tgsi_semantic_to_sgn_name[sem_name],
3520                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3521                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3522                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3523 
3524       sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3525 
3526       set_shader_signature_entry(sgnEntry, i,
3527                                  tgsi_semantic_to_sgn_name[sem_name],
3528                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3529                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3530                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3531    }
3532 }
3533 
3534 
3535 /**
3536  * A helper function to emit an instruction to start the control point phase
3537  * in the hull shader.
3538  */
3539 static void
emit_control_point_phase_instruction(struct svga_shader_emitter_v10 * emit)3540 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3541 {
3542    VGPU10OpcodeToken0 opcode0;
3543 
3544    opcode0.value = 0;
3545    opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3546    begin_emit_instruction(emit);
3547    emit_dword(emit, opcode0.value);
3548    end_emit_instruction(emit);
3549 }
3550 
3551 
3552 /**
3553  * Start the hull shader control point phase
3554  */
3555 static bool
emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 * emit)3556 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3557 {
3558    /* If there is no control point output, skip the control point phase. */
3559    if (!needs_control_point_phase(emit)) {
3560       if (!emit->key.tcs.vertices_out) {
3561          /**
3562           * If the tcs does not explicitly generate any control point output
3563           * and the tes does not use any input control point, then
3564           * emit an empty control point phase with zero output control
3565           * point count.
3566           */
3567          emit_control_point_phase_instruction(emit);
3568 
3569          /**
3570           * Since this is an empty control point phase, we will need to
3571           * add input signatures when we parse the tcs again in the
3572           * patch constant phase.
3573           */
3574          emit->tcs.fork_phase_add_signature = true;
3575       }
3576       else {
3577          /**
3578           * Before skipping the control point phase, add the signature for
3579           * the passthrough control point.
3580           */
3581          emit_passthrough_control_point_signature(emit);
3582       }
3583       return false;
3584    }
3585 
3586    /* Start the control point phase in the hull shader */
3587    emit_control_point_phase_instruction(emit);
3588 
3589    /* Declare the output control point ID */
3590    if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3591       /* Add invocation id declaration if it does not exist */
3592       emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3593    }
3594 
3595    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3596                           VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3597                           VGPU10_OPERAND_INDEX_0D,
3598                           0, 1,
3599                           VGPU10_NAME_UNDEFINED,
3600                           VGPU10_OPERAND_0_COMPONENT, 0,
3601                           0,
3602                           VGPU10_INTERPOLATION_CONSTANT, true,
3603                           SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3604 
3605    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3606       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3607                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3608                              VGPU10_OPERAND_INDEX_0D,
3609                              0, 1,
3610                              VGPU10_NAME_UNDEFINED,
3611                              VGPU10_OPERAND_0_COMPONENT,
3612                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3613                              0,
3614                              VGPU10_INTERPOLATION_UNDEFINED, true,
3615                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3616    }
3617 
3618    return true;
3619 }
3620 
3621 
3622 /**
3623  * Start the hull shader patch constant phase and
3624  * do the second pass of the tcs translation and emit
3625  * the relevant declarations and instructions for this phase.
3626  */
3627 static bool
emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 * emit,struct tgsi_parse_context * parse)3628 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3629                                       struct tgsi_parse_context *parse)
3630 {
3631    unsigned inst_number = 0;
3632    bool ret = true;
3633    VGPU10OpcodeToken0 opcode0;
3634 
3635    emit->skip_instruction = false;
3636 
3637    /* Start the patch constant phase */
3638    opcode0.value = 0;
3639    opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3640    begin_emit_instruction(emit);
3641    emit_dword(emit, opcode0.value);
3642    end_emit_instruction(emit);
3643 
3644    /* Set the current phase to patch constant phase */
3645    emit->tcs.control_point_phase = false;
3646 
3647    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3648       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3649                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3650                              VGPU10_OPERAND_INDEX_0D,
3651                              0, 1,
3652                              VGPU10_NAME_UNDEFINED,
3653                              VGPU10_OPERAND_0_COMPONENT,
3654                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3655                              0,
3656                              VGPU10_INTERPOLATION_UNDEFINED, true,
3657                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3658    }
3659 
3660    /* Emit declarations for this phase */
3661    emit->index_range.required =
3662       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
3663    emit_tcs_input_declarations(emit);
3664 
3665    if (emit->index_range.start_index != INVALID_INDEX) {
3666       emit_index_range_declaration(emit);
3667    }
3668 
3669    emit->index_range.required =
3670       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
3671    emit_tcs_output_declarations(emit);
3672 
3673    if (emit->index_range.start_index != INVALID_INDEX) {
3674       emit_index_range_declaration(emit);
3675    }
3676    emit->index_range.required = false;
3677 
3678    emit_temporaries_declaration(emit);
3679 
3680    /* Reset the token position to the first instruction token
3681     * in preparation for the second pass of the shader
3682     */
3683    parse->Position = emit->tcs.instruction_token_pos;
3684 
3685    while (!tgsi_parse_end_of_tokens(parse)) {
3686       tgsi_parse_token(parse);
3687 
3688       assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3689       ret = emit_vgpu10_instruction(emit, inst_number++,
3690                                     &parse->FullToken.FullInstruction);
3691 
3692       /* Usually this applies to TCS only. If shader is reading output of
3693        * patch constant in fork phase, we should reemit all instructions
3694        * which are writting into output of patch constant in fork phase
3695        * to store results into temporaries.
3696        */
3697       assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
3698       if (emit->reemit_instruction) {
3699          assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3700          ret = emit_vgpu10_instruction(emit, inst_number,
3701                                        &parse->FullToken.FullInstruction);
3702       } else if (emit->reemit_rawbuf_instruction) {
3703          ret = emit_rawbuf_instruction(emit, inst_number,
3704                                        &parse->FullToken.FullInstruction);
3705       }
3706 
3707       if (!ret)
3708          return false;
3709    }
3710 
3711    return true;
3712 }
3713 
3714 
3715 /**
3716  * Emit the thread group declaration for compute shader.
3717  */
3718 static void
emit_compute_shader_declarations(struct svga_shader_emitter_v10 * emit)3719 emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
3720 {
3721    VGPU10OpcodeToken0 opcode0;
3722 
3723    opcode0.value = 0;
3724    opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
3725    begin_emit_instruction(emit);
3726    emit_dword(emit, opcode0.value);
3727    emit_dword(emit, emit->cs.block_width);
3728    emit_dword(emit, emit->cs.block_height);
3729    emit_dword(emit, emit->cs.block_depth);
3730    end_emit_instruction(emit);
3731 }
3732 
3733 
3734 /**
3735  * Emit index range declaration.
3736  */
3737 static bool
emit_index_range_declaration(struct svga_shader_emitter_v10 * emit)3738 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3739 {
3740    if (emit->version < 50)
3741       return true;
3742 
3743    assert(emit->index_range.start_index != INVALID_INDEX);
3744    assert(emit->index_range.count != 0);
3745    assert(emit->index_range.required);
3746    assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3747    assert(emit->index_range.dim != 0);
3748    assert(emit->index_range.size != 0);
3749 
3750    VGPU10OpcodeToken0 opcode0;
3751    VGPU10OperandToken0 operand0;
3752 
3753    opcode0.value = 0;
3754    opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3755 
3756    operand0.value = 0;
3757    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3758    operand0.indexDimension = emit->index_range.dim;
3759    operand0.operandType = emit->index_range.operandType;
3760    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3761    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3762 
3763    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3764       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3765 
3766    begin_emit_instruction(emit);
3767    emit_dword(emit, opcode0.value);
3768    emit_dword(emit, operand0.value);
3769 
3770    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3771       emit_dword(emit, emit->index_range.size);
3772       emit_dword(emit, emit->index_range.start_index);
3773       emit_dword(emit, emit->index_range.count);
3774    }
3775    else {
3776       emit_dword(emit, emit->index_range.start_index);
3777       emit_dword(emit, emit->index_range.count);
3778    }
3779 
3780    end_emit_instruction(emit);
3781 
3782    /* Reset fields in emit->index_range struct except
3783     * emit->index_range.required which will be reset afterwards
3784     */
3785    emit->index_range.count = 0;
3786    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3787    emit->index_range.start_index = INVALID_INDEX;
3788    emit->index_range.size = 0;
3789    emit->index_range.dim = 0;
3790 
3791    return true;
3792 }
3793 
3794 
3795 /**
3796  * Emit a vgpu10 declaration "instruction".
3797  * \param index  the register index
3798  * \param size   array size of the operand. In most cases, it is 1,
3799  *               but for inputs to geometry shader, the array size varies
3800  *               depending on the primitive type.
3801  */
3802 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)3803 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3804                       VGPU10OpcodeToken0 opcode0,
3805                       VGPU10OperandToken0 operand0,
3806                       VGPU10NameToken name_token,
3807                       unsigned index, unsigned size)
3808 {
3809    assert(opcode0.opcodeType);
3810    assert(operand0.mask ||
3811           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3812           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3813           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3814           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3815           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3816           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3817           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3818           (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3819 
3820    begin_emit_instruction(emit);
3821    emit_dword(emit, opcode0.value);
3822 
3823    emit_dword(emit, operand0.value);
3824 
3825    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3826       /* Next token is the index of the register to declare */
3827       emit_dword(emit, index);
3828    }
3829    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3830       /* Next token is the size of the register */
3831       emit_dword(emit, size);
3832 
3833       /* Followed by the index of the register */
3834       emit_dword(emit, index);
3835    }
3836 
3837    if (name_token.value) {
3838       emit_dword(emit, name_token.value);
3839    }
3840 
3841    end_emit_instruction(emit);
3842 }
3843 
3844 
3845 /**
3846  * Emit the declaration for a shader input.
3847  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3848  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3849  * \param dim         index dimension
3850  * \param index       the input register index
3851  * \param size        array size of the operand. In most cases, it is 1,
3852  *                    but for inputs to geometry shader, the array size varies
3853  *                    depending on the primitive type. For tessellation control
3854  *                    shader, the array size is the vertex count per patch.
3855  * \param name        one of VGPU10_NAME_x
3856  * \param numComp     number of components
3857  * \param selMode     component selection mode
3858  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3859  * \param interpMode  interpolation mode
3860  */
3861 static void
emit_input_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcodeType,VGPU10_OPERAND_TYPE operandType,VGPU10_OPERAND_INDEX_DIMENSION dim,unsigned index,unsigned size,VGPU10_SYSTEM_NAME name,VGPU10_OPERAND_NUM_COMPONENTS numComp,VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,unsigned usageMask,VGPU10_INTERPOLATION_MODE interpMode,bool addSignature,SVGA3dDXSignatureSemanticName sgnName)3862 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3863                        VGPU10_OPCODE_TYPE opcodeType,
3864                        VGPU10_OPERAND_TYPE operandType,
3865                        VGPU10_OPERAND_INDEX_DIMENSION dim,
3866                        unsigned index, unsigned size,
3867                        VGPU10_SYSTEM_NAME name,
3868                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
3869                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3870                        unsigned usageMask,
3871                        VGPU10_INTERPOLATION_MODE interpMode,
3872                        bool addSignature,
3873                        SVGA3dDXSignatureSemanticName sgnName)
3874 {
3875    VGPU10OpcodeToken0 opcode0;
3876    VGPU10OperandToken0 operand0;
3877    VGPU10NameToken name_token;
3878 
3879    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3880    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3881           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3882           opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3883           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3884           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3885           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3886    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3887           operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3888           operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3889           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3890           operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3891           operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3892           operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3893           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3894           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3895           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3896           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3897 
3898    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3899    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3900    assert(dim <= VGPU10_OPERAND_INDEX_3D);
3901    assert(name == VGPU10_NAME_UNDEFINED ||
3902           name == VGPU10_NAME_POSITION ||
3903           name == VGPU10_NAME_INSTANCE_ID ||
3904           name == VGPU10_NAME_VERTEX_ID ||
3905           name == VGPU10_NAME_PRIMITIVE_ID ||
3906           name == VGPU10_NAME_IS_FRONT_FACE ||
3907           name == VGPU10_NAME_SAMPLE_INDEX ||
3908           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3909           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3910 
3911    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3912           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3913           interpMode == VGPU10_INTERPOLATION_LINEAR ||
3914           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3915           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3916           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3917           interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3918           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3919 
3920    check_register_index(emit, opcodeType, index);
3921 
3922    opcode0.value = operand0.value = name_token.value = 0;
3923 
3924    opcode0.opcodeType = opcodeType;
3925    opcode0.interpolationMode = interpMode;
3926 
3927    operand0.operandType = operandType;
3928    operand0.numComponents = numComp;
3929    operand0.selectionMode = selMode;
3930    operand0.mask = usageMask;
3931    operand0.indexDimension = dim;
3932    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3933    if (dim == VGPU10_OPERAND_INDEX_2D)
3934       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3935 
3936    name_token.name = name;
3937 
3938    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3939 
3940    if (addSignature) {
3941       struct svga_shader_signature *sgn = &emit->signature;
3942       if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3943          /* Set patch constant signature */
3944          SVGA3dDXShaderSignatureEntry *sgnEntry =
3945             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3946          set_shader_signature_entry(sgnEntry, index,
3947                                     sgnName, usageMask,
3948                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3949                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3950 
3951       } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3952                  operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3953          /* Set input signature */
3954          SVGA3dDXShaderSignatureEntry *sgnEntry =
3955             &sgn->inputs[sgn->header.numInputSignatures++];
3956          set_shader_signature_entry(sgnEntry, index,
3957                                     sgnName, usageMask,
3958                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3959                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3960       }
3961    }
3962 
3963    if (emit->index_range.required) {
3964       /* Here, index_range declaration is only applicable for opcodeType
3965        * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3966        * for operandType VGPU10_OPERAND_TYPE_INPUT,
3967        * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3968        * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3969        */
3970       if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3971            opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3972           (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3973            operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3974            operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3975          if (emit->index_range.start_index != INVALID_INDEX) {
3976             emit_index_range_declaration(emit);
3977          }
3978          return;
3979       }
3980 
3981       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3982          /* Need record new index_range */
3983          emit->index_range.count = 1;
3984          emit->index_range.operandType = operandType;
3985          emit->index_range.start_index = index;
3986          emit->index_range.size = size;
3987          emit->index_range.dim = dim;
3988       }
3989       else if (index !=
3990                (emit->index_range.start_index + emit->index_range.count) ||
3991                emit->index_range.operandType != operandType) {
3992          /* Input index is not contiguous with index range or operandType is
3993           * different from index range's operandType. We need to emit current
3994           * index_range first and then start recording next index range.
3995           */
3996          emit_index_range_declaration(emit);
3997 
3998          emit->index_range.count = 1;
3999          emit->index_range.operandType = operandType;
4000          emit->index_range.start_index = index;
4001          emit->index_range.size = size;
4002          emit->index_range.dim = dim;
4003       }
4004       else if (emit->index_range.operandType == operandType) {
4005          /* Since input index is contiguous with index range and operandType
4006           * is same as index range's operandType, increment index range count.
4007           */
4008          emit->index_range.count++;
4009       }
4010    }
4011 }
4012 
4013 
4014 /**
4015  * Emit the declaration for a shader output.
4016  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
4017  * \param index  the output register index
4018  * \param name  one of VGPU10_NAME_x
4019  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
4020  */
4021 static void
emit_output_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE type,unsigned index,VGPU10_SYSTEM_NAME name,unsigned writemask,bool addSignature,SVGA3dDXSignatureSemanticName sgnName)4022 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
4023                         VGPU10_OPCODE_TYPE type, unsigned index,
4024                         VGPU10_SYSTEM_NAME name,
4025                         unsigned writemask,
4026                         bool addSignature,
4027                         SVGA3dDXSignatureSemanticName sgnName)
4028 {
4029    VGPU10OpcodeToken0 opcode0;
4030    VGPU10OperandToken0 operand0;
4031    VGPU10NameToken name_token;
4032 
4033    assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
4034    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
4035           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
4036           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
4037    assert(name == VGPU10_NAME_UNDEFINED ||
4038           name == VGPU10_NAME_POSITION ||
4039           name == VGPU10_NAME_PRIMITIVE_ID ||
4040           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
4041           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
4042           name == VGPU10_NAME_CLIP_DISTANCE);
4043 
4044    check_register_index(emit, type, index);
4045 
4046    opcode0.value = operand0.value = name_token.value = 0;
4047 
4048    opcode0.opcodeType = type;
4049    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4050    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4051    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4052    operand0.mask = writemask;
4053    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4054    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4055 
4056    name_token.name = name;
4057 
4058    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4059 
4060    /* Capture output signature */
4061    if (addSignature) {
4062       struct svga_shader_signature *sgn = &emit->signature;
4063       SVGA3dDXShaderSignatureEntry *sgnEntry =
4064          &sgn->outputs[sgn->header.numOutputSignatures++];
4065       set_shader_signature_entry(sgnEntry, index,
4066                                  sgnName, writemask,
4067                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4068                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4069    }
4070 
4071    if (emit->index_range.required) {
4072       /* Here, index_range declaration is only applicable for opcodeType
4073        * VGPU10_OPCODE_DCL_OUTPUT and for operandType
4074        * VGPU10_OPERAND_TYPE_OUTPUT.
4075        */
4076       if (type != VGPU10_OPCODE_DCL_OUTPUT) {
4077          if (emit->index_range.start_index != INVALID_INDEX) {
4078             emit_index_range_declaration(emit);
4079          }
4080          return;
4081       }
4082 
4083       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
4084          /* Need record new index_range */
4085          emit->index_range.count = 1;
4086          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4087          emit->index_range.start_index = index;
4088          emit->index_range.size = 1;
4089          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4090       }
4091       else if (index !=
4092                (emit->index_range.start_index + emit->index_range.count)) {
4093          /* Output index is not contiguous with index range. We need to
4094           * emit current index_range first and then start recording next
4095           * index range.
4096           */
4097          emit_index_range_declaration(emit);
4098 
4099          emit->index_range.count = 1;
4100          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4101          emit->index_range.start_index = index;
4102          emit->index_range.size = 1;
4103          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4104       }
4105       else {
4106          /* Since output index is contiguous with index range, increment
4107           * index range count.
4108           */
4109          emit->index_range.count++;
4110       }
4111    }
4112 }
4113 
4114 
4115 /**
4116  * Emit the declaration for the fragment depth output.
4117  */
4118 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)4119 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
4120 {
4121    VGPU10OpcodeToken0 opcode0;
4122    VGPU10OperandToken0 operand0;
4123    VGPU10NameToken name_token;
4124 
4125    assert(emit->unit == PIPE_SHADER_FRAGMENT);
4126 
4127    opcode0.value = operand0.value = name_token.value = 0;
4128 
4129    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4130    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
4131    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
4132    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4133    operand0.mask = 0;
4134 
4135    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4136 }
4137 
4138 
4139 /**
4140  * Emit the declaration for the fragment sample mask/coverage output.
4141  */
4142 static void
emit_samplemask_output_declaration(struct svga_shader_emitter_v10 * emit)4143 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
4144 {
4145    VGPU10OpcodeToken0 opcode0;
4146    VGPU10OperandToken0 operand0;
4147    VGPU10NameToken name_token;
4148 
4149    assert(emit->unit == PIPE_SHADER_FRAGMENT);
4150    assert(emit->version >= 41);
4151 
4152    opcode0.value = operand0.value = name_token.value = 0;
4153 
4154    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4155    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
4156    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
4157    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4158    operand0.mask = 0;
4159 
4160    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4161 }
4162 
4163 
4164 /**
4165  * Emit output declarations for fragment shader.
4166  */
4167 static void
emit_fs_output_declarations(struct svga_shader_emitter_v10 * emit)4168 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
4169 {
4170    unsigned int i;
4171 
4172    for (i = 0; i < emit->info.num_outputs; i++) {
4173       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
4174       const enum tgsi_semantic semantic_name =
4175          emit->info.output_semantic_name[i];
4176       const unsigned semantic_index = emit->info.output_semantic_index[i];
4177       unsigned index = i;
4178 
4179       if (semantic_name == TGSI_SEMANTIC_COLOR) {
4180          assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
4181 
4182          emit->fs.color_out_index[semantic_index] = index;
4183 
4184          emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
4185                                               index + 1);
4186 
4187          /* The semantic index is the shader's color output/buffer index */
4188          emit_output_declaration(emit,
4189                                  VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
4190                                  VGPU10_NAME_UNDEFINED,
4191                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4192                                  true,
4193                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4194 
4195          if (semantic_index == 0) {
4196             if (emit->key.fs.write_color0_to_n_cbufs > 1) {
4197                /* Emit declarations for the additional color outputs
4198                 * for broadcasting.
4199                 */
4200                unsigned j;
4201                for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
4202                   /* Allocate a new output index */
4203                   unsigned idx = emit->info.num_outputs + j - 1;
4204                   emit->fs.color_out_index[j] = idx;
4205                   emit_output_declaration(emit,
4206                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
4207                                         VGPU10_NAME_UNDEFINED,
4208                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4209                                         true,
4210                                         map_tgsi_semantic_to_sgn_name(semantic_name));
4211                   emit->info.output_semantic_index[idx] = j;
4212                }
4213 
4214                emit->fs.num_color_outputs =
4215                      emit->key.fs.write_color0_to_n_cbufs;
4216             }
4217          }
4218       }
4219       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4220          /* Fragment depth output */
4221          emit_fragdepth_output_declaration(emit);
4222       }
4223       else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
4224          /* Sample mask output */
4225          emit_samplemask_output_declaration(emit);
4226       }
4227       else {
4228          assert(!"Bad output semantic name");
4229       }
4230    }
4231 }
4232 
4233 
4234 /**
4235  * Emit common output declaration for vertex processing.
4236  */
4237 static void
emit_vertex_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned writemask,bool addSignature)4238 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
4239                                unsigned index, unsigned writemask,
4240                                bool addSignature)
4241 {
4242    const enum tgsi_semantic semantic_name =
4243          emit->info.output_semantic_name[index];
4244    const unsigned semantic_index = emit->info.output_semantic_index[index];
4245    unsigned name, type;
4246    unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4247 
4248    assert(emit->unit != PIPE_SHADER_FRAGMENT &&
4249           emit->unit != PIPE_SHADER_COMPUTE);
4250 
4251    switch (semantic_name) {
4252    case TGSI_SEMANTIC_POSITION:
4253       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4254          /* position will be declared in control point only */
4255          assert(emit->tcs.control_point_phase);
4256          type = VGPU10_OPCODE_DCL_OUTPUT;
4257          name = VGPU10_NAME_UNDEFINED;
4258          emit_output_declaration(emit, type, index, name, final_mask, true,
4259                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4260          return;
4261       }
4262       else {
4263          type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4264          name = VGPU10_NAME_POSITION;
4265       }
4266       /* Save the index of the vertex position output register */
4267       emit->vposition.out_index = index;
4268       break;
4269    case TGSI_SEMANTIC_CLIPDIST:
4270       type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4271       name = VGPU10_NAME_CLIP_DISTANCE;
4272       /* save the starting index of the clip distance output register */
4273       if (semantic_index == 0)
4274          emit->clip_dist_out_index = index;
4275       final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
4276       if (final_mask == 0x0)
4277          return; /* discard this do-nothing declaration */
4278       break;
4279    case TGSI_SEMANTIC_CLIPVERTEX:
4280       type = VGPU10_OPCODE_DCL_OUTPUT;
4281       name = VGPU10_NAME_UNDEFINED;
4282       emit->clip_vertex_out_index = index;
4283       break;
4284    default:
4285       /* generic output */
4286       type = VGPU10_OPCODE_DCL_OUTPUT;
4287       name = VGPU10_NAME_UNDEFINED;
4288    }
4289 
4290    emit_output_declaration(emit, type, index, name, final_mask, addSignature,
4291                            map_tgsi_semantic_to_sgn_name(semantic_name));
4292 }
4293 
4294 
4295 /**
4296  * Emit declaration for outputs in vertex shader.
4297  */
4298 static void
emit_vs_output_declarations(struct svga_shader_emitter_v10 * emit)4299 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
4300 {
4301    unsigned i;
4302    for (i = 0; i < emit->info.num_outputs; i++) {
4303       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4304    }
4305 }
4306 
4307 
4308 /**
4309  * A helper function to determine the writemask for an output
4310  * for the specified stream.
4311  */
4312 static unsigned
output_writemask_for_stream(unsigned stream,uint8_t output_streams,uint8_t output_usagemask)4313 output_writemask_for_stream(unsigned stream, uint8_t output_streams,
4314                             uint8_t output_usagemask)
4315 {
4316    unsigned i;
4317    unsigned writemask = 0;
4318 
4319    for (i = 0; i < 4; i++) {
4320       if ((output_streams & 0x3) == stream)
4321          writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
4322       output_streams >>= 2;
4323    }
4324    return writemask & output_usagemask;
4325 }
4326 
4327 
4328 /**
4329  * Emit declaration for outputs in geometry shader.
4330  */
4331 static void
emit_gs_output_declarations(struct svga_shader_emitter_v10 * emit)4332 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4333 {
4334    unsigned i;
4335    VGPU10OpcodeToken0 opcode0;
4336    unsigned numStreamsSupported = 1;
4337    int s;
4338 
4339    if (emit->version >= 50) {
4340       numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4341    }
4342 
4343    /**
4344     * Start emitting from the last stream first, so we end with
4345     * stream 0, so any of the auxiliary output declarations will
4346     * go to stream 0.
4347     */
4348    for (s = numStreamsSupported-1; s >= 0; s--) {
4349 
4350       if (emit->info.num_stream_output_components[s] == 0)
4351          continue;
4352 
4353       if (emit->version >= 50) {
4354          /* DCL_STREAM stream */
4355          begin_emit_instruction(emit);
4356          emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, false);
4357          emit_stream_register(emit, s);
4358          end_emit_instruction(emit);
4359       }
4360 
4361       /* emit output primitive topology declaration */
4362       opcode0.value = 0;
4363       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4364       opcode0.primitiveTopology = emit->gs.prim_topology;
4365       emit_property_instruction(emit, opcode0, 0, 0);
4366 
4367       for (i = 0; i < emit->info.num_outputs; i++) {
4368          unsigned writemask;
4369 
4370          /* find out the writemask for this stream */
4371          writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4372                                                  emit->output_usage_mask[i]);
4373 
4374          if (writemask) {
4375             enum tgsi_semantic semantic_name =
4376                emit->info.output_semantic_name[i];
4377 
4378             /* TODO: Still need to take care of a special case where a
4379              *       single varying spans across multiple output registers.
4380              */
4381             switch(semantic_name) {
4382             case TGSI_SEMANTIC_PRIMID:
4383                emit_output_declaration(emit,
4384                                        VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4385                                        VGPU10_NAME_PRIMITIVE_ID,
4386                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4387                                        false,
4388                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4389                break;
4390             case TGSI_SEMANTIC_LAYER:
4391                emit_output_declaration(emit,
4392                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4393                                        VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4394                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4395                                        false,
4396                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4397                break;
4398             case TGSI_SEMANTIC_VIEWPORT_INDEX:
4399                emit_output_declaration(emit,
4400                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4401                                        VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4402                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4403                                        false,
4404                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4405                emit->gs.viewport_index_out_index = i;
4406                break;
4407             default:
4408                emit_vertex_output_declaration(emit, i, writemask, false);
4409             }
4410          }
4411       }
4412    }
4413 
4414    /* For geometry shader outputs, it is possible the same register is
4415     * declared multiple times for different streams. So to avoid
4416     * redundant signature entries, geometry shader output signature is done
4417     * outside of the declaration.
4418     */
4419    struct svga_shader_signature *sgn = &emit->signature;
4420    SVGA3dDXShaderSignatureEntry *sgnEntry;
4421 
4422    for (i = 0; i < emit->info.num_outputs; i++) {
4423       if (emit->output_usage_mask[i]) {
4424          enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4425 
4426          sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4427          set_shader_signature_entry(sgnEntry, i,
4428                                     map_tgsi_semantic_to_sgn_name(sem_name),
4429                                     emit->output_usage_mask[i],
4430                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4431                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4432       }
4433    }
4434 }
4435 
4436 
4437 /**
4438  * Emit the declaration for the tess inner/outer output.
4439  * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4440  * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4441  * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4442  */
4443 static void
emit_tesslevel_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned opcodeType,unsigned operandType,VGPU10_SYSTEM_NAME name,SVGA3dDXSignatureSemanticName sgnName)4444 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4445                            unsigned index, unsigned opcodeType,
4446                            unsigned operandType, VGPU10_SYSTEM_NAME name,
4447                            SVGA3dDXSignatureSemanticName sgnName)
4448 {
4449    VGPU10OpcodeToken0 opcode0;
4450    VGPU10OperandToken0 operand0;
4451    VGPU10NameToken name_token;
4452 
4453    assert(emit->version >= 50);
4454    assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4455           (emit->key.tcs.prim_mode == MESA_PRIM_LINES &&
4456            name == VGPU10_NAME_UNDEFINED));
4457    assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4458 
4459    assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4460           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4461 
4462    opcode0.value = operand0.value = name_token.value = 0;
4463 
4464    opcode0.opcodeType = opcodeType;
4465    operand0.operandType = operandType;
4466    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4467    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4468    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4469    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4470    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4471 
4472    name_token.name = name;
4473    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4474 
4475    /* Capture patch constant signature */
4476    struct svga_shader_signature *sgn = &emit->signature;
4477    SVGA3dDXShaderSignatureEntry *sgnEntry =
4478       &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4479    set_shader_signature_entry(sgnEntry, index,
4480                               sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4481                               SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4482                               SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4483 }
4484 
4485 
4486 /**
4487  * Emit output declarations for tessellation control shader.
4488  */
4489 static void
emit_tcs_output_declarations(struct svga_shader_emitter_v10 * emit)4490 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4491 {
4492    unsigned int i;
4493    unsigned outputIndex = emit->num_outputs;
4494    struct svga_shader_signature *sgn = &emit->signature;
4495 
4496    /**
4497     * Initialize patch_generic_out_count so it won't be counted twice
4498     * since this function is called twice, one for control point phase
4499     * and another time for patch constant phase.
4500     */
4501    emit->tcs.patch_generic_out_count = 0;
4502 
4503    for (i = 0; i < emit->info.num_outputs; i++) {
4504       unsigned index = i;
4505       const enum tgsi_semantic semantic_name =
4506          emit->info.output_semantic_name[i];
4507 
4508       switch (semantic_name) {
4509       case TGSI_SEMANTIC_TESSINNER:
4510          emit->tcs.inner.tgsi_index = i;
4511 
4512          /* skip per-patch output declarations in control point phase */
4513          if (emit->tcs.control_point_phase)
4514             break;
4515 
4516          emit->tcs.inner.out_index = outputIndex;
4517          switch (emit->key.tcs.prim_mode) {
4518          case MESA_PRIM_QUADS:
4519             emit_tesslevel_declaration(emit, outputIndex++,
4520                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4521                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4522                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4523 
4524             emit_tesslevel_declaration(emit, outputIndex++,
4525                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4526                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4527                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4528             break;
4529          case MESA_PRIM_TRIANGLES:
4530             emit_tesslevel_declaration(emit, outputIndex++,
4531                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4532                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4533                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4534             break;
4535          case MESA_PRIM_LINES:
4536             break;
4537          default:
4538             debug_printf("Unsupported primitive type");
4539          }
4540          break;
4541 
4542       case TGSI_SEMANTIC_TESSOUTER:
4543          emit->tcs.outer.tgsi_index = i;
4544 
4545          /* skip per-patch output declarations in control point phase */
4546          if (emit->tcs.control_point_phase)
4547             break;
4548 
4549          emit->tcs.outer.out_index = outputIndex;
4550          switch (emit->key.tcs.prim_mode) {
4551          case MESA_PRIM_QUADS:
4552             for (int j = 0; j < 4; j++) {
4553                emit_tesslevel_declaration(emit, outputIndex++,
4554                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4555                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4556                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4557             }
4558             break;
4559          case MESA_PRIM_TRIANGLES:
4560             for (int j = 0; j < 3; j++) {
4561                emit_tesslevel_declaration(emit, outputIndex++,
4562                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4563                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4564                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4565             }
4566             break;
4567          case MESA_PRIM_LINES:
4568             for (int j = 0; j < 2; j++) {
4569                emit_tesslevel_declaration(emit, outputIndex++,
4570                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4571                   VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4572                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4573             }
4574             break;
4575          default:
4576             debug_printf("Unsupported primitive type");
4577          }
4578          break;
4579 
4580       case TGSI_SEMANTIC_PATCH:
4581          if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4582             emit->tcs.patch_generic_out_index= i;
4583          emit->tcs.patch_generic_out_count++;
4584 
4585          /* skip per-patch output declarations in control point phase */
4586          if (emit->tcs.control_point_phase)
4587             break;
4588 
4589          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4590                                  VGPU10_NAME_UNDEFINED,
4591                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4592                                  false,
4593                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4594 
4595          SVGA3dDXShaderSignatureEntry *sgnEntry =
4596             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4597          set_shader_signature_entry(sgnEntry, index,
4598                                     map_tgsi_semantic_to_sgn_name(semantic_name),
4599                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4600                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4601                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4602 
4603          break;
4604 
4605       default:
4606          /* save the starting index of control point outputs */
4607          if (emit->tcs.control_point_out_index == INVALID_INDEX)
4608             emit->tcs.control_point_out_index = i;
4609          emit->tcs.control_point_out_count++;
4610 
4611          /* skip control point output declarations in patch constant phase */
4612          if (!emit->tcs.control_point_phase)
4613             break;
4614 
4615          emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4616                                         true);
4617 
4618       }
4619    }
4620 
4621    if (emit->tcs.control_point_phase) {
4622       /**
4623        * Add missing control point output in control point phase.
4624        */
4625       if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4626          /* use register index after tessellation factors */
4627          switch (emit->key.tcs.prim_mode) {
4628          case MESA_PRIM_QUADS:
4629             emit->tcs.control_point_out_index = outputIndex + 6;
4630             break;
4631          case MESA_PRIM_TRIANGLES:
4632             emit->tcs.control_point_out_index = outputIndex + 4;
4633             break;
4634          default:
4635             emit->tcs.control_point_out_index = outputIndex + 2;
4636             break;
4637          }
4638          emit->tcs.control_point_out_count++;
4639          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4640                                  emit->tcs.control_point_out_index,
4641                                  VGPU10_NAME_POSITION,
4642                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4643                                  true,
4644                                  SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4645 
4646          /* If tcs does not output any control point output,
4647           * we can end the hull shader control point phase here
4648           * after emitting the default control point output.
4649           */
4650          emit->skip_instruction = true;
4651       }
4652    }
4653    else {
4654       if (emit->tcs.outer.out_index == INVALID_INDEX) {
4655          /* since the TCS did not declare out outer tess level output register,
4656           * we declare it here for patch constant phase only.
4657           */
4658          emit->tcs.outer.out_index = outputIndex;
4659          if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
4660             for (int i = 0; i < 4; i++) {
4661                emit_tesslevel_declaration(emit, outputIndex++,
4662                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4663                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4664                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4665             }
4666          }
4667          else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
4668             for (int i = 0; i < 3; i++) {
4669                emit_tesslevel_declaration(emit, outputIndex++,
4670                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4671                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4672                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4673             }
4674          }
4675       }
4676 
4677       if (emit->tcs.inner.out_index == INVALID_INDEX) {
4678          /* since the TCS did not declare out inner tess level output register,
4679           * we declare it here
4680           */
4681          emit->tcs.inner.out_index = outputIndex;
4682          if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
4683             emit_tesslevel_declaration(emit, outputIndex++,
4684                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4685                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4686                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4687             emit_tesslevel_declaration(emit, outputIndex++,
4688                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4689                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4690                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4691          }
4692          else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
4693             emit_tesslevel_declaration(emit, outputIndex++,
4694                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4695                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4696                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4697          }
4698       }
4699    }
4700    emit->num_outputs = outputIndex;
4701 }
4702 
4703 
4704 /**
4705  * Emit output declarations for tessellation evaluation shader.
4706  */
4707 static void
emit_tes_output_declarations(struct svga_shader_emitter_v10 * emit)4708 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4709 {
4710    unsigned int i;
4711 
4712    for (i = 0; i < emit->info.num_outputs; i++) {
4713       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], true);
4714    }
4715 }
4716 
4717 
4718 /**
4719  * Emit the declaration for a system value input/output.
4720  */
4721 static void
emit_system_value_declaration(struct svga_shader_emitter_v10 * emit,enum tgsi_semantic semantic_name,unsigned index)4722 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4723                               enum tgsi_semantic semantic_name, unsigned index)
4724 {
4725    switch (semantic_name) {
4726    case TGSI_SEMANTIC_INSTANCEID:
4727       index = alloc_system_value_index(emit, index);
4728       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4729                              VGPU10_OPERAND_TYPE_INPUT,
4730                              VGPU10_OPERAND_INDEX_1D,
4731                              index, 1,
4732                              VGPU10_NAME_INSTANCE_ID,
4733                              VGPU10_OPERAND_4_COMPONENT,
4734                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4735                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4736                              VGPU10_INTERPOLATION_UNDEFINED, true,
4737                              map_tgsi_semantic_to_sgn_name(semantic_name));
4738       break;
4739    case TGSI_SEMANTIC_VERTEXID:
4740       emit->vs.vertex_id_sys_index = index;
4741       index = alloc_system_value_index(emit, index);
4742       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4743                              VGPU10_OPERAND_TYPE_INPUT,
4744                              VGPU10_OPERAND_INDEX_1D,
4745                              index, 1,
4746                              VGPU10_NAME_VERTEX_ID,
4747                              VGPU10_OPERAND_4_COMPONENT,
4748                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4749                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4750                              VGPU10_INTERPOLATION_UNDEFINED, true,
4751                              map_tgsi_semantic_to_sgn_name(semantic_name));
4752       break;
4753    case TGSI_SEMANTIC_SAMPLEID:
4754       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4755       emit->fs.sample_id_sys_index = index;
4756       index = alloc_system_value_index(emit, index);
4757       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4758                              VGPU10_OPERAND_TYPE_INPUT,
4759                              VGPU10_OPERAND_INDEX_1D,
4760                              index, 1,
4761                              VGPU10_NAME_SAMPLE_INDEX,
4762                              VGPU10_OPERAND_4_COMPONENT,
4763                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4764                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4765                              VGPU10_INTERPOLATION_CONSTANT, true,
4766                              map_tgsi_semantic_to_sgn_name(semantic_name));
4767       break;
4768    case TGSI_SEMANTIC_SAMPLEPOS:
4769       /* This system value contains the position of the current sample
4770        * when using per-sample shading.  We implement this by calling
4771        * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4772        * index as the argument.  See emit_sample_position_instructions().
4773        */
4774       assert(emit->version >= 41);
4775       emit->fs.sample_pos_sys_index = index;
4776       index = alloc_system_value_index(emit, index);
4777       break;
4778    case TGSI_SEMANTIC_INVOCATIONID:
4779       /* Note: invocation id input is mapped to different register depending
4780        * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4781        * In TCS, it will be mapped to vOutputControlPointID#.
4782        * Since in both cases, the mapped name is unique rather than
4783        * just a generic input name ("v#"), so there is no need to remap
4784        * the index value.
4785        */
4786       assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4787              emit->unit == PIPE_SHADER_TESS_CTRL);
4788       assert(emit->version >= 50);
4789 
4790       if (emit->unit == PIPE_SHADER_GEOMETRY) {
4791          emit->gs.invocation_id_sys_index = index;
4792          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4793                                 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4794                                 VGPU10_OPERAND_INDEX_0D,
4795                                 index, 1,
4796                                 VGPU10_NAME_UNDEFINED,
4797                                 VGPU10_OPERAND_0_COMPONENT,
4798                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4799                                 0,
4800                                 VGPU10_INTERPOLATION_UNDEFINED, true,
4801                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4802       } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4803          /* The emission of the control point id will be done
4804           * in the control point phase in emit_hull_shader_control_point_phase().
4805           */
4806          emit->tcs.invocation_id_sys_index = index;
4807       }
4808       break;
4809    case TGSI_SEMANTIC_SAMPLEMASK:
4810       /* Note: the PS sample mask input has a unique name ("vCoverage#")
4811        * rather than just a generic input name ("v#") so no need to remap the
4812        * index value.
4813        */
4814       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4815       assert(emit->version >= 50);
4816       emit->fs.sample_mask_in_sys_index = index;
4817       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4818                              VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4819                              VGPU10_OPERAND_INDEX_0D,
4820                              index, 1,
4821                              VGPU10_NAME_UNDEFINED,
4822                              VGPU10_OPERAND_1_COMPONENT,
4823                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4824                              0,
4825                              VGPU10_INTERPOLATION_CONSTANT, true,
4826                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4827       break;
4828    case TGSI_SEMANTIC_TESSCOORD:
4829       assert(emit->version >= 50);
4830 
4831       unsigned usageMask = 0;
4832 
4833       if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
4834          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4835       }
4836       else if (emit->tes.prim_mode == MESA_PRIM_LINES ||
4837                emit->tes.prim_mode == MESA_PRIM_QUADS) {
4838          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4839       }
4840 
4841       emit->tes.tesscoord_sys_index = index;
4842       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4843                              VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4844                              VGPU10_OPERAND_INDEX_0D,
4845                              index, 1,
4846                              VGPU10_NAME_UNDEFINED,
4847                              VGPU10_OPERAND_4_COMPONENT,
4848                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4849                              usageMask,
4850                              VGPU10_INTERPOLATION_UNDEFINED, true,
4851                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4852       break;
4853    case TGSI_SEMANTIC_TESSINNER:
4854       assert(emit->version >= 50);
4855       emit->tes.inner.tgsi_index = index;
4856       break;
4857    case TGSI_SEMANTIC_TESSOUTER:
4858       assert(emit->version >= 50);
4859       emit->tes.outer.tgsi_index = index;
4860       break;
4861    case TGSI_SEMANTIC_VERTICESIN:
4862       assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4863       assert(emit->version >= 50);
4864 
4865       /* save the system value index */
4866       emit->tcs.vertices_per_patch_index = index;
4867       break;
4868    case TGSI_SEMANTIC_PRIMID:
4869       assert(emit->version >= 50);
4870       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4871          emit->tcs.prim_id_index = index;
4872       }
4873       else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4874          emit->tes.prim_id_index = index;
4875          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4876                                 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4877                                 VGPU10_OPERAND_INDEX_0D,
4878                                 index, 1,
4879                                 VGPU10_NAME_UNDEFINED,
4880                                 VGPU10_OPERAND_0_COMPONENT,
4881                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4882                                 0,
4883                                 VGPU10_INTERPOLATION_UNDEFINED, true,
4884                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4885       }
4886       break;
4887    case TGSI_SEMANTIC_THREAD_ID:
4888       assert(emit->unit >= PIPE_SHADER_COMPUTE);
4889       assert(emit->version >= 50);
4890       emit->cs.thread_id_index = index;
4891       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4892                              VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
4893                              VGPU10_OPERAND_INDEX_0D,
4894                              index, 1,
4895                              VGPU10_NAME_UNDEFINED,
4896                              VGPU10_OPERAND_4_COMPONENT,
4897                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4898                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4899                              VGPU10_INTERPOLATION_UNDEFINED, true,
4900                              map_tgsi_semantic_to_sgn_name(semantic_name));
4901       break;
4902    case TGSI_SEMANTIC_BLOCK_ID:
4903       assert(emit->unit >= PIPE_SHADER_COMPUTE);
4904       assert(emit->version >= 50);
4905       emit->cs.block_id_index = index;
4906       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4907                              VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
4908                              VGPU10_OPERAND_INDEX_0D,
4909                              index, 1,
4910                              VGPU10_NAME_UNDEFINED,
4911                              VGPU10_OPERAND_4_COMPONENT,
4912                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4913                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4914                              VGPU10_INTERPOLATION_UNDEFINED, true,
4915                              map_tgsi_semantic_to_sgn_name(semantic_name));
4916       break;
4917    case TGSI_SEMANTIC_GRID_SIZE:
4918       assert(emit->unit == PIPE_SHADER_COMPUTE);
4919       assert(emit->version >= 50);
4920       emit->cs.grid_size.tgsi_index = index;
4921       break;
4922    default:
4923       debug_printf("unexpected system value semantic index %u / %s\n",
4924                    semantic_name, tgsi_semantic_names[semantic_name]);
4925    }
4926 }
4927 
4928 /**
4929  * Translate a TGSI declaration to VGPU10.
4930  */
4931 static bool
emit_vgpu10_declaration(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_declaration * decl)4932 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4933                         const struct tgsi_full_declaration *decl)
4934 {
4935    switch (decl->Declaration.File) {
4936    case TGSI_FILE_INPUT:
4937       /* do nothing - see emit_input_declarations() */
4938       return true;
4939 
4940    case TGSI_FILE_OUTPUT:
4941       assert(decl->Range.First == decl->Range.Last);
4942       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4943       return true;
4944 
4945    case TGSI_FILE_TEMPORARY:
4946       /* Don't declare the temps here.  Just keep track of how many
4947        * and emit the declaration later.
4948        */
4949       if (decl->Declaration.Array) {
4950          /* Indexed temporary array.  Save the start index of the array
4951           * and the size of the array.
4952           */
4953          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4954          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4955 
4956          /* Save this array so we can emit the declaration for it later */
4957          create_temp_array(emit, arrayID, decl->Range.First,
4958                            decl->Range.Last - decl->Range.First + 1,
4959                            decl->Range.First);
4960       }
4961 
4962       /* for all temps, indexed or not, keep track of highest index */
4963       emit->num_shader_temps = MAX2(emit->num_shader_temps,
4964                                     decl->Range.Last + 1);
4965       return true;
4966 
4967    case TGSI_FILE_CONSTANT:
4968       /* Don't declare constants here.  Just keep track and emit later. */
4969       {
4970          unsigned constbuf = 0, num_consts;
4971          if (decl->Declaration.Dimension) {
4972             constbuf = decl->Dim.Index2D;
4973          }
4974          /* We throw an assertion here when, in fact, the shader should never
4975           * have linked due to constbuf index out of bounds, so we shouldn't
4976           * have reached here.
4977           */
4978          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4979 
4980          num_consts = MAX2(emit->num_shader_consts[constbuf],
4981                            decl->Range.Last + 1);
4982 
4983          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4984             debug_printf("Warning: constant buffer is declared to size [%u]"
4985                          " but [%u] is the limit.\n",
4986                          num_consts,
4987                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4988             emit->register_overflow = true;
4989          }
4990          /* The linker doesn't enforce the max UBO size so we clamp here */
4991          emit->num_shader_consts[constbuf] =
4992             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4993       }
4994       return true;
4995 
4996    case TGSI_FILE_IMMEDIATE:
4997       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4998       return false;
4999 
5000    case TGSI_FILE_SYSTEM_VALUE:
5001       emit_system_value_declaration(emit, decl->Semantic.Name,
5002                                     decl->Range.First);
5003       return true;
5004 
5005    case TGSI_FILE_SAMPLER:
5006       /* Don't declare samplers here.  Just keep track and emit later. */
5007       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
5008       return true;
5009 
5010 #if 0
5011    case TGSI_FILE_RESOURCE:
5012       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
5013       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
5014       assert(!"TGSI_FILE_RESOURCE not handled yet");
5015       return false;
5016 #endif
5017 
5018    case TGSI_FILE_ADDRESS:
5019       emit->num_address_regs = MAX2(emit->num_address_regs,
5020                                     decl->Range.Last + 1);
5021       return true;
5022 
5023    case TGSI_FILE_SAMPLER_VIEW:
5024       {
5025          unsigned unit = decl->Range.First;
5026          assert(decl->Range.First == decl->Range.Last);
5027          emit->sampler_target[unit] = decl->SamplerView.Resource;
5028 
5029          /* Note: we can ignore YZW return types for now */
5030          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
5031          emit->sampler_view[unit] = true;
5032       }
5033       return true;
5034 
5035    case TGSI_FILE_IMAGE:
5036       {
5037          unsigned unit = decl->Range.First;
5038          assert(decl->Range.First == decl->Range.Last);
5039          assert(unit < PIPE_MAX_SHADER_IMAGES);
5040          emit->image[unit] = decl->Image;
5041          emit->image_mask |= 1 << unit;
5042          emit->num_images++;
5043       }
5044       return true;
5045 
5046    case TGSI_FILE_HW_ATOMIC:
5047       /* Declare the atomic buffer if it is not already declared. */
5048       if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
5049          emit->num_atomic_bufs++;
5050          emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
5051       }
5052 
5053       /* Remember the maximum atomic counter index encountered */
5054       emit->max_atomic_counter_index =
5055          MAX2(emit->max_atomic_counter_index, decl->Range.Last);
5056       return true;
5057 
5058    case TGSI_FILE_MEMORY:
5059       /* Record memory has been used. */
5060       if (emit->unit == PIPE_SHADER_COMPUTE &&
5061           decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
5062          emit->cs.shared_memory_declared = true;
5063       }
5064 
5065       return true;
5066 
5067    case TGSI_FILE_BUFFER:
5068       assert(emit->version >= 50);
5069       emit->num_shader_bufs++;
5070       return true;
5071 
5072    default:
5073       assert(!"Unexpected type of declaration");
5074       return false;
5075    }
5076 }
5077 
5078 
5079 /**
5080  * Emit input declarations for fragment shader.
5081  */
5082 static void
emit_fs_input_declarations(struct svga_shader_emitter_v10 * emit)5083 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
5084 {
5085    unsigned i;
5086 
5087    for (i = 0; i < emit->linkage.num_inputs; i++) {
5088       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5089       unsigned usage_mask = emit->info.input_usage_mask[i];
5090       unsigned index = emit->linkage.input_map[i];
5091       unsigned type, interpolationMode, name;
5092       unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
5093 
5094       if (usage_mask == 0)
5095          continue;  /* register is not actually used */
5096 
5097       if (semantic_name == TGSI_SEMANTIC_POSITION) {
5098          /* fragment position input */
5099          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5100          interpolationMode = VGPU10_INTERPOLATION_LINEAR;
5101          name = VGPU10_NAME_POSITION;
5102          if (usage_mask & TGSI_WRITEMASK_W) {
5103             /* we need to replace use of 'w' with '1/w' */
5104             emit->fs.fragcoord_input_index = i;
5105          }
5106       }
5107       else if (semantic_name == TGSI_SEMANTIC_FACE) {
5108          /* fragment front-facing input */
5109          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5110          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5111          name = VGPU10_NAME_IS_FRONT_FACE;
5112          emit->fs.face_input_index = i;
5113       }
5114       else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5115          /* primitive ID */
5116          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5117          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5118          name = VGPU10_NAME_PRIMITIVE_ID;
5119       }
5120       else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
5121          /* sample index / ID */
5122          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5123          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5124          name = VGPU10_NAME_SAMPLE_INDEX;
5125       }
5126       else if (semantic_name == TGSI_SEMANTIC_LAYER) {
5127          /* render target array index */
5128          if (emit->key.fs.layer_to_zero) {
5129             /**
5130              * The shader from the previous stage does not write to layer,
5131              * so reading the layer index in fragment shader should return 0.
5132              */
5133             emit->fs.layer_input_index = i;
5134             continue;
5135          } else {
5136             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5137             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5138             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
5139             mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5140          }
5141       }
5142       else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
5143          /* viewport index */
5144          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5145          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5146          name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
5147          mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5148       }
5149       else {
5150          /* general fragment input */
5151          type = VGPU10_OPCODE_DCL_INPUT_PS;
5152          interpolationMode =
5153                translate_interpolation(emit,
5154                                        emit->info.input_interpolate[i],
5155                                        emit->info.input_interpolate_loc[i]);
5156 
5157          /* keeps track if flat interpolation mode is being used */
5158          emit->uses_flat_interp = emit->uses_flat_interp ||
5159                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
5160 
5161          name = VGPU10_NAME_UNDEFINED;
5162       }
5163 
5164       emit_input_declaration(emit, type,
5165                              VGPU10_OPERAND_TYPE_INPUT,
5166                              VGPU10_OPERAND_INDEX_1D, index, 1,
5167                              name,
5168                              VGPU10_OPERAND_4_COMPONENT,
5169                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5170                              mask,
5171                              interpolationMode, true,
5172                              map_tgsi_semantic_to_sgn_name(semantic_name));
5173    }
5174 }
5175 
5176 
5177 /**
5178  * Emit input declarations for vertex shader.
5179  */
5180 static void
emit_vs_input_declarations(struct svga_shader_emitter_v10 * emit)5181 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
5182 {
5183    unsigned i;
5184 
5185    for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
5186       unsigned usage_mask = emit->info.input_usage_mask[i];
5187       unsigned index = i;
5188 
5189       if (usage_mask == 0)
5190          continue;  /* register is not actually used */
5191 
5192       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5193                              VGPU10_OPERAND_TYPE_INPUT,
5194                              VGPU10_OPERAND_INDEX_1D, index, 1,
5195                              VGPU10_NAME_UNDEFINED,
5196                              VGPU10_OPERAND_4_COMPONENT,
5197                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5198                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5199                              VGPU10_INTERPOLATION_UNDEFINED, true,
5200                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5201    }
5202 }
5203 
5204 
5205 /**
5206  * Emit input declarations for geometry shader.
5207  */
5208 static void
emit_gs_input_declarations(struct svga_shader_emitter_v10 * emit)5209 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
5210 {
5211    unsigned i;
5212 
5213    for (i = 0; i < emit->info.num_inputs; i++) {
5214       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5215       unsigned usage_mask = emit->info.input_usage_mask[i];
5216       unsigned index = emit->linkage.input_map[i];
5217       unsigned opcodeType, operandType;
5218       unsigned numComp, selMode;
5219       unsigned name;
5220       unsigned dim;
5221 
5222       if (usage_mask == 0)
5223          continue;  /* register is not actually used */
5224 
5225       opcodeType = VGPU10_OPCODE_DCL_INPUT;
5226       operandType = VGPU10_OPERAND_TYPE_INPUT;
5227       numComp = VGPU10_OPERAND_4_COMPONENT;
5228       selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
5229       name = VGPU10_NAME_UNDEFINED;
5230 
5231       /* all geometry shader inputs are two dimensional except
5232        * gl_PrimitiveID
5233        */
5234       dim = VGPU10_OPERAND_INDEX_2D;
5235 
5236       if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5237          /* Primitive ID */
5238          operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
5239          dim = VGPU10_OPERAND_INDEX_0D;
5240          numComp = VGPU10_OPERAND_0_COMPONENT;
5241          selMode = 0;
5242 
5243          /* also save the register index so we can check for
5244           * primitive id when emit src register. We need to modify the
5245           * operand type, index dimension when emit primitive id src reg.
5246           */
5247           emit->gs.prim_id_index = i;
5248       }
5249       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
5250          /* vertex position input */
5251          opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
5252          name = VGPU10_NAME_POSITION;
5253       }
5254 
5255       emit_input_declaration(emit, opcodeType, operandType,
5256                              dim, index,
5257                              emit->gs.input_size,
5258                              name,
5259                              numComp, selMode,
5260                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5261                              VGPU10_INTERPOLATION_UNDEFINED, true,
5262                              map_tgsi_semantic_to_sgn_name(semantic_name));
5263    }
5264 }
5265 
5266 
5267 /**
5268  * Emit input declarations for tessellation control shader.
5269  */
5270 static void
emit_tcs_input_declarations(struct svga_shader_emitter_v10 * emit)5271 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
5272 {
5273    unsigned i;
5274    unsigned size = emit->key.tcs.vertices_per_patch;
5275    bool addSignature = true;
5276 
5277    if (!emit->tcs.control_point_phase)
5278       addSignature = emit->tcs.fork_phase_add_signature;
5279 
5280    for (i = 0; i < emit->info.num_inputs; i++) {
5281       unsigned usage_mask = emit->info.input_usage_mask[i];
5282       unsigned index = emit->linkage.input_map[i];
5283       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5284       VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
5285       VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
5286       SVGA3dDXSignatureSemanticName sgn_name =
5287          map_tgsi_semantic_to_sgn_name(semantic_name);
5288 
5289       if (semantic_name == TGSI_SEMANTIC_POSITION ||
5290           index == emit->linkage.position_index) {
5291          /* save the input control point index for later use */
5292          emit->tcs.control_point_input_index = i;
5293       }
5294       else if (usage_mask == 0) {
5295          continue;  /* register is not actually used */
5296       }
5297       else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
5298          /* The shadow copy is being used here. So set the signature name
5299           * to UNDEFINED.
5300           */
5301          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5302       }
5303 
5304       /* input control points in the patch constant phase are emitted in the
5305        * vicp register rather than the v register.
5306        */
5307       if (!emit->tcs.control_point_phase) {
5308          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5309       }
5310 
5311       /* Tessellation control shader inputs are two dimensional.
5312        * The array size is determined by the patch vertex count.
5313        */
5314       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5315                              operandType,
5316                              VGPU10_OPERAND_INDEX_2D,
5317                              index, size, name,
5318                              VGPU10_OPERAND_4_COMPONENT,
5319                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5320                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5321                              VGPU10_INTERPOLATION_UNDEFINED,
5322                              addSignature, sgn_name);
5323    }
5324 
5325    if (emit->tcs.control_point_phase) {
5326 
5327       /* Also add an address register for the indirection to the
5328        * input control points
5329        */
5330       emit->tcs.control_point_addr_index = emit->num_address_regs++;
5331    }
5332 }
5333 
5334 
5335 static void
emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 * emit)5336 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
5337 {
5338 
5339    /* In tcs, tess factors are emitted as extra outputs.
5340     * The starting register index for the tess factors is captured
5341     * in the compile key.
5342     */
5343    unsigned inputIndex = emit->key.tes.tessfactor_index;
5344 
5345    if (emit->tes.prim_mode == MESA_PRIM_QUADS) {
5346       if (emit->key.tes.need_tessouter) {
5347          emit->tes.outer.in_index = inputIndex;
5348          for (int i = 0; i < 4; i++) {
5349             emit_tesslevel_declaration(emit, inputIndex++,
5350                VGPU10_OPCODE_DCL_INPUT_SIV,
5351                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5352                VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
5353                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
5354          }
5355       }
5356 
5357       if (emit->key.tes.need_tessinner) {
5358          emit->tes.inner.in_index = inputIndex;
5359          emit_tesslevel_declaration(emit, inputIndex++,
5360             VGPU10_OPCODE_DCL_INPUT_SIV,
5361             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5362             VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
5363             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
5364 
5365          emit_tesslevel_declaration(emit, inputIndex++,
5366             VGPU10_OPCODE_DCL_INPUT_SIV,
5367             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5368             VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
5369             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
5370       }
5371    }
5372    else if (emit->tes.prim_mode == MESA_PRIM_TRIANGLES) {
5373       if (emit->key.tes.need_tessouter) {
5374          emit->tes.outer.in_index = inputIndex;
5375          for (int i = 0; i < 3; i++) {
5376             emit_tesslevel_declaration(emit, inputIndex++,
5377                VGPU10_OPCODE_DCL_INPUT_SIV,
5378                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5379                VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5380                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5381          }
5382       }
5383 
5384       if (emit->key.tes.need_tessinner) {
5385          emit->tes.inner.in_index = inputIndex;
5386          emit_tesslevel_declaration(emit, inputIndex++,
5387             VGPU10_OPCODE_DCL_INPUT_SIV,
5388             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5389             VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5390             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5391       }
5392    }
5393    else if (emit->tes.prim_mode == MESA_PRIM_LINES) {
5394       if (emit->key.tes.need_tessouter) {
5395          emit->tes.outer.in_index = inputIndex;
5396          emit_tesslevel_declaration(emit, inputIndex++,
5397             VGPU10_OPCODE_DCL_INPUT_SIV,
5398             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5399             VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5400             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5401 
5402          emit_tesslevel_declaration(emit, inputIndex++,
5403             VGPU10_OPCODE_DCL_INPUT_SIV,
5404             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5405             VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5406             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5407       }
5408    }
5409 }
5410 
5411 
5412 /**
5413  * Emit input declarations for tessellation evaluation shader.
5414  */
5415 static void
emit_tes_input_declarations(struct svga_shader_emitter_v10 * emit)5416 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5417 {
5418    unsigned i;
5419 
5420    for (i = 0; i < emit->info.num_inputs; i++) {
5421       unsigned usage_mask = emit->info.input_usage_mask[i];
5422       unsigned index = emit->linkage.input_map[i];
5423       unsigned size;
5424       const enum tgsi_semantic semantic_name =
5425          emit->info.input_semantic_name[i];
5426       SVGA3dDXSignatureSemanticName sgn_name;
5427       VGPU10_OPERAND_TYPE operandType;
5428       VGPU10_OPERAND_INDEX_DIMENSION dim;
5429 
5430       if (usage_mask == 0)
5431          usage_mask = 1;  /* at least set usage mask to one */
5432 
5433       if (semantic_name == TGSI_SEMANTIC_PATCH) {
5434          operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5435          dim = VGPU10_OPERAND_INDEX_1D;
5436          size = 1;
5437          sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5438       }
5439       else {
5440          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5441          dim = VGPU10_OPERAND_INDEX_2D;
5442          size = emit->key.tes.vertices_per_patch;
5443          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5444       }
5445 
5446       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5447                              dim, index, size, VGPU10_NAME_UNDEFINED,
5448                              VGPU10_OPERAND_4_COMPONENT,
5449                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5450                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5451                              VGPU10_INTERPOLATION_UNDEFINED,
5452                              true, sgn_name);
5453    }
5454 
5455    emit_tessfactor_input_declarations(emit);
5456 
5457    /* DX spec requires DS input controlpoint/patch-constant signatures to match
5458     * the HS output controlpoint/patch-constant signatures exactly.
5459     * Add missing input declarations even if they are not used in the shader.
5460     */
5461    if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5462       struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5463       for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5464 
5465           /* If a tcs output does not have a corresponding input register in
5466            * tes, add one.
5467            */
5468           if (emit->linkage.prevShader.output_map[i] >
5469               emit->linkage.input_map_max) {
5470              const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5471 
5472              if (sem_name == TGSI_SEMANTIC_PATCH) {
5473                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5474                                        VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5475                                        VGPU10_OPERAND_INDEX_1D,
5476                                        i, 1, VGPU10_NAME_UNDEFINED,
5477                                        VGPU10_OPERAND_4_COMPONENT,
5478                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5479                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5480                                        VGPU10_INTERPOLATION_UNDEFINED,
5481                                        true,
5482                                        map_tgsi_semantic_to_sgn_name(sem_name));
5483 
5484              } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5485                         sem_name != TGSI_SEMANTIC_TESSOUTER) {
5486                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5487                                        VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5488                                        VGPU10_OPERAND_INDEX_2D,
5489                                        i, emit->key.tes.vertices_per_patch,
5490                                        VGPU10_NAME_UNDEFINED,
5491                                        VGPU10_OPERAND_4_COMPONENT,
5492                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5493                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5494                                        VGPU10_INTERPOLATION_UNDEFINED,
5495                                        true,
5496                                        map_tgsi_semantic_to_sgn_name(sem_name));
5497              }
5498              /* tessellation factors are taken care of in
5499               * emit_tessfactor_input_declarations().
5500               */
5501          }
5502       }
5503    }
5504 }
5505 
5506 
5507 /**
5508  * Emit all input declarations.
5509  */
5510 static bool
emit_input_declarations(struct svga_shader_emitter_v10 * emit)5511 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5512 {
5513    emit->index_range.required =
5514       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? true : false;
5515 
5516    switch (emit->unit) {
5517    case PIPE_SHADER_FRAGMENT:
5518       emit_fs_input_declarations(emit);
5519       break;
5520    case PIPE_SHADER_GEOMETRY:
5521       emit_gs_input_declarations(emit);
5522       break;
5523    case PIPE_SHADER_VERTEX:
5524       emit_vs_input_declarations(emit);
5525       break;
5526    case PIPE_SHADER_TESS_CTRL:
5527       emit_tcs_input_declarations(emit);
5528       break;
5529    case PIPE_SHADER_TESS_EVAL:
5530       emit_tes_input_declarations(emit);
5531       break;
5532    case PIPE_SHADER_COMPUTE:
5533       //XXX emit_cs_input_declarations(emit);
5534       break;
5535    default:
5536       assert(0);
5537    }
5538 
5539    if (emit->index_range.start_index != INVALID_INDEX) {
5540       emit_index_range_declaration(emit);
5541    }
5542    emit->index_range.required = false;
5543    return true;
5544 }
5545 
5546 
5547 /**
5548  * Emit all output declarations.
5549  */
5550 static bool
emit_output_declarations(struct svga_shader_emitter_v10 * emit)5551 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5552 {
5553    emit->index_range.required =
5554       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? true : false;
5555 
5556    switch (emit->unit) {
5557    case PIPE_SHADER_FRAGMENT:
5558       emit_fs_output_declarations(emit);
5559       break;
5560    case PIPE_SHADER_GEOMETRY:
5561       emit_gs_output_declarations(emit);
5562       break;
5563    case PIPE_SHADER_VERTEX:
5564       emit_vs_output_declarations(emit);
5565       break;
5566    case PIPE_SHADER_TESS_CTRL:
5567       emit_tcs_output_declarations(emit);
5568       break;
5569    case PIPE_SHADER_TESS_EVAL:
5570       emit_tes_output_declarations(emit);
5571       break;
5572    case PIPE_SHADER_COMPUTE:
5573       //XXX emit_cs_output_declarations(emit);
5574       break;
5575    default:
5576       assert(0);
5577    }
5578 
5579    if (emit->vposition.so_index != INVALID_INDEX &&
5580        emit->vposition.out_index != INVALID_INDEX) {
5581 
5582       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5583 
5584       /* Emit the declaration for the non-adjusted vertex position
5585        * for stream output purpose
5586        */
5587       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5588                               emit->vposition.so_index,
5589                               VGPU10_NAME_UNDEFINED,
5590                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5591                               true,
5592                               SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5593    }
5594 
5595    if (emit->clip_dist_so_index != INVALID_INDEX &&
5596        emit->clip_dist_out_index != INVALID_INDEX) {
5597 
5598       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5599 
5600       /* Emit the declaration for the clip distance shadow copy which
5601        * will be used for stream output purpose and for clip distance
5602        * varying variable. Note all clip distances
5603        * will be written regardless of the enabled clipping planes.
5604        */
5605       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5606                               emit->clip_dist_so_index,
5607                               VGPU10_NAME_UNDEFINED,
5608                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5609                               true,
5610                               SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5611 
5612       if (emit->info.num_written_clipdistance > 4) {
5613          /* for the second clip distance register, each handles 4 planes */
5614          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5615                                  emit->clip_dist_so_index + 1,
5616                                  VGPU10_NAME_UNDEFINED,
5617                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5618                                  true,
5619                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5620       }
5621    }
5622 
5623    if (emit->index_range.start_index != INVALID_INDEX) {
5624       emit_index_range_declaration(emit);
5625    }
5626    emit->index_range.required = false;
5627    return true;
5628 }
5629 
5630 
5631 /**
5632  * A helper function to create a temporary indexable array
5633  * and initialize the corresponding entries in the temp_map array.
5634  */
5635 static void
create_temp_array(struct svga_shader_emitter_v10 * emit,unsigned arrayID,unsigned first,unsigned count,unsigned startIndex)5636 create_temp_array(struct svga_shader_emitter_v10 *emit,
5637                   unsigned arrayID, unsigned first, unsigned count,
5638                   unsigned startIndex)
5639 {
5640    unsigned i, tempIndex = startIndex;
5641 
5642    emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5643    assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5644    emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5645 
5646    emit->temp_arrays[arrayID].start = first;
5647    emit->temp_arrays[arrayID].size = count;
5648 
5649    /* Fill in the temp_map entries for this temp array */
5650    for (i = 0; i < count; i++, tempIndex++) {
5651       emit->temp_map[tempIndex].arrayId = arrayID;
5652       emit->temp_map[tempIndex].index = i;
5653    }
5654 }
5655 
5656 
/**
 * Emit the declaration for the temporary registers.
 *
 * Starting from the number of temps used by the shader itself, this
 * function reserves additional temp indexes for driver-internal purposes
 * (internal temps, clip distance/vertex, vertex id, position prescale,
 * per-stage helpers, raw buffer loads, address registers) and records each
 * reserved index in the emitter.  It then renumbers all non-array temps in
 * temp_map[] into a consecutive series, emits one DCL_TEMPS for them and
 * one DCL_INDEXABLE_TEMP per non-empty temp array, and finally checks the
 * grand total against the register limit.
 *
 * The order in which indexes are reserved determines the index each
 * consumer gets, so the statements below must not be reordered.
 *
 * \return always true
 */
static bool
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      /* All shader temps [0, total_temps) become array 1 */
      create_temp_array(emit, 1, 0, total_temps, 0);
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   /* Allocate extra temps for clip distance or clip vertex.
    */
   if (emit->clip_mode == CLIP_DISTANCE) {
      /* We need to write the clip distance to a temporary register
       * first. Then it will be copied to the shadow copy for
       * the clip distance varying variable and stream output purpose.
       * It will also be copied to the actual CLIPDIST register
       * according to the enabled clip planes
       */
      emit->clip_dist_tmp_index = total_temps++;
      if (emit->info.num_written_clipdistance > 4)
         total_temps++; /* second clip register */
   }
   else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
      /* If the current shader is in the last vertex processing stage,
       * We need to convert the TGSI CLIPVERTEX output to one or more
       * clip distances.  Allocate a temp reg for the clipvertex here.
       */
      assert(emit->info.writes_clipvertex > 0);
      emit->clip_vertex_tmp_index = total_temps;
      total_temps++;
   }

   /* Allocate a temp for the vertex id (vertex shaders only) */
   if (emit->info.uses_vertexid) {
      assert(emit->unit == PIPE_SHADER_VERTEX);
      emit->vs.vertex_id_tmp_index = total_temps++;
   }

   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      /* Allocate a temp for the vertex position when any of these
       * features needs one.
       */
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      /* Allocate temps for the prescale scale and translation values */
      if (emit->vposition.need_prescale) {
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* Allocate one temp per vertex attribute that has any
          * adjustment flag set in the shader key.
          */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }
      else if (emit->unit == PIPE_SHADER_GEOMETRY) {
         /* Allocate a temp for the viewport index */
         if (emit->key.gs.writes_viewport_index)
            emit->gs.viewport_index_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
         /* Allocate a temp for the sample position */
         emit->fs.sample_pos_tmp_index = total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      /* Allocate temps for the position and the prescale values */
      if (emit->vposition.need_prescale) {
         emit->vposition.tmp_index = total_temps++;
         emit->vposition.prescale_scale_index = total_temps++;
         emit->vposition.prescale_trans_index = total_temps++;
      }

      /* NOTE(review): the two tests below check tgsi_index for non-zero,
       * whereas the TESS_CTRL branch compares its tgsi_index fields
       * against INVALID_INDEX.  If these fields are also initialized to
       * INVALID_INDEX, a temp is reserved even when the tess level is
       * unused and none is reserved when the index is 0 -- confirm
       * against the emitter initialization.
       */
      if (emit->tes.inner.tgsi_index) {
         emit->tes.inner.temp_index = total_temps;
         total_temps += 1;
      }

      if (emit->tes.outer.tgsi_index) {
         emit->tes.outer.temp_index = total_temps;
         total_temps += 1;
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      /* Tess level temps are only reserved outside the control point
       * phase.
       */
      if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.inner.temp_index = total_temps;
            total_temps += 1;
         }
      }
      if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
         if (!emit->tcs.control_point_phase) {
            emit->tcs.outer.temp_index = total_temps;
            total_temps += 1;
         }
      }

      if (emit->tcs.control_point_phase &&
          emit->info.reads_pervertex_outputs) {
         /* Reserve one temp per control point output */
         emit->tcs.control_point_tmp_index = total_temps;
         total_temps += emit->tcs.control_point_out_count;
      }
      else if (!emit->tcs.control_point_phase &&
               emit->info.reads_perpatch_outputs) {

         /* If there is indirect access to the patch constant outputs
          * in the control point phase, then an indexable temporary array
          * will be created for these patch constant outputs.
          * Note, indirect access can only be applicable to
          * patch constant outputs in the control point phase.
          */
         /* NOTE(review): this branch is taken when control_point_phase is
          * false, so the "in the control point phase" wording above may be
          * stale -- confirm.
          */
         if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
            /* Use the next unused array ID; ID 0 is never used for an
             * indexable array.
             */
            unsigned arrayID =
               emit->num_temp_arrays ? emit->num_temp_arrays : 1;
            create_temp_array(emit, arrayID, 0,
                              emit->tcs.patch_generic_out_count, total_temps);
         }
         emit->tcs.patch_generic_tmp_index = total_temps;
         total_temps += emit->tcs.patch_generic_out_count;
      }

      /* Allocate a temp for the invocation id */
      emit->tcs.invocation_id_tmp_index = total_temps++;
   }

   if (emit->raw_bufs) {
      /**
       * Add 3 more temporaries if we need to translate constant buffer
       * to srv raw buffer. Since we need to load the value to a temporary
       * before it can be used as a source. There could be three source
       * register in an instruction.
       */
      emit->raw_buf_tmp_index = total_temps;
      total_temps+=3;
   }

   /* Allocate one temp per address register */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes.  Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   /* Debug dump of the temp map; disabled by default */
   if (0) {
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u ->  array %u  index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   /* From here on total_temps counts only the non-array temps */
   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         /* Add the array's registers back into the grand total */
         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return true;
}
5930 
5931 
5932 static bool
emit_rawbuf_declaration(struct svga_shader_emitter_v10 * emit,unsigned index)5933 emit_rawbuf_declaration(struct svga_shader_emitter_v10 *emit,
5934                         unsigned index)
5935 {
5936    VGPU10OpcodeToken0 opcode1;
5937    VGPU10OperandToken0 operand1;
5938 
5939    opcode1.value = 0;
5940    opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
5941    opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
5942 
5943    operand1.value = 0;
5944    operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
5945    operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5946    operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
5947    operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5948 
5949    begin_emit_instruction(emit);
5950    emit_dword(emit, opcode1.value);
5951    emit_dword(emit, operand1.value);
5952    emit_dword(emit, index);
5953    end_emit_instruction(emit);
5954 
5955    return true;
5956 }
5957 
5958 
5959 static bool
emit_constant_declaration(struct svga_shader_emitter_v10 * emit)5960 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5961 {
5962    VGPU10OpcodeToken0 opcode0;
5963    VGPU10OperandToken0 operand0;
5964    unsigned total_consts, i;
5965 
5966    opcode0.value = 0;
5967    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5968    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5969    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5970 
5971    operand0.value = 0;
5972    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5973    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5974    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5975    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5976    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5977    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5978    operand0.swizzleX = 0;
5979    operand0.swizzleY = 1;
5980    operand0.swizzleZ = 2;
5981    operand0.swizzleW = 3;
5982 
5983    /**
5984     * Emit declaration for constant buffer [0].  We also allocate
5985     * room for the extra constants here.
5986     */
5987    total_consts = emit->num_shader_consts[0];
5988 
5989    /* Now, allocate constant slots for the "extra" constants.
5990     * Note: it's critical that these extra constant locations
5991     * exactly match what's emitted by the "extra" constants code
5992     * in svga_state_constants.c
5993     */
5994 
5995    /* Vertex position scale/translation */
5996    if (emit->vposition.need_prescale) {
5997       emit->vposition.prescale_cbuf_index = total_consts;
5998       total_consts += (2 * emit->vposition.num_prescale);
5999    }
6000 
6001    if (emit->unit == PIPE_SHADER_VERTEX) {
6002       if (emit->key.vs.undo_viewport) {
6003          emit->vs.viewport_index = total_consts++;
6004       }
6005       if (emit->key.vs.need_vertex_id_bias) {
6006          emit->vs.vertex_id_bias_index = total_consts++;
6007       }
6008    }
6009 
6010    /* user-defined clip planes */
6011    if (emit->key.clip_plane_enable) {
6012       unsigned n = util_bitcount(emit->key.clip_plane_enable);
6013       assert(emit->unit != PIPE_SHADER_FRAGMENT &&
6014              emit->unit != PIPE_SHADER_COMPUTE);
6015       for (i = 0; i < n; i++) {
6016          emit->clip_plane_const[i] = total_consts++;
6017       }
6018    }
6019 
6020    for (i = 0; i < emit->num_samplers; i++) {
6021 
6022       if (emit->key.tex[i].sampler_view) {
6023          /* Texcoord scale factors for RECT textures */
6024          if (emit->key.tex[i].unnormalized) {
6025             emit->texcoord_scale_index[i] = total_consts++;
6026          }
6027 
6028          /* Texture buffer sizes */
6029          if (emit->key.tex[i].target == PIPE_BUFFER) {
6030             emit->texture_buffer_size_index[i] = total_consts++;
6031          }
6032       }
6033    }
6034    if (emit->key.image_size_used) {
6035       emit->image_size_index = total_consts;
6036       total_consts += emit->num_images;
6037    }
6038 
6039    if (total_consts > 0) {
6040       if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
6041          debug_printf("Warning: Too many constants [%u] declared in constant"
6042                       " buffer 0. %u is the limit.\n",
6043                       total_consts,
6044                       VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
6045          total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
6046          emit->register_overflow = true;
6047       }
6048       begin_emit_instruction(emit);
6049       emit_dword(emit, opcode0.value);
6050       emit_dword(emit, operand0.value);
6051       emit_dword(emit, 0);  /* which const buffer slot */
6052       emit_dword(emit, total_consts);
6053       end_emit_instruction(emit);
6054    }
6055 
6056    /* Declare remaining constant buffers (UBOs) */
6057 
6058    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
6059       if (emit->num_shader_consts[i] > 0) {
6060          if (emit->raw_bufs & (1 << i)) {
6061             /* UBO declared as srv raw buffer */
6062             emit_rawbuf_declaration(emit, i + emit->raw_buf_srv_start_index);
6063          }
6064          else {
6065 
6066             /* UBO declared as const buffer */
6067             begin_emit_instruction(emit);
6068             emit_dword(emit, opcode0.value);
6069             emit_dword(emit, operand0.value);
6070             emit_dword(emit, i);  /* which const buffer slot */
6071             emit_dword(emit, emit->num_shader_consts[i]);
6072             end_emit_instruction(emit);
6073          }
6074       }
6075    }
6076 
6077    return true;
6078 }
6079 
6080 
6081 /**
6082  * Emit declarations for samplers.
6083  */
6084 static bool
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)6085 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
6086 {
6087    unsigned i;
6088 
6089    for (i = 0; i < emit->key.num_samplers; i++) {
6090 
6091       VGPU10OpcodeToken0 opcode0;
6092       VGPU10OperandToken0 operand0;
6093 
6094       opcode0.value = 0;
6095       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
6096       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
6097 
6098       operand0.value = 0;
6099       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6100       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
6101       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6102       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6103 
6104       begin_emit_instruction(emit);
6105       emit_dword(emit, opcode0.value);
6106       emit_dword(emit, operand0.value);
6107       emit_dword(emit, i);
6108       end_emit_instruction(emit);
6109    }
6110 
6111    return true;
6112 }
6113 
6114 
6115 /**
6116  * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6117  */
6118 static unsigned
pipe_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,bool is_array,bool is_uav)6119 pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
6120                                    unsigned num_samples,
6121                                    bool is_array,
6122                                    bool is_uav)
6123 {
6124    switch (target) {
6125    case PIPE_BUFFER:
6126       return VGPU10_RESOURCE_DIMENSION_BUFFER;
6127    case PIPE_TEXTURE_1D:
6128       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6129    case PIPE_TEXTURE_2D:
6130       return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
6131          VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6132    case PIPE_TEXTURE_RECT:
6133       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6134    case PIPE_TEXTURE_3D:
6135       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6136    case PIPE_TEXTURE_CUBE:
6137       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6138    case PIPE_TEXTURE_1D_ARRAY:
6139       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6140          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6141    case PIPE_TEXTURE_2D_ARRAY:
6142       if (num_samples > 2 && is_array)
6143          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
6144       else if (is_array)
6145          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
6146       else
6147          return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6148    case PIPE_TEXTURE_CUBE_ARRAY:
6149       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6150              (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6151                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6152    default:
6153       assert(!"Unexpected resource type");
6154       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6155    }
6156 }
6157 
6158 
6159 /**
6160  * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6161  */
6162 static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,bool is_array,bool is_uav)6163 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
6164                                    unsigned num_samples,
6165                                    bool is_array,
6166                                    bool is_uav)
6167 {
6168    if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
6169       target = TGSI_TEXTURE_2D;
6170    }
6171    else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
6172       target = TGSI_TEXTURE_2D_ARRAY;
6173    }
6174 
6175    switch (target) {
6176    case TGSI_TEXTURE_BUFFER:
6177       return VGPU10_RESOURCE_DIMENSION_BUFFER;
6178    case TGSI_TEXTURE_1D:
6179       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6180    case TGSI_TEXTURE_2D:
6181    case TGSI_TEXTURE_RECT:
6182       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6183    case TGSI_TEXTURE_3D:
6184       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6185    case TGSI_TEXTURE_CUBE:
6186    case TGSI_TEXTURE_SHADOWCUBE:
6187       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6188                       VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6189    case TGSI_TEXTURE_SHADOW1D:
6190       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6191    case TGSI_TEXTURE_SHADOW2D:
6192    case TGSI_TEXTURE_SHADOWRECT:
6193       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6194    case TGSI_TEXTURE_1D_ARRAY:
6195    case TGSI_TEXTURE_SHADOW1D_ARRAY:
6196       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6197          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6198    case TGSI_TEXTURE_2D_ARRAY:
6199    case TGSI_TEXTURE_SHADOW2D_ARRAY:
6200       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
6201          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6202    case TGSI_TEXTURE_2D_MSAA:
6203       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6204    case TGSI_TEXTURE_2D_ARRAY_MSAA:
6205       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
6206          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6207    case TGSI_TEXTURE_CUBE_ARRAY:
6208       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6209              (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6210                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6211    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
6212       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
6213          : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6214    default:
6215       assert(!"Unexpected resource type");
6216       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6217    }
6218 }
6219 
6220 
6221 /**
6222  * Given a tgsi_return_type, return true iff it is an integer type.
6223  */
6224 static bool
is_integer_type(enum tgsi_return_type type)6225 is_integer_type(enum tgsi_return_type type)
6226 {
6227    switch (type) {
6228       case TGSI_RETURN_TYPE_SINT:
6229       case TGSI_RETURN_TYPE_UINT:
6230          return true;
6231       case TGSI_RETURN_TYPE_FLOAT:
6232       case TGSI_RETURN_TYPE_UNORM:
6233       case TGSI_RETURN_TYPE_SNORM:
6234          return false;
6235       case TGSI_RETURN_TYPE_COUNT:
6236       default:
6237          assert(!"is_integer_type: Unknown tgsi_return_type");
6238          return false;
6239    }
6240 }
6241 
6242 
6243 /**
6244  * Emit declarations for resources.
6245  * XXX When we're sure that all TGSI shaders will be generated with
6246  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
6247  * rework this code.
6248  */
6249 static bool
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)6250 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
6251 {
6252    unsigned i;
6253 
6254    /* Emit resource decl for each sampler */
6255    for (i = 0; i < emit->num_samplers; i++) {
6256       if (!(emit->info.samplers_declared & (1 << i)))
6257          continue;
6258 
6259       VGPU10OpcodeToken0 opcode0;
6260       VGPU10OperandToken0 operand0;
6261       VGPU10ResourceReturnTypeToken return_type;
6262       VGPU10_RESOURCE_RETURN_TYPE rt;
6263 
6264       opcode0.value = 0;
6265       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
6266       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6267          opcode0.resourceDimension =
6268             tgsi_texture_to_resource_dimension(emit->sampler_target[i],
6269                                                emit->key.tex[i].num_samples,
6270                                                emit->key.tex[i].is_array,
6271                                                false);
6272       }
6273       else {
6274          opcode0.resourceDimension =
6275             pipe_texture_to_resource_dimension(emit->key.tex[i].target,
6276                                                emit->key.tex[i].num_samples,
6277                                                emit->key.tex[i].is_array,
6278                                                false);
6279       }
6280       opcode0.sampleCount = emit->key.tex[i].num_samples;
6281       operand0.value = 0;
6282       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6283       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
6284       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6285       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6286 
6287 #if 1
6288       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
6289       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
6290       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
6291       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
6292       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
6293       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
6294       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
6295       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6296          rt = emit->sampler_return_type[i] + 1;
6297       }
6298       else {
6299          rt = emit->key.tex[i].sampler_return_type;
6300       }
6301 #else
6302       switch (emit->sampler_return_type[i]) {
6303          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
6304          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
6305          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
6306          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
6307          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
6308          case TGSI_RETURN_TYPE_COUNT:
6309          default:
6310             rt = VGPU10_RETURN_TYPE_FLOAT;
6311             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
6312       }
6313 #endif
6314 
6315       return_type.value = 0;
6316       return_type.component0 = rt;
6317       return_type.component1 = rt;
6318       return_type.component2 = rt;
6319       return_type.component3 = rt;
6320 
6321       begin_emit_instruction(emit);
6322       emit_dword(emit, opcode0.value);
6323       emit_dword(emit, operand0.value);
6324       emit_dword(emit, i);
6325       emit_dword(emit, return_type.value);
6326       end_emit_instruction(emit);
6327    }
6328 
6329    return true;
6330 }
6331 
6332 
6333 /**
6334  * Emit instruction to declare uav for the shader image
6335  */
6336 static void
emit_image_declarations(struct svga_shader_emitter_v10 * emit)6337 emit_image_declarations(struct svga_shader_emitter_v10 *emit)
6338 {
6339    unsigned i = 0;
6340    unsigned unit = 0;
6341    unsigned uav_mask = 0;
6342 
6343    /* Emit uav decl for each image */
6344    for (i = 0; i < emit->num_images; i++, unit++) {
6345 
6346       /* Find the unit index of the next declared image.
6347        */
6348       while (!(emit->image_mask & (1 << unit))) {
6349          unit++;
6350       }
6351 
6352       VGPU10OpcodeToken0 opcode0;
6353       VGPU10OperandToken0 operand0;
6354       VGPU10ResourceReturnTypeToken return_type;
6355 
6356       /* If the corresponding uav for the image is already declared,
6357        * skip this image declaration.
6358        */
6359       if (uav_mask & (1 << emit->key.images[unit].uav_index))
6360          continue;
6361 
6362       opcode0.value = 0;
6363       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
6364       opcode0.uavResourceDimension =
6365          tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
6366                                             0, emit->key.images[unit].is_array,
6367                                             true);
6368 
6369       if (emit->key.images[unit].is_single_layer &&
6370           emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
6371          opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6372       }
6373 
6374       /* Declare the uav as global coherent if the shader includes memory
6375        * barrier instructions.
6376        */
6377       opcode0.globallyCoherent =
6378          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6379 
6380       operand0.value = 0;
6381       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6382       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6383       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6384       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6385 
6386       return_type.value = 0;
6387       return_type.component0 =
6388          return_type.component1 =
6389          return_type.component2 =
6390          return_type.component3 = emit->key.images[unit].return_type + 1;
6391 
6392       assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
6393       begin_emit_instruction(emit);
6394       emit_dword(emit, opcode0.value);
6395       emit_dword(emit, operand0.value);
6396       emit_dword(emit, emit->key.images[unit].uav_index);
6397       emit_dword(emit, return_type.value);
6398       end_emit_instruction(emit);
6399 
6400       /* Mark the uav is already declared */
6401       uav_mask |= 1 << emit->key.images[unit].uav_index;
6402    }
6403 
6404    emit->uav_declared |= uav_mask;
6405 }
6406 
6407 
6408 /**
6409  * Emit instruction to declare uav for the shader buffer
6410  */
6411 static void
emit_shader_buf_declarations(struct svga_shader_emitter_v10 * emit)6412 emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
6413 {
6414    unsigned i;
6415    unsigned uav_mask = 0;
6416 
6417    /* Emit uav decl for each shader buffer */
6418    for (i = 0; i < emit->num_shader_bufs; i++) {
6419       VGPU10OpcodeToken0 opcode0;
6420       VGPU10OperandToken0 operand0;
6421 
6422       if (emit->raw_shaderbufs & (1 << i)) {
6423          emit_rawbuf_declaration(emit, i + emit->raw_shaderbuf_srv_start_index);
6424          continue;
6425       }
6426 
6427       /* If the corresponding uav for the shader buf is already declared,
6428        * skip this shader buffer declaration.
6429        */
6430       if (uav_mask & (1 << emit->key.shader_buf_uav_index[i]))
6431          continue;
6432 
6433       opcode0.value = 0;
6434       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6435 
6436       /* Declare the uav as global coherent if the shader includes memory
6437        * barrier instructions.
6438        */
6439       opcode0.globallyCoherent =
6440          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6441 
6442       operand0.value = 0;
6443       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6444       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6445       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6446       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6447 
6448       assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
6449       begin_emit_instruction(emit);
6450       emit_dword(emit, opcode0.value);
6451       emit_dword(emit, operand0.value);
6452       emit_dword(emit, emit->key.shader_buf_uav_index[i]);
6453       end_emit_instruction(emit);
6454 
6455       /* Mark the uav is already declared */
6456       uav_mask |= 1 << emit->key.shader_buf_uav_index[i];
6457    }
6458 
6459    emit->uav_declared |= uav_mask;
6460 }
6461 
6462 
6463 /**
6464  * Emit instruction to declare thread group shared memory(tgsm) for shared memory
6465  */
6466 static void
emit_memory_declarations(struct svga_shader_emitter_v10 * emit)6467 emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
6468 {
6469    if (emit->cs.shared_memory_declared) {
6470       VGPU10OpcodeToken0 opcode0;
6471       VGPU10OperandToken0 operand0;
6472 
6473       opcode0.value = 0;
6474       opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
6475 
6476       /* Declare the uav as global coherent if the shader includes memory
6477        * barrier instructions.
6478        */
6479       opcode0.globallyCoherent =
6480          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6481 
6482       operand0.value = 0;
6483       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6484       operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
6485       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6486       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6487 
6488       begin_emit_instruction(emit);
6489       emit_dword(emit, opcode0.value);
6490       emit_dword(emit, operand0.value);
6491 
6492       /* Current state tracker only declares one shared memory for GLSL.
6493        * Use index 0 for this shared memory.
6494        */
6495       emit_dword(emit, 0);
6496       emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
6497       end_emit_instruction(emit);
6498    }
6499 }
6500 
6501 
6502 /**
6503  * Emit instruction to declare uav for atomic buffers
6504  */
6505 static void
emit_atomic_buf_declarations(struct svga_shader_emitter_v10 * emit)6506 emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
6507 {
6508    unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
6509    unsigned uav_mask = 0;
6510 
6511    /* Emit uav decl for each atomic buffer */
6512    while (atomic_bufs_mask) {
6513       unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
6514       unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
6515 
6516       /* If the corresponding uav for the shader buf is already declared,
6517        * skip this shader buffer declaration.
6518        */
6519       if (uav_mask & (1 << uav_index))
6520          continue;
6521 
6522       VGPU10OpcodeToken0 opcode0;
6523       VGPU10OperandToken0 operand0;
6524 
6525       assert(uav_index != SVGA3D_INVALID_ID);
6526 
6527       opcode0.value = 0;
6528       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6529       opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
6530 
6531       /* Declare the uav as global coherent if the shader includes memory
6532        * barrier instructions.
6533        */
6534       opcode0.globallyCoherent =
6535          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6536       opcode0.uavHasCounter = 1;
6537 
6538       operand0.value = 0;
6539       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6540       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6541       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6542       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6543 
6544       begin_emit_instruction(emit);
6545       emit_dword(emit, opcode0.value);
6546       emit_dword(emit, operand0.value);
6547       emit_dword(emit, uav_index);
6548       end_emit_instruction(emit);
6549 
6550       /* Mark the uav is already declared */
6551       uav_mask |= 1 << uav_index;
6552    }
6553 
6554    emit->uav_declared |= uav_mask;
6555 
6556    /* Allocate immediates to be used for index to the atomic buffers */
6557    unsigned j = 0;
6558    for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
6559       alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6560    }
6561 
6562    /* Allocate immediates for the atomic counter index */
6563    for (; j <= emit->max_atomic_counter_index; j+=4) {
6564       alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6565    }
6566 }
6567 
6568 
/**
 * Emit instruction with n=1, 2 or 3 source registers.
 *
 * \param emit      the shader emitter
 * \param opcode    opcode passed to emit_opcode_precise()
 * \param dst       destination register
 * \param src1      first source register (always emitted)
 * \param src2      second source register, or NULL to omit it
 * \param src3      third source register, or NULL to omit it
 * \param saturate  forwarded to emit_opcode_precise()
 * \param precise   forwarded to emit_opcode_precise()
 */
static void
emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src1,
                     const struct tgsi_full_src_register *src2,
                     const struct tgsi_full_src_register *src3,
                     bool saturate, bool precise)
{
   const struct tgsi_full_src_register *optional_srcs[2] = { src2, src3 };
   unsigned k;

   begin_emit_instruction(emit);
   emit_opcode_precise(emit, opcode, saturate, precise);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src1);

   /* Each optional source is emitted independently when it is non-NULL. */
   for (k = 0; k < 2; k++) {
      if (optional_srcs[k])
         emit_src_register(emit, optional_srcs[k]);
   }

   end_emit_instruction(emit);
}
6593 
/**
 * Emit an instruction with one destination and one source register.
 * Saturate and precise are both off.
 */
static void
emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src)
{
   const bool saturate = false;
   const bool precise = false;

   emit_instruction_opn(emit, opcode, dst, src, NULL, NULL,
                        saturate, precise);
}
6602 
6603 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2)6604 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
6605                      VGPU10_OPCODE_TYPE opcode,
6606                      const struct tgsi_full_dst_register *dst,
6607                      const struct tgsi_full_src_register *src1,
6608                      const struct tgsi_full_src_register *src2)
6609 {
6610    emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, false, false);
6611 }
6612 
6613 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3)6614 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
6615                      VGPU10_OPCODE_TYPE opcode,
6616                      const struct tgsi_full_dst_register *dst,
6617                      const struct tgsi_full_src_register *src1,
6618                      const struct tgsi_full_src_register *src2,
6619                      const struct tgsi_full_src_register *src3)
6620 {
6621    emit_instruction_opn(emit, opcode, dst, src1, src2, src3, false, false);
6622 }
6623 
6624 static void
emit_instruction_op0(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode)6625 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
6626                      VGPU10_OPCODE_TYPE opcode)
6627 {
6628    begin_emit_instruction(emit);
6629    emit_opcode(emit, opcode, false);
6630    end_emit_instruction(emit);
6631 }
6632 
6633 /**
6634  * Tessellation inner/outer levels needs to be store into its
6635  * appropriate registers depending on prim_mode.
6636  */
6637 static void
store_tesslevels(struct svga_shader_emitter_v10 * emit)6638 store_tesslevels(struct svga_shader_emitter_v10 *emit)
6639 {
6640    int i;
6641 
6642    /* tessellation levels are required input/out in hull shader.
6643     * emitting the inner/outer tessellation levels, either from
6644     * values provided in tcs or fallback default values which is 1.0
6645     */
6646    if (emit->key.tcs.prim_mode == MESA_PRIM_QUADS) {
6647       struct tgsi_full_src_register temp_src;
6648 
6649       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6650          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6651       else
6652          temp_src = make_immediate_reg_float(emit, 1.0f);
6653 
6654       for (i = 0; i < 2; i++) {
6655          struct tgsi_full_src_register src =
6656             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6657          struct tgsi_full_dst_register dst =
6658             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
6659          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6660          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6661       }
6662 
6663       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6664          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6665       else
6666          temp_src = make_immediate_reg_float(emit, 1.0f);
6667 
6668       for (i = 0; i < 4; i++) {
6669          struct tgsi_full_src_register src =
6670             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6671          struct tgsi_full_dst_register dst =
6672             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6673          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6674          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6675       }
6676    }
6677    else if (emit->key.tcs.prim_mode == MESA_PRIM_TRIANGLES) {
6678       struct tgsi_full_src_register temp_src;
6679 
6680       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6681          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6682       else
6683          temp_src = make_immediate_reg_float(emit, 1.0f);
6684 
6685       struct tgsi_full_src_register src =
6686          scalar_src(&temp_src, TGSI_SWIZZLE_X);
6687       struct tgsi_full_dst_register dst =
6688          make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6689       dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6690       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6691 
6692       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6693          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6694       else
6695          temp_src = make_immediate_reg_float(emit, 1.0f);
6696 
6697       for (i = 0; i < 3; i++) {
6698          struct tgsi_full_src_register src =
6699             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6700          struct tgsi_full_dst_register dst =
6701             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6702          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6703          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6704       }
6705    }
6706    else if (emit->key.tcs.prim_mode ==  MESA_PRIM_LINES) {
6707       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6708          struct tgsi_full_src_register temp_src =
6709             make_src_temp_reg(emit->tcs.outer.temp_index);
6710          for (i = 0; i < 2; i++) {
6711             struct tgsi_full_src_register src =
6712                scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6713             struct tgsi_full_dst_register dst =
6714                make_dst_reg(TGSI_FILE_OUTPUT,
6715                             emit->tcs.outer.out_index + i);
6716             dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6717             emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6718          }
6719       }
6720    }
6721    else {
6722       debug_printf("Unsupported primitive type");
6723    }
6724 }
6725 
6726 
6727 /**
6728  * Emit the actual clip distance instructions to be used for clipping
6729  * by copying the clip distance from the temporary registers to the
6730  * CLIPDIST registers written with the enabled planes mask.
6731  * Also copy the clip distance from the temporary to the clip distance
6732  * shadow copy register which will be referenced by the input shader
6733  */
6734 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)6735 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6736 {
6737    struct tgsi_full_src_register tmp_clip_dist_src;
6738    struct tgsi_full_dst_register clip_dist_dst;
6739 
6740    unsigned i;
6741    unsigned clip_plane_enable = emit->key.clip_plane_enable;
6742    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6743    int num_written_clipdist = emit->info.num_written_clipdistance;
6744 
6745    assert(emit->clip_dist_out_index != INVALID_INDEX);
6746    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6747 
6748    /**
6749     * Temporary reset the temporary clip dist register index so
6750     * that the copy to the real clip dist register will not
6751     * attempt to copy to the temporary register again
6752     */
6753    emit->clip_dist_tmp_index = INVALID_INDEX;
6754 
6755    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6756 
6757       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6758 
6759       /**
6760        * copy to the shadow copy for use by varying variable and
6761        * stream output. All clip distances
6762        * will be written regardless of the enabled clipping planes.
6763        */
6764       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6765                                    emit->clip_dist_so_index + i);
6766 
6767       /* MOV clip_dist_so, tmp_clip_dist */
6768       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6769                            &tmp_clip_dist_src);
6770 
6771       /**
6772        * copy those clip distances to enabled clipping planes
6773        * to CLIPDIST registers for clipping
6774        */
6775       if (clip_plane_enable & 0xf) {
6776          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6777                                       emit->clip_dist_out_index + i);
6778          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6779 
6780          /* MOV CLIPDIST, tmp_clip_dist */
6781          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6782                               &tmp_clip_dist_src);
6783       }
6784       /* four clip planes per clip register */
6785       clip_plane_enable >>= 4;
6786    }
6787    /**
6788     * set the temporary clip dist register index back to the
6789     * temporary index for the next vertex
6790     */
6791    emit->clip_dist_tmp_index = clip_dist_tmp_index;
6792 }
6793 
6794 /* Declare clip distance output registers for user-defined clip planes
6795  * or the TGSI_CLIPVERTEX output.
6796  */
6797 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)6798 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6799 {
6800    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6801    unsigned index = emit->num_outputs;
6802    unsigned plane_mask;
6803 
6804    assert(emit->unit != PIPE_SHADER_FRAGMENT);
6805    assert(num_clip_planes <= 8);
6806 
6807    if (emit->clip_mode != CLIP_LEGACY &&
6808        emit->clip_mode != CLIP_VERTEX) {
6809       return;
6810    }
6811 
6812    if (num_clip_planes == 0)
6813       return;
6814 
6815    /* Convert clip vertex to clip distances only in the last vertex stage */
6816    if (!emit->key.last_vertex_stage)
6817       return;
6818 
6819    /* Declare one or two clip output registers.  The number of components
6820     * in the mask reflects the number of clip planes.  For example, if 5
6821     * clip planes are needed, we'll declare outputs similar to:
6822     * dcl_output_siv o2.xyzw, clip_distance
6823     * dcl_output_siv o3.x, clip_distance
6824     */
6825    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6826 
6827    plane_mask = (1 << num_clip_planes) - 1;
6828    if (plane_mask & 0xf) {
6829       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6830       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6831                               VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6832                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6833       emit->num_outputs++;
6834    }
6835    if (plane_mask & 0xf0) {
6836       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6837       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6838                               VGPU10_NAME_CLIP_DISTANCE, cmask, true,
6839                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6840       emit->num_outputs++;
6841    }
6842 }
6843 
6844 
6845 /**
6846  * Emit the instructions for writing to the clip distance registers
6847  * to handle legacy/automatic clip planes.
6848  * For each clip plane, the distance is the dot product of the vertex
6849  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6850  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6851  * output registers already declared.
6852  */
6853 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)6854 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6855                              unsigned vpos_tmp_index)
6856 {
6857    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6858 
6859    assert(emit->clip_mode == CLIP_LEGACY);
6860    assert(num_clip_planes <= 8);
6861 
6862    assert(emit->unit == PIPE_SHADER_VERTEX ||
6863           emit->unit == PIPE_SHADER_GEOMETRY ||
6864           emit->unit == PIPE_SHADER_TESS_EVAL);
6865 
6866    for (i = 0; i < num_clip_planes; i++) {
6867       struct tgsi_full_dst_register dst;
6868       struct tgsi_full_src_register plane_src, vpos_src;
6869       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6870       unsigned comp = i % 4;
6871       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6872 
6873       /* create dst, src regs */
6874       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6875       dst = writemask_dst(&dst, writemask);
6876 
6877       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6878       vpos_src = make_src_temp_reg(vpos_tmp_index);
6879 
6880       /* DP4 clip_dist, plane, vpos */
6881       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6882                            &plane_src, &vpos_src);
6883    }
6884 }
6885 
6886 
6887 /**
6888  * Emit the instructions for computing the clip distance results from
6889  * the clip vertex temporary.
6890  * For each clip plane, the distance is the dot product of the clip vertex
6891  * position (found in a temp reg) and the clip plane coefficients.
6892  */
6893 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)6894 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6895 {
6896    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6897    unsigned i;
6898    struct tgsi_full_dst_register dst;
6899    struct tgsi_full_src_register clipvert_src;
6900    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6901 
6902    assert(emit->unit == PIPE_SHADER_VERTEX ||
6903           emit->unit == PIPE_SHADER_GEOMETRY ||
6904           emit->unit == PIPE_SHADER_TESS_EVAL);
6905 
6906    assert(emit->clip_mode == CLIP_VERTEX);
6907 
6908    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6909 
6910    for (i = 0; i < num_clip; i++) {
6911       struct tgsi_full_src_register plane_src;
6912       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6913       unsigned comp = i % 4;
6914       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6915 
6916       /* create dst, src regs */
6917       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6918       dst = writemask_dst(&dst, writemask);
6919 
6920       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6921 
6922       /* DP4 clip_dist, plane, vpos */
6923       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6924                            &plane_src, &clipvert_src);
6925    }
6926 
6927    /* copy temporary clip vertex register to the clip vertex register */
6928 
6929    assert(emit->clip_vertex_out_index != INVALID_INDEX);
6930 
6931    /**
6932     * temporary reset the temporary clip vertex register index so
6933     * that copy to the clip vertex register will not attempt
6934     * to copy to the temporary register again
6935     */
6936    emit->clip_vertex_tmp_index = INVALID_INDEX;
6937 
6938    /* MOV clip_vertex, clip_vertex_tmp */
6939    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6940    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6941                         &dst, &clipvert_src);
6942 
6943    /**
6944     * set the temporary clip vertex register index back to the
6945     * temporary index for the next vertex
6946     */
6947    emit->clip_vertex_tmp_index = clip_vertex_tmp;
6948 }
6949 
6950 /**
6951  * Emit code to convert RGBA to BGRA
6952  */
6953 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6954 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6955                      const struct tgsi_full_dst_register *dst,
6956                      const struct tgsi_full_src_register *src)
6957 {
6958    struct tgsi_full_src_register bgra_src =
6959       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6960 
6961    begin_emit_instruction(emit);
6962    emit_opcode(emit, VGPU10_OPCODE_MOV, false);
6963    emit_dst_register(emit, dst);
6964    emit_src_register(emit, &bgra_src);
6965    end_emit_instruction(emit);
6966 }
6967 
6968 
6969 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6970 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6971 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6972                     const struct tgsi_full_dst_register *dst,
6973                     const struct tgsi_full_src_register *src)
6974 {
6975    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6976    struct tgsi_full_src_register two =
6977       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6978    struct tgsi_full_src_register neg_two =
6979       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6980 
6981    unsigned val_tmp = get_temp_index(emit);
6982    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6983    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6984 
6985    unsigned bias_tmp = get_temp_index(emit);
6986    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6987    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6988 
6989    /* val = src * 2.0 */
6990    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6991 
6992    /* bias = src > 0.5 */
6993    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6994 
6995    /* bias = bias & -2.0 */
6996    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6997                         &bias_src, &neg_two);
6998 
6999    /* dst = val + bias */
7000    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
7001                         &val_src, &bias_src);
7002 
7003    free_temp_indexes(emit);
7004 }
7005 
7006 
7007 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
7008 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)7009 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
7010                       const struct tgsi_full_dst_register *dst,
7011                       const struct tgsi_full_src_register *src)
7012 {
7013    struct tgsi_full_src_register scale =
7014       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
7015 
7016    /* dst = src * scale */
7017    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
7018 }
7019 
7020 
7021 /** Convert from R32_UINT to 10_10_10_2_sscaled */
7022 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)7023 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
7024                       const struct tgsi_full_dst_register *dst,
7025                       const struct tgsi_full_src_register *src)
7026 {
7027    struct tgsi_full_src_register lshift =
7028       make_immediate_reg_int4(emit, 22, 12, 2, 0);
7029    struct tgsi_full_src_register rshift =
7030       make_immediate_reg_int4(emit, 22, 22, 22, 30);
7031 
7032    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
7033 
7034    unsigned tmp = get_temp_index(emit);
7035    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7036    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7037 
7038    /*
7039     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
7040     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
7041     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
7042     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
7043     * dst = i_to_f(r,g,b,a);     # convert to float
7044     */
7045    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
7046                         &src_xxxx, &lshift);
7047    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
7048                         &tmp_src, &rshift);
7049    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
7050 
7051    free_temp_indexes(emit);
7052 }
7053 
7054 
7055 /**
7056  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
7057  */
7058 static bool
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7059 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
7060               const struct tgsi_full_instruction *inst)
7061 {
7062    unsigned index = inst->Dst[0].Register.Index;
7063    struct tgsi_full_dst_register dst;
7064    VGPU10_OPCODE_TYPE opcode;
7065 
7066    assert(index < MAX_VGPU10_ADDR_REGS);
7067    dst = make_dst_temp_reg(emit->address_reg_index[index]);
7068    dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
7069 
7070    /* ARL dst, s0
7071     * Translates into:
7072     * FTOI address_tmp, s0
7073     *
7074     * UARL dst, s0
7075     * Translates into:
7076     * MOV address_tmp, s0
7077     */
7078    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
7079       opcode = VGPU10_OPCODE_FTOI;
7080    else
7081       opcode = VGPU10_OPCODE_MOV;
7082 
7083    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
7084 
7085    return true;
7086 }
7087 
7088 
7089 /**
7090  * Emit code for TGSI_OPCODE_CAL instruction.
7091  */
7092 static bool
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7093 emit_cal(struct svga_shader_emitter_v10 *emit,
7094          const struct tgsi_full_instruction *inst)
7095 {
7096    unsigned label = inst->Label.Label;
7097    VGPU10OperandToken0 operand;
7098    operand.value = 0;
7099    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
7100 
7101    begin_emit_instruction(emit);
7102    emit_dword(emit, operand.value);
7103    emit_dword(emit, label);
7104    end_emit_instruction(emit);
7105 
7106    return true;
7107 }
7108 
7109 
7110 /**
7111  * Emit code for TGSI_OPCODE_IABS instruction.
7112  */
7113 static bool
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7114 emit_iabs(struct svga_shader_emitter_v10 *emit,
7115           const struct tgsi_full_instruction *inst)
7116 {
7117    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
7118     * dst.y = (src0.y < 0) ? -src0.y : src0.y
7119     * dst.z = (src0.z < 0) ? -src0.z : src0.z
7120     * dst.w = (src0.w < 0) ? -src0.w : src0.w
7121     *
7122     * Translates into
7123     *   IMAX dst, src, neg(src)
7124     */
7125    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
7126    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
7127                         &inst->Src[0], &neg_src);
7128 
7129    return true;
7130 }
7131 
7132 
7133 /**
7134  * Emit code for TGSI_OPCODE_CMP instruction.
7135  */
7136 static bool
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7137 emit_cmp(struct svga_shader_emitter_v10 *emit,
7138          const struct tgsi_full_instruction *inst)
7139 {
7140    /* dst.x = (src0.x < 0) ? src1.x : src2.x
7141     * dst.y = (src0.y < 0) ? src1.y : src2.y
7142     * dst.z = (src0.z < 0) ? src1.z : src2.z
7143     * dst.w = (src0.w < 0) ? src1.w : src2.w
7144     *
7145     * Translates into
7146     *   LT tmp, src0, 0.0
7147     *   MOVC dst, tmp, src1, src2
7148     */
7149    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7150    unsigned tmp = get_temp_index(emit);
7151    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7152    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7153 
7154    emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
7155                         &inst->Src[0], &zero, NULL, false,
7156                         inst->Instruction.Precise);
7157    emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
7158                         &tmp_src, &inst->Src[1], &inst->Src[2],
7159                         inst->Instruction.Saturate, false);
7160 
7161    free_temp_indexes(emit);
7162 
7163    return true;
7164 }
7165 
7166 
7167 /**
7168  * Emit code for TGSI_OPCODE_DST instruction.
7169  */
7170 static bool
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7171 emit_dst(struct svga_shader_emitter_v10 *emit,
7172          const struct tgsi_full_instruction *inst)
7173 {
7174    /*
7175     * dst.x = 1
7176     * dst.y = src0.y * src1.y
7177     * dst.z = src0.z
7178     * dst.w = src1.w
7179     */
7180 
7181    struct tgsi_full_src_register s0_yyyy =
7182       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7183    struct tgsi_full_src_register s0_zzzz =
7184       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
7185    struct tgsi_full_src_register s1_yyyy =
7186       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
7187    struct tgsi_full_src_register s1_wwww =
7188       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
7189 
7190    /*
7191     * If dst and either src0 and src1 are the same we need
7192     * to create a temporary for it and insert a extra move.
7193     */
7194    unsigned tmp_move = get_temp_index(emit);
7195    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7196    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7197 
7198    /* MOV dst.x, 1.0 */
7199    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7200       struct tgsi_full_dst_register dst_x =
7201          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7202       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7203 
7204       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7205    }
7206 
7207    /* MUL dst.y, s0.y, s1.y */
7208    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7209       struct tgsi_full_dst_register dst_y =
7210          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7211 
7212       emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
7213                            &s1_yyyy, NULL, inst->Instruction.Saturate,
7214                            inst->Instruction.Precise);
7215    }
7216 
7217    /* MOV dst.z, s0.z */
7218    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7219       struct tgsi_full_dst_register dst_z =
7220          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7221 
7222       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7223                            &dst_z, &s0_zzzz, NULL, NULL,
7224                            inst->Instruction.Saturate,
7225                            inst->Instruction.Precise);
7226   }
7227 
7228    /* MOV dst.w, s1.w */
7229    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7230       struct tgsi_full_dst_register dst_w =
7231          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7232 
7233       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7234                            &dst_w, &s1_wwww, NULL, NULL,
7235                            inst->Instruction.Saturate,
7236                            inst->Instruction.Precise);
7237    }
7238 
7239    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7240    free_temp_indexes(emit);
7241 
7242    return true;
7243 }
7244 
7245 
7246 /**
7247  * A helper function to return the stream index as specified in
7248  * the immediate register
7249  */
7250 static inline unsigned
find_stream_index(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7251 find_stream_index(struct svga_shader_emitter_v10 *emit,
7252                   const struct tgsi_full_src_register *src)
7253 {
7254    return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
7255 }
7256 
7257 
7258 /**
7259  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
7260  */
7261 static bool
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7262 emit_endprim(struct svga_shader_emitter_v10 *emit,
7263              const struct tgsi_full_instruction *inst)
7264 {
7265    assert(emit->unit == PIPE_SHADER_GEOMETRY);
7266 
7267    begin_emit_instruction(emit);
7268    if (emit->version >= 50) {
7269       unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
7270 
7271       if (emit->info.num_stream_output_components[streamIndex] == 0) {
7272          /**
7273           * If there is no output for this stream, discard this instruction.
7274           */
7275          emit->discard_instruction = true;
7276       }
7277       else {
7278          emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, false);
7279          assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
7280          emit_stream_register(emit, streamIndex);
7281       }
7282    }
7283    else {
7284       emit_opcode(emit, VGPU10_OPCODE_CUT, false);
7285    }
7286    end_emit_instruction(emit);
7287    return true;
7288 }
7289 
7290 
7291 /**
7292  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
7293  */
7294 static bool
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7295 emit_ex2(struct svga_shader_emitter_v10 *emit,
7296          const struct tgsi_full_instruction *inst)
7297 {
7298    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
7299     * while VGPU10 computes four values.
7300     *
7301     * dst = EX2(src):
7302     *   dst.xyzw = 2.0 ^ src.x
7303     */
7304 
7305    struct tgsi_full_src_register src_xxxx =
7306       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7307                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7308 
7309    /* EXP tmp, s0.xxxx */
7310    emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
7311                         NULL, NULL,
7312                         inst->Instruction.Saturate,
7313                         inst->Instruction.Precise);
7314 
7315    return true;
7316 }
7317 
7318 
7319 /**
7320  * Emit code for TGSI_OPCODE_EXP instruction.
7321  */
7322 static bool
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7323 emit_exp(struct svga_shader_emitter_v10 *emit,
7324          const struct tgsi_full_instruction *inst)
7325 {
7326    /*
7327     * dst.x = 2 ^ floor(s0.x)
7328     * dst.y = s0.x - floor(s0.x)
7329     * dst.z = 2 ^ s0.x
7330     * dst.w = 1.0
7331     */
7332 
7333    struct tgsi_full_src_register src_xxxx =
7334       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7335    unsigned tmp = get_temp_index(emit);
7336    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7337    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7338 
7339    /*
7340     * If dst and src are the same we need to create
7341     * a temporary for it and insert a extra move.
7342     */
7343    unsigned tmp_move = get_temp_index(emit);
7344    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7345    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7346 
7347    /* only use X component of temp reg */
7348    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7349    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7350 
7351    /* ROUND_NI tmp.x, s0.x */
7352    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
7353                         &src_xxxx); /* round to -infinity */
7354 
7355    /* EXP dst.x, tmp.x */
7356    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7357       struct tgsi_full_dst_register dst_x =
7358          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7359 
7360       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
7361                            NULL, NULL,
7362                            inst->Instruction.Saturate,
7363                            inst->Instruction.Precise);
7364    }
7365 
7366    /* ADD dst.y, s0.x, -tmp */
7367    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7368       struct tgsi_full_dst_register dst_y =
7369          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7370       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
7371 
7372       emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
7373                            &neg_tmp_src, NULL,
7374                            inst->Instruction.Saturate,
7375                            inst->Instruction.Precise);
7376    }
7377 
7378    /* EXP dst.z, s0.x */
7379    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7380       struct tgsi_full_dst_register dst_z =
7381          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7382 
7383       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
7384                            NULL, NULL,
7385                            inst->Instruction.Saturate,
7386                            inst->Instruction.Precise);
7387    }
7388 
7389    /* MOV dst.w, 1.0 */
7390    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7391       struct tgsi_full_dst_register dst_w =
7392          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7393       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7394 
7395       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7396    }
7397 
7398    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7399 
7400    free_temp_indexes(emit);
7401 
7402    return true;
7403 }
7404 
7405 
7406 /**
7407  * Emit code for TGSI_OPCODE_IF instruction.
7408  */
7409 static bool
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7410 emit_if(struct svga_shader_emitter_v10 *emit,
7411         const struct tgsi_full_src_register *src)
7412 {
7413    VGPU10OpcodeToken0 opcode0;
7414 
7415    /* The src register should be a scalar */
7416    assert(src->Register.SwizzleX == src->Register.SwizzleY &&
7417           src->Register.SwizzleX == src->Register.SwizzleZ &&
7418           src->Register.SwizzleX == src->Register.SwizzleW);
7419 
7420    /* The only special thing here is that we need to set the
7421     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
7422     * src.x is non-zero.
7423     */
7424    opcode0.value = 0;
7425    opcode0.opcodeType = VGPU10_OPCODE_IF;
7426    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
7427 
7428    begin_emit_instruction(emit);
7429    emit_dword(emit, opcode0.value);
7430    emit_src_register(emit, src);
7431    end_emit_instruction(emit);
7432 
7433    return true;
7434 }
7435 
7436 
7437 /**
7438  * Emit code for conditional discard instruction (discard fragment if any of
7439  * the register components are negative).
7440  */
7441 static bool
emit_cond_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7442 emit_cond_discard(struct svga_shader_emitter_v10 *emit,
7443                   const struct tgsi_full_instruction *inst)
7444 {
7445    unsigned tmp = get_temp_index(emit);
7446    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7447    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7448 
7449    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7450 
7451    struct tgsi_full_dst_register tmp_dst_x =
7452       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7453    struct tgsi_full_src_register tmp_src_xxxx =
7454       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7455 
7456    /* tmp = src[0] < 0.0 */
7457    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
7458 
7459    if (!same_swizzle_terms(&inst->Src[0])) {
7460       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
7461        * logically OR the swizzle terms.  Most uses of this conditional
7462        * discard instruction only test one channel so it's good to
7463        * avoid these extra steps.
7464        */
7465       struct tgsi_full_src_register tmp_src_yyyy =
7466          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
7467       struct tgsi_full_src_register tmp_src_zzzz =
7468          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
7469       struct tgsi_full_src_register tmp_src_wwww =
7470          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
7471 
7472       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7473                            &tmp_src_yyyy);
7474       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7475                            &tmp_src_zzzz);
7476       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7477                            &tmp_src_wwww);
7478    }
7479 
7480    begin_emit_instruction(emit);
7481    emit_discard_opcode(emit, true); /* discard if src0.x is non-zero */
7482    emit_src_register(emit, &tmp_src_xxxx);
7483    end_emit_instruction(emit);
7484 
7485    free_temp_indexes(emit);
7486 
7487    return true;
7488 }
7489 
7490 
7491 /**
7492  * Emit code for the unconditional discard instruction.
7493  */
7494 static bool
emit_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7495 emit_discard(struct svga_shader_emitter_v10 *emit,
7496              const struct tgsi_full_instruction *inst)
7497 {
7498    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7499 
7500    /* DISCARD if 0.0 is zero */
7501    begin_emit_instruction(emit);
7502    emit_discard_opcode(emit, false);
7503    emit_src_register(emit, &zero);
7504    end_emit_instruction(emit);
7505 
7506    return true;
7507 }
7508 
7509 
7510 /**
7511  * Emit code for TGSI_OPCODE_LG2 instruction.
7512  */
7513 static bool
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7514 emit_lg2(struct svga_shader_emitter_v10 *emit,
7515          const struct tgsi_full_instruction *inst)
7516 {
7517    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
7518     * while VGPU10 computes four values.
7519     *
7520     * dst = LG2(src):
7521     *   dst.xyzw = log2(src.x)
7522     */
7523 
7524    struct tgsi_full_src_register src_xxxx =
7525       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7526                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7527 
7528    /* LOG tmp, s0.xxxx */
7529    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7530                         &inst->Dst[0], &src_xxxx, NULL, NULL,
7531                         inst->Instruction.Saturate,
7532                         inst->Instruction.Precise);
7533 
7534    return true;
7535 }
7536 
7537 
7538 /**
7539  * Emit code for TGSI_OPCODE_LIT instruction.
7540  */
7541 static bool
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7542 emit_lit(struct svga_shader_emitter_v10 *emit,
7543          const struct tgsi_full_instruction *inst)
7544 {
7545    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7546 
7547    /*
7548     * If dst and src are the same we need to create
7549     * a temporary for it and insert a extra move.
7550     */
7551    unsigned tmp_move = get_temp_index(emit);
7552    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7553    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7554 
7555    /*
7556     * dst.x = 1
7557     * dst.y = max(src.x, 0)
7558     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
7559     * dst.w = 1
7560     */
7561 
7562    /* MOV dst.x, 1.0 */
7563    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7564       struct tgsi_full_dst_register dst_x =
7565          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7566       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7567    }
7568 
7569    /* MOV dst.w, 1.0 */
7570    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7571       struct tgsi_full_dst_register dst_w =
7572          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7573       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7574    }
7575 
7576    /* MAX dst.y, src.x, 0.0 */
7577    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7578       struct tgsi_full_dst_register dst_y =
7579          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7580       struct tgsi_full_src_register zero =
7581          make_immediate_reg_float(emit, 0.0f);
7582       struct tgsi_full_src_register src_xxxx =
7583          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7584                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7585 
7586       emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
7587                            &zero, NULL, inst->Instruction.Saturate, false);
7588    }
7589 
7590    /*
7591     * tmp1 = clamp(src.w, -128, 128);
7592     *   MAX tmp1, src.w, -128
7593     *   MIN tmp1, tmp1, 128
7594     *
7595     * tmp2 = max(tmp2, 0);
7596     *   MAX tmp2, src.y, 0
7597     *
7598     * tmp1 = pow(tmp2, tmp1);
7599     *   LOG tmp2, tmp2
7600     *   MUL tmp1, tmp2, tmp1
7601     *   EXP tmp1, tmp1
7602     *
7603     * tmp1 = (src.w == 0) ? 1 : tmp1;
7604     *   EQ tmp2, 0, src.w
7605     *   MOVC tmp1, tmp2, 1.0, tmp1
7606     *
7607     * dst.z = (0 < src.x) ? tmp1 : 0;
7608     *   LT tmp2, 0, src.x
7609     *   MOVC dst.z, tmp2, tmp1, 0.0
7610     */
7611    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7612       struct tgsi_full_dst_register dst_z =
7613          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7614 
7615       unsigned tmp1 = get_temp_index(emit);
7616       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7617       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7618       unsigned tmp2 = get_temp_index(emit);
7619       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7620       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7621 
7622       struct tgsi_full_src_register src_xxxx =
7623          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7624       struct tgsi_full_src_register src_yyyy =
7625          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7626       struct tgsi_full_src_register src_wwww =
7627          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
7628 
7629       struct tgsi_full_src_register zero =
7630          make_immediate_reg_float(emit, 0.0f);
7631       struct tgsi_full_src_register lowerbound =
7632          make_immediate_reg_float(emit, -128.0f);
7633       struct tgsi_full_src_register upperbound =
7634          make_immediate_reg_float(emit, 128.0f);
7635 
7636       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
7637                            &lowerbound);
7638       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
7639                            &upperbound);
7640       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
7641                            &zero);
7642 
7643       /* POW tmp1, tmp2, tmp1 */
7644       /* LOG tmp2, tmp2 */
7645       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
7646 
7647       /* MUL tmp1, tmp2, tmp1 */
7648       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
7649                            &tmp1_src);
7650 
7651       /* EXP tmp1, tmp1 */
7652       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
7653 
7654       /* EQ tmp2, 0, src.w */
7655       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
7656       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
7657       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
7658                            &tmp2_src, &one, &tmp1_src);
7659 
7660       /* LT tmp2, 0, src.x */
7661       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
7662       /* MOVC dst.z, tmp2, tmp1, 0.0 */
7663       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
7664                            &tmp2_src, &tmp1_src, &zero);
7665    }
7666 
7667    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7668    free_temp_indexes(emit);
7669 
7670    return true;
7671 }
7672 
7673 
7674 /**
7675  * Emit Level Of Detail Query (LODQ) instruction.
7676  */
7677 static bool
emit_lodq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7678 emit_lodq(struct svga_shader_emitter_v10 *emit,
7679           const struct tgsi_full_instruction *inst)
7680 {
7681    const uint unit = inst->Src[1].Register.Index;
7682 
7683    assert(emit->version >= 41);
7684 
7685    /* LOD dst, coord, resource, sampler */
7686    begin_emit_instruction(emit);
7687    emit_opcode(emit, VGPU10_OPCODE_LOD, false);
7688    emit_dst_register(emit, &inst->Dst[0]);
7689    emit_src_register(emit, &inst->Src[0]); /* coord */
7690    emit_resource_register(emit, unit);
7691    emit_sampler_register(emit, unit);
7692    end_emit_instruction(emit);
7693 
7694    return true;
7695 }
7696 
7697 
7698 /**
7699  * Emit code for TGSI_OPCODE_LOG instruction.
7700  */
7701 static bool
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7702 emit_log(struct svga_shader_emitter_v10 *emit,
7703          const struct tgsi_full_instruction *inst)
7704 {
7705    /*
7706     * dst.x = floor(lg2(abs(s0.x)))
7707     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7708     * dst.z = lg2(abs(s0.x))
7709     * dst.w = 1.0
7710     */
7711 
7712    struct tgsi_full_src_register src_xxxx =
7713       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7714    unsigned tmp = get_temp_index(emit);
7715    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7716    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7717    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7718 
7719    /* only use X component of temp reg */
7720    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7721    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7722 
7723    /* LOG tmp.x, abs(s0.x) */
7724    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7725       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7726    }
7727 
7728    /* MOV dst.z, tmp.x */
7729    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7730       struct tgsi_full_dst_register dst_z =
7731          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7732 
7733       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7734                            &dst_z, &tmp_src, NULL, NULL,
7735                            inst->Instruction.Saturate, false);
7736    }
7737 
7738    /* FLR tmp.x, tmp.x */
7739    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7740       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7741    }
7742 
7743    /* MOV dst.x, tmp.x */
7744    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7745       struct tgsi_full_dst_register dst_x =
7746          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7747 
7748       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7749                            &dst_x, &tmp_src, NULL, NULL,
7750                            inst->Instruction.Saturate, false);
7751    }
7752 
7753    /* EXP tmp.x, tmp.x */
7754    /* DIV dst.y, abs(s0.x), tmp.x */
7755    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7756       struct tgsi_full_dst_register dst_y =
7757          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7758 
7759       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7760       emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7761                            &tmp_src, NULL, inst->Instruction.Saturate, false);
7762    }
7763 
7764    /* MOV dst.w, 1.0 */
7765    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7766       struct tgsi_full_dst_register dst_w =
7767          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7768       struct tgsi_full_src_register one =
7769          make_immediate_reg_float(emit, 1.0f);
7770 
7771       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7772    }
7773 
7774    free_temp_indexes(emit);
7775 
7776    return true;
7777 }
7778 
7779 
7780 /**
7781  * Emit code for TGSI_OPCODE_LRP instruction.
7782  */
7783 static bool
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7784 emit_lrp(struct svga_shader_emitter_v10 *emit,
7785          const struct tgsi_full_instruction *inst)
7786 {
7787    /* dst = LRP(s0, s1, s2):
7788     *   dst = s0 * (s1 - s2) + s2
7789     * Translates into:
7790     *   SUB tmp, s1, s2;        tmp = s1 - s2
7791     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7792     */
7793    unsigned tmp = get_temp_index(emit);
7794    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7795    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7796    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7797 
7798    /* ADD tmp, s1, -s2 */
7799    emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7800                         &inst->Src[1], &neg_src2, NULL, false,
7801                         inst->Instruction.Precise);
7802 
7803    /* MAD dst, s1, tmp, s3 */
7804    emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7805                         &inst->Src[0], &src_tmp, &inst->Src[2],
7806                         inst->Instruction.Saturate,
7807                         inst->Instruction.Precise);
7808 
7809    free_temp_indexes(emit);
7810 
7811    return true;
7812 }
7813 
7814 
7815 /**
7816  * Emit code for TGSI_OPCODE_POW instruction.
7817  */
7818 static bool
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7819 emit_pow(struct svga_shader_emitter_v10 *emit,
7820          const struct tgsi_full_instruction *inst)
7821 {
7822    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7823     * src1.x while VGPU10 computes four values.
7824     *
7825     * dst = POW(src0, src1):
7826     *   dst.xyzw = src0.x ^ src1.x
7827     */
7828    unsigned tmp = get_temp_index(emit);
7829    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7830    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7831    struct tgsi_full_src_register src0_xxxx =
7832       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7833                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7834    struct tgsi_full_src_register src1_xxxx =
7835       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7836                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7837 
7838    /* LOG tmp, s0.xxxx */
7839    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7840                         &tmp_dst, &src0_xxxx, NULL, NULL,
7841                         false, inst->Instruction.Precise);
7842 
7843    /* MUL tmp, tmp, s1.xxxx */
7844    emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7845                         &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7846                         false, inst->Instruction.Precise);
7847 
7848    /* EXP tmp, s0.xxxx */
7849    emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7850                         &inst->Dst[0], &tmp_src, NULL, NULL,
7851                         inst->Instruction.Saturate,
7852                         inst->Instruction.Precise);
7853 
7854    /* free tmp */
7855    free_temp_indexes(emit);
7856 
7857    return true;
7858 }
7859 
7860 
7861 /**
7862  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7863  */
7864 static bool
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7865 emit_rcp(struct svga_shader_emitter_v10 *emit,
7866          const struct tgsi_full_instruction *inst)
7867 {
7868    if (emit->version >= 50) {
7869       /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7870        * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7871        * to manipulate the src register's swizzle.
7872        */
7873       struct tgsi_full_src_register src = inst->Src[0];
7874       src.Register.SwizzleY =
7875       src.Register.SwizzleZ =
7876       src.Register.SwizzleW = src.Register.SwizzleX;
7877 
7878       begin_emit_instruction(emit);
7879       emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7880                           inst->Instruction.Saturate,
7881                           inst->Instruction.Precise);
7882       emit_dst_register(emit, &inst->Dst[0]);
7883       emit_src_register(emit, &src);
7884       end_emit_instruction(emit);
7885    }
7886    else {
7887       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7888 
7889       unsigned tmp = get_temp_index(emit);
7890       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7891       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7892 
7893       struct tgsi_full_dst_register tmp_dst_x =
7894          writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7895       struct tgsi_full_src_register tmp_src_xxxx =
7896          scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7897 
7898       /* DIV tmp.x, 1.0, s0 */
7899       emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7900                            &tmp_dst_x, &one, &inst->Src[0], NULL,
7901                            false, inst->Instruction.Precise);
7902 
7903       /* MOV dst, tmp.xxxx */
7904       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7905                            &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7906                            inst->Instruction.Saturate,
7907                            inst->Instruction.Precise);
7908 
7909       free_temp_indexes(emit);
7910    }
7911 
7912    return true;
7913 }
7914 
7915 
7916 /**
7917  * Emit code for TGSI_OPCODE_RSQ instruction.
7918  */
7919 static bool
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7920 emit_rsq(struct svga_shader_emitter_v10 *emit,
7921          const struct tgsi_full_instruction *inst)
7922 {
7923    /* dst = RSQ(src):
7924     *   dst.xyzw = 1 / sqrt(src.x)
7925     * Translates into:
7926     *   RSQ tmp, src.x
7927     *   MOV dst, tmp.xxxx
7928     */
7929 
7930    unsigned tmp = get_temp_index(emit);
7931    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7932    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7933 
7934    struct tgsi_full_dst_register tmp_dst_x =
7935       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7936    struct tgsi_full_src_register tmp_src_xxxx =
7937       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7938 
7939    /* RSQ tmp, src.x */
7940    emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7941                         &tmp_dst_x, &inst->Src[0], NULL, NULL,
7942                         false, inst->Instruction.Precise);
7943 
7944    /* MOV dst, tmp.xxxx */
7945    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7946                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7947                         inst->Instruction.Saturate,
7948                         inst->Instruction.Precise);
7949 
7950    /* free tmp */
7951    free_temp_indexes(emit);
7952 
7953    return true;
7954 }
7955 
7956 
7957 /**
7958  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7959  */
7960 static bool
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7961 emit_seq(struct svga_shader_emitter_v10 *emit,
7962          const struct tgsi_full_instruction *inst)
7963 {
7964    /* dst = SEQ(s0, s1):
7965     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7966     * Translates into:
7967     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7968     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7969     */
7970    unsigned tmp = get_temp_index(emit);
7971    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7972    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7973    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7974    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7975 
7976    /* EQ tmp, s0, s1 */
7977    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7978                         &inst->Src[1]);
7979 
7980    /* MOVC dst, tmp, one, zero */
7981    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7982                         &one, &zero);
7983 
7984    free_temp_indexes(emit);
7985 
7986    return true;
7987 }
7988 
7989 
7990 /**
7991  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7992  */
7993 static bool
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7994 emit_sge(struct svga_shader_emitter_v10 *emit,
7995          const struct tgsi_full_instruction *inst)
7996 {
7997    /* dst = SGE(s0, s1):
7998     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7999     * Translates into:
8000     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
8001     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8002     */
8003    unsigned tmp = get_temp_index(emit);
8004    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8005    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8006    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8007    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8008 
8009    /* GE tmp, s0, s1 */
8010    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
8011                         &inst->Src[1]);
8012 
8013    /* MOVC dst, tmp, one, zero */
8014    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8015                         &one, &zero);
8016 
8017    free_temp_indexes(emit);
8018 
8019    return true;
8020 }
8021 
8022 
8023 /**
8024  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
8025  */
8026 static bool
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8027 emit_sgt(struct svga_shader_emitter_v10 *emit,
8028          const struct tgsi_full_instruction *inst)
8029 {
8030    /* dst = SGT(s0, s1):
8031     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
8032     * Translates into:
8033     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
8034     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8035     */
8036    unsigned tmp = get_temp_index(emit);
8037    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8038    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8039    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8040    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8041 
8042    /* LT tmp, s1, s0 */
8043    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
8044                         &inst->Src[0]);
8045 
8046    /* MOVC dst, tmp, one, zero */
8047    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8048                         &one, &zero);
8049 
8050    free_temp_indexes(emit);
8051 
8052    return true;
8053 }
8054 
8055 
8056 /**
8057  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
8058  */
8059 static bool
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8060 emit_sincos(struct svga_shader_emitter_v10 *emit,
8061          const struct tgsi_full_instruction *inst)
8062 {
8063    unsigned tmp = get_temp_index(emit);
8064    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8065    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8066 
8067    struct tgsi_full_src_register tmp_src_xxxx =
8068       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
8069    struct tgsi_full_dst_register tmp_dst_x =
8070       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8071 
8072    begin_emit_instruction(emit);
8073    emit_opcode(emit, VGPU10_OPCODE_SINCOS, false);
8074 
8075    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
8076    {
8077       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
8078       emit_null_dst_register(emit);  /* second destination register */
8079    }
8080    else {
8081       emit_null_dst_register(emit);
8082       emit_dst_register(emit, &tmp_dst_x);
8083    }
8084 
8085    emit_src_register(emit, &inst->Src[0]);
8086    end_emit_instruction(emit);
8087 
8088    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
8089                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
8090                         inst->Instruction.Saturate,
8091                         inst->Instruction.Precise);
8092 
8093    free_temp_indexes(emit);
8094 
8095    return true;
8096 }
8097 
8098 
8099 /**
8100  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
8101  */
8102 static bool
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8103 emit_sle(struct svga_shader_emitter_v10 *emit,
8104          const struct tgsi_full_instruction *inst)
8105 {
8106    /* dst = SLE(s0, s1):
8107     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
8108     * Translates into:
8109     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
8110     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8111     */
8112    unsigned tmp = get_temp_index(emit);
8113    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8114    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8115    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8116    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8117 
8118    /* GE tmp, s1, s0 */
8119    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
8120                         &inst->Src[0]);
8121 
8122    /* MOVC dst, tmp, one, zero */
8123    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8124                         &one, &zero);
8125 
8126    free_temp_indexes(emit);
8127 
8128    return true;
8129 }
8130 
8131 
8132 /**
8133  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
8134  */
8135 static bool
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8136 emit_slt(struct svga_shader_emitter_v10 *emit,
8137          const struct tgsi_full_instruction *inst)
8138 {
8139    /* dst = SLT(s0, s1):
8140     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
8141     * Translates into:
8142     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
8143     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8144     */
8145    unsigned tmp = get_temp_index(emit);
8146    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8147    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8148    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8149    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8150 
8151    /* LT tmp, s0, s1 */
8152    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
8153                         &inst->Src[1]);
8154 
8155    /* MOVC dst, tmp, one, zero */
8156    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8157                         &one, &zero);
8158 
8159    free_temp_indexes(emit);
8160 
8161    return true;
8162 }
8163 
8164 
8165 /**
8166  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
8167  */
8168 static bool
emit_sne(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8169 emit_sne(struct svga_shader_emitter_v10 *emit,
8170          const struct tgsi_full_instruction *inst)
8171 {
8172    /* dst = SNE(s0, s1):
8173     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
8174     * Translates into:
8175     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
8176     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8177     */
8178    unsigned tmp = get_temp_index(emit);
8179    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8180    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8181    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8182    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8183 
8184    /* NE tmp, s0, s1 */
8185    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
8186                         &inst->Src[1]);
8187 
8188    /* MOVC dst, tmp, one, zero */
8189    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8190                         &one, &zero);
8191 
8192    free_temp_indexes(emit);
8193 
8194    return true;
8195 }
8196 
8197 
8198 /**
8199  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
8200  */
8201 static bool
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8202 emit_ssg(struct svga_shader_emitter_v10 *emit,
8203          const struct tgsi_full_instruction *inst)
8204 {
8205    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
8206     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
8207     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
8208     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
8209     * Translates into:
8210     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
8211     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
8212     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
8213     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
8214     */
8215    struct tgsi_full_src_register zero =
8216       make_immediate_reg_float(emit, 0.0f);
8217    struct tgsi_full_src_register one =
8218       make_immediate_reg_float(emit, 1.0f);
8219    struct tgsi_full_src_register neg_one =
8220       make_immediate_reg_float(emit, -1.0f);
8221 
8222    unsigned tmp1 = get_temp_index(emit);
8223    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8224    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8225 
8226    unsigned tmp2 = get_temp_index(emit);
8227    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8228    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8229 
8230    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
8231                         &zero);
8232    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
8233                         &neg_one, &zero);
8234    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
8235                         &inst->Src[0]);
8236    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
8237                         &one, &tmp2_src);
8238 
8239    free_temp_indexes(emit);
8240 
8241    return true;
8242 }
8243 
8244 
8245 /**
8246  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
8247  */
8248 static bool
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8249 emit_issg(struct svga_shader_emitter_v10 *emit,
8250           const struct tgsi_full_instruction *inst)
8251 {
8252    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
8253     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
8254     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
8255     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
8256     * Translates into:
8257     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
8258     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
8259     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
8260     */
8261    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8262 
8263    unsigned tmp1 = get_temp_index(emit);
8264    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8265    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8266 
8267    unsigned tmp2 = get_temp_index(emit);
8268    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8269    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8270 
8271    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
8272 
8273    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
8274                         &inst->Src[0], &zero);
8275    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
8276                         &zero, &inst->Src[0]);
8277    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
8278                         &tmp1_src, &neg_tmp2);
8279 
8280    free_temp_indexes(emit);
8281 
8282    return true;
8283 }
8284 
8285 
8286 /**
8287  * Emit a comparison instruction.  The dest register will get
8288  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
8289  */
8290 static void
emit_comparison(struct svga_shader_emitter_v10 * emit,SVGA3dCmpFunc func,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src0,const struct tgsi_full_src_register * src1)8291 emit_comparison(struct svga_shader_emitter_v10 *emit,
8292                 SVGA3dCmpFunc func,
8293                 const struct tgsi_full_dst_register *dst,
8294                 const struct tgsi_full_src_register *src0,
8295                 const struct tgsi_full_src_register *src1)
8296 {
8297    struct tgsi_full_src_register immediate;
8298    VGPU10OpcodeToken0 opcode0;
8299    bool swapSrc = false;
8300 
8301    /* Sanity checks for svga vs. gallium enums */
8302    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
8303    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
8304 
8305    opcode0.value = 0;
8306 
8307    switch (func) {
8308    case SVGA3D_CMP_NEVER:
8309       immediate = make_immediate_reg_int(emit, 0);
8310       /* MOV dst, {0} */
8311       begin_emit_instruction(emit);
8312       emit_dword(emit, VGPU10_OPCODE_MOV);
8313       emit_dst_register(emit, dst);
8314       emit_src_register(emit, &immediate);
8315       end_emit_instruction(emit);
8316       return;
8317    case SVGA3D_CMP_ALWAYS:
8318       immediate = make_immediate_reg_int(emit, -1);
8319       /* MOV dst, {-1} */
8320       begin_emit_instruction(emit);
8321       emit_dword(emit, VGPU10_OPCODE_MOV);
8322       emit_dst_register(emit, dst);
8323       emit_src_register(emit, &immediate);
8324       end_emit_instruction(emit);
8325       return;
8326    case SVGA3D_CMP_LESS:
8327       opcode0.opcodeType = VGPU10_OPCODE_LT;
8328       break;
8329    case SVGA3D_CMP_EQUAL:
8330       opcode0.opcodeType = VGPU10_OPCODE_EQ;
8331       break;
8332    case SVGA3D_CMP_LESSEQUAL:
8333       opcode0.opcodeType = VGPU10_OPCODE_GE;
8334       swapSrc = true;
8335       break;
8336    case SVGA3D_CMP_GREATER:
8337       opcode0.opcodeType = VGPU10_OPCODE_LT;
8338       swapSrc = true;
8339       break;
8340    case SVGA3D_CMP_NOTEQUAL:
8341       opcode0.opcodeType = VGPU10_OPCODE_NE;
8342       break;
8343    case SVGA3D_CMP_GREATEREQUAL:
8344       opcode0.opcodeType = VGPU10_OPCODE_GE;
8345       break;
8346    default:
8347       assert(!"Unexpected comparison mode");
8348       opcode0.opcodeType = VGPU10_OPCODE_EQ;
8349    }
8350 
8351    begin_emit_instruction(emit);
8352    emit_dword(emit, opcode0.value);
8353    emit_dst_register(emit, dst);
8354    if (swapSrc) {
8355       emit_src_register(emit, src1);
8356       emit_src_register(emit, src0);
8357    }
8358    else {
8359       emit_src_register(emit, src0);
8360       emit_src_register(emit, src1);
8361    }
8362    end_emit_instruction(emit);
8363 }
8364 
8365 
8366 /**
8367  * Get texel/address offsets for a texture instruction.
8368  */
8369 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])8370 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
8371                   const struct tgsi_full_instruction *inst, int offsets[3])
8372 {
8373    if (inst->Texture.NumOffsets == 1) {
8374       /* According to OpenGL Shader Language spec the offsets are only
8375        * fetched from a previously-declared immediate/literal.
8376        */
8377       const struct tgsi_texture_offset *off = inst->TexOffsets;
8378       const unsigned index = off[0].Index;
8379       const unsigned swizzleX = off[0].SwizzleX;
8380       const unsigned swizzleY = off[0].SwizzleY;
8381       const unsigned swizzleZ = off[0].SwizzleZ;
8382       const union tgsi_immediate_data *imm = emit->immediates[index];
8383 
8384       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
8385 
8386       offsets[0] = imm[swizzleX].Int;
8387       offsets[1] = imm[swizzleY].Int;
8388       offsets[2] = imm[swizzleZ].Int;
8389    }
8390    else {
8391       offsets[0] = offsets[1] = offsets[2] = 0;
8392    }
8393 }
8394 
8395 
8396 /**
8397  * Set up the coordinate register for texture sampling.
8398  * When we're sampling from a RECT texture we have to scale the
8399  * unnormalized coordinate to a normalized coordinate.
8400  * We do that by multiplying the coordinate by an "extra" constant.
8401  * An alternative would be to use the RESINFO instruction to query the
8402  * texture's size.
8403  */
8404 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)8405 setup_texcoord(struct svga_shader_emitter_v10 *emit,
8406                unsigned unit,
8407                const struct tgsi_full_src_register *coord)
8408 {
8409    if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
8410       unsigned scale_index = emit->texcoord_scale_index[unit];
8411       unsigned tmp = get_temp_index(emit);
8412       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8413       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8414       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
8415 
8416       if (emit->key.tex[unit].texel_bias) {
8417          /* to fix texture coordinate rounding issue, 0.0001 offset is
8418           * been added. This fixes piglit test fbo-blit-scaled-linear. */
8419          struct tgsi_full_src_register offset =
8420             make_immediate_reg_float(emit, 0.0001f);
8421 
8422          /* ADD tmp, coord, offset */
8423          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
8424                               coord, &offset);
8425          /* MUL tmp, tmp, scale */
8426          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8427                               &tmp_src, &scale_src);
8428       }
8429       else {
8430          /* MUL tmp, coord, const[] */
8431          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8432                               coord, &scale_src);
8433       }
8434       return tmp_src;
8435    }
8436    else {
8437       /* use texcoord as-is */
8438       return *coord;
8439    }
8440 }
8441 
8442 
8443 /**
8444  * For SAMPLE_C instructions, emit the extra src register which indicates
8445  * the reference/comparision value.
8446  */
8447 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)8448 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
8449                           enum tgsi_texture_type target,
8450                           const struct tgsi_full_src_register *coord)
8451 {
8452    struct tgsi_full_src_register coord_src_ref;
8453    int component;
8454 
8455    assert(tgsi_is_shadow_target(target));
8456 
8457    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
8458    assert(component >= 0);
8459 
8460    coord_src_ref = scalar_src(coord, component);
8461 
8462    emit_src_register(emit, &coord_src_ref);
8463 }
8464 
8465 
8466 /**
8467  * Info for implementing texture swizzles.
8468  * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
8469  * functions use this to encapsulate the extra steps needed to perform
8470  * a texture swizzle, or shadow/depth comparisons.
8471  * The shadow/depth comparison is only done here if for the cases where
8472  * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
8473  */
8474 struct tex_swizzle_info
8475 {
8476    bool swizzled;
8477    bool shadow_compare;
8478    unsigned unit;
8479    enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
8480    struct tgsi_full_src_register tmp_src;
8481    struct tgsi_full_dst_register tmp_dst;
8482    const struct tgsi_full_dst_register *inst_dst;
8483    const struct tgsi_full_src_register *coord_src;
8484 };
8485 
8486 
8487 /**
8488  * Do setup for handling texture swizzles or shadow compares.
8489  * \param unit  the texture unit
8490  * \param inst  the TGSI texture instruction
8491  * \param shadow_compare  do shadow/depth comparison?
8492  * \param swz  returns the swizzle info
8493  */
8494 static void
begin_tex_swizzle(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_instruction * inst,bool shadow_compare,struct tex_swizzle_info * swz)8495 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8496                   unsigned unit,
8497                   const struct tgsi_full_instruction *inst,
8498                   bool shadow_compare,
8499                   struct tex_swizzle_info *swz)
8500 {
8501    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
8502                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
8503                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
8504                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
8505 
8506    swz->shadow_compare = shadow_compare;
8507    swz->texture_target = inst->Texture.Texture;
8508 
8509    if (swz->swizzled || shadow_compare) {
8510       /* Allocate temp register for the result of the SAMPLE instruction
8511        * and the source of the MOV/compare/swizzle instructions.
8512        */
8513       unsigned tmp = get_temp_index(emit);
8514       swz->tmp_src = make_src_temp_reg(tmp);
8515       swz->tmp_dst = make_dst_temp_reg(tmp);
8516 
8517       swz->unit = unit;
8518    }
8519    swz->inst_dst = &inst->Dst[0];
8520    swz->coord_src = &inst->Src[0];
8521 
8522    emit->shadow_compare_units |= shadow_compare << unit;
8523 }
8524 
8525 
8526 /**
8527  * Returns the register to put the SAMPLE instruction results into.
8528  * This will either be the original instruction dst reg (if no swizzle
8529  * and no shadow comparison) or a temporary reg if there is a swizzle.
8530  */
8531 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)8532 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
8533 {
8534    return (swz->swizzled || swz->shadow_compare)
8535       ? &swz->tmp_dst : swz->inst_dst;
8536 }
8537 
8538 
8539 /**
8540  * This emits the MOV instruction that actually implements a texture swizzle
8541  * and/or shadow comparison.
8542  */
8543 static void
end_tex_swizzle(struct svga_shader_emitter_v10 * emit,const struct tex_swizzle_info * swz)8544 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8545                 const struct tex_swizzle_info *swz)
8546 {
8547    if (swz->shadow_compare) {
8548       /* Emit extra instructions to compare the fetched texel value against
8549        * a texture coordinate component.  The result of the comparison
8550        * is 0.0 or 1.0.
8551        */
8552       struct tgsi_full_src_register coord_src;
8553       struct tgsi_full_src_register texel_src =
8554          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
8555       struct tgsi_full_src_register one =
8556          make_immediate_reg_float(emit, 1.0f);
8557       /* convert gallium comparison func to SVGA comparison func */
8558       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
8559 
8560       int component =
8561          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
8562       assert(component >= 0);
8563       coord_src = scalar_src(swz->coord_src, component);
8564 
8565       /* COMPARE tmp, coord, texel */
8566       emit_comparison(emit, compare_func,
8567                       &swz->tmp_dst, &coord_src, &texel_src);
8568 
8569       /* AND dest, tmp, {1.0} */
8570       begin_emit_instruction(emit);
8571       emit_opcode(emit, VGPU10_OPCODE_AND, false);
8572       if (swz->swizzled) {
8573          emit_dst_register(emit, &swz->tmp_dst);
8574       }
8575       else {
8576          emit_dst_register(emit, swz->inst_dst);
8577       }
8578       emit_src_register(emit, &swz->tmp_src);
8579       emit_src_register(emit, &one);
8580       end_emit_instruction(emit);
8581    }
8582 
8583    if (swz->swizzled) {
8584       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
8585       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
8586       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
8587       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
8588       unsigned writemask_0 = 0, writemask_1 = 0;
8589       bool int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
8590 
8591       /* Swizzle w/out zero/one terms */
8592       struct tgsi_full_src_register src_swizzled =
8593          swizzle_src(&swz->tmp_src,
8594                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
8595                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
8596                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
8597                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
8598 
8599       /* MOV dst, color(tmp).<swizzle> */
8600       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
8601                            swz->inst_dst, &src_swizzled);
8602 
8603       /* handle swizzle zero terms */
8604       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
8605                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
8606                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
8607                      ((swz_a == PIPE_SWIZZLE_0) << 3));
8608       writemask_0 &= swz->inst_dst->Register.WriteMask;
8609 
8610       if (writemask_0) {
8611          struct tgsi_full_src_register zero = int_tex ?
8612             make_immediate_reg_int(emit, 0) :
8613             make_immediate_reg_float(emit, 0.0f);
8614          struct tgsi_full_dst_register dst =
8615             writemask_dst(swz->inst_dst, writemask_0);
8616 
8617          /* MOV dst.writemask_0, {0,0,0,0} */
8618          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
8619       }
8620 
8621       /* handle swizzle one terms */
8622       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
8623                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
8624                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
8625                      ((swz_a == PIPE_SWIZZLE_1) << 3));
8626       writemask_1 &= swz->inst_dst->Register.WriteMask;
8627 
8628       if (writemask_1) {
8629          struct tgsi_full_src_register one = int_tex ?
8630             make_immediate_reg_int(emit, 1) :
8631             make_immediate_reg_float(emit, 1.0f);
8632          struct tgsi_full_dst_register dst =
8633             writemask_dst(swz->inst_dst, writemask_1);
8634 
8635          /* MOV dst.writemask_1, {1,1,1,1} */
8636          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
8637       }
8638    }
8639 }
8640 
8641 
8642 /**
8643  * Emit code for TGSI_OPCODE_SAMPLE instruction.
8644  */
8645 static bool
emit_sample(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8646 emit_sample(struct svga_shader_emitter_v10 *emit,
8647             const struct tgsi_full_instruction *inst)
8648 {
8649    const unsigned resource_unit = inst->Src[1].Register.Index;
8650    const unsigned sampler_unit = inst->Src[2].Register.Index;
8651    struct tgsi_full_src_register coord;
8652    int offsets[3];
8653    struct tex_swizzle_info swz_info;
8654 
8655    begin_tex_swizzle(emit, sampler_unit, inst, false, &swz_info);
8656 
8657    get_texel_offsets(emit, inst, offsets);
8658 
8659    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
8660 
8661    /* SAMPLE dst, coord(s0), resource, sampler */
8662    begin_emit_instruction(emit);
8663 
8664    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
8665     * with LOD=0.  But our virtual GPU accepts this as-is.
8666     */
8667    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
8668                       inst->Instruction.Saturate, offsets);
8669    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8670    emit_src_register(emit, &coord);
8671    emit_resource_register(emit, resource_unit);
8672    emit_sampler_register(emit, sampler_unit);
8673    end_emit_instruction(emit);
8674 
8675    end_tex_swizzle(emit, &swz_info);
8676 
8677    free_temp_indexes(emit);
8678 
8679    return true;
8680 }
8681 
8682 
8683 /**
8684  * Check if a texture instruction is valid.
8685  * An example of an invalid texture instruction is doing shadow comparison
8686  * with an integer-valued texture.
8687  * If we detect an invalid texture instruction, we replace it with:
8688  *   MOV dst, {1,1,1,1};
8689  * \return TRUE if valid, FALSE if invalid.
8690  */
8691 static bool
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8692 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8693                          const struct tgsi_full_instruction *inst)
8694 {
8695    const unsigned unit = inst->Src[1].Register.Index;
8696    const enum tgsi_texture_type target = inst->Texture.Texture;
8697    bool valid = true;
8698 
8699    if (tgsi_is_shadow_target(target) &&
8700        is_integer_type(emit->sampler_return_type[unit])) {
8701       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8702       valid = false;
8703    }
8704    /* XXX might check for other conditions in the future here */
8705 
8706    if (!valid) {
8707       /* emit a MOV dst, {1,1,1,1} instruction. */
8708       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8709       begin_emit_instruction(emit);
8710       emit_opcode(emit, VGPU10_OPCODE_MOV, false);
8711       emit_dst_register(emit, &inst->Dst[0]);
8712       emit_src_register(emit, &one);
8713       end_emit_instruction(emit);
8714    }
8715 
8716    return valid;
8717 }
8718 
8719 
8720 /**
8721  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8722  */
8723 static bool
emit_tex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8724 emit_tex(struct svga_shader_emitter_v10 *emit,
8725          const struct tgsi_full_instruction *inst)
8726 {
8727    const uint unit = inst->Src[1].Register.Index;
8728    const enum tgsi_texture_type target = inst->Texture.Texture;
8729    VGPU10_OPCODE_TYPE opcode;
8730    struct tgsi_full_src_register coord;
8731    int offsets[3];
8732    struct tex_swizzle_info swz_info;
8733    bool compare_in_shader;
8734 
8735    /* check that the sampler returns a float */
8736    if (!is_valid_tex_instruction(emit, inst))
8737       return true;
8738 
8739    compare_in_shader = tgsi_is_shadow_target(target) &&
8740                        emit->key.tex[unit].compare_in_shader;
8741 
8742    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8743 
8744    get_texel_offsets(emit, inst, offsets);
8745 
8746    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8747 
8748    /* SAMPLE dst, coord(s0), resource, sampler */
8749    begin_emit_instruction(emit);
8750 
8751    if (tgsi_is_shadow_target(target) && !compare_in_shader)
8752       opcode = VGPU10_OPCODE_SAMPLE_C;
8753    else
8754       opcode = VGPU10_OPCODE_SAMPLE;
8755 
8756    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8757    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8758    emit_src_register(emit, &coord);
8759    emit_resource_register(emit, unit);
8760    emit_sampler_register(emit, unit);
8761    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8762       emit_tex_compare_refcoord(emit, target, &coord);
8763    }
8764    end_emit_instruction(emit);
8765 
8766    end_tex_swizzle(emit, &swz_info);
8767 
8768    free_temp_indexes(emit);
8769 
8770    return true;
8771 }
8772 
8773 /**
8774  * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8775  */
8776 static bool
emit_tg4(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8777 emit_tg4(struct svga_shader_emitter_v10 *emit,
8778          const struct tgsi_full_instruction *inst)
8779 {
8780    const uint unit = inst->Src[2].Register.Index;
8781    struct tgsi_full_src_register src;
8782    struct tgsi_full_src_register offset_src, sampler, ref;
8783    int offsets[3];
8784 
8785    /* check that the sampler returns a float */
8786    if (!is_valid_tex_instruction(emit, inst))
8787       return true;
8788 
8789    if (emit->version >= 50) {
8790       unsigned target = inst->Texture.Texture;
8791       int index = inst->Src[1].Register.Index;
8792       const union tgsi_immediate_data *imm = emit->immediates[index];
8793       int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8794       unsigned select_swizzle = PIPE_SWIZZLE_X;
8795 
8796       if (!tgsi_is_shadow_target(target)) {
8797          switch (select_comp) {
8798          case 0:
8799             select_swizzle = emit->key.tex[unit].swizzle_r;
8800             break;
8801          case 1:
8802             select_swizzle = emit->key.tex[unit].swizzle_g;
8803             break;
8804          case 2:
8805             select_swizzle = emit->key.tex[unit].swizzle_b;
8806             break;
8807          case 3:
8808             select_swizzle = emit->key.tex[unit].swizzle_a;
8809             break;
8810          default:
8811             assert(!"Unexpected component in texture gather swizzle");
8812          }
8813       }
8814       else {
8815          select_swizzle = emit->key.tex[unit].swizzle_r;
8816       }
8817 
8818       if (select_swizzle == PIPE_SWIZZLE_1) {
8819          src = make_immediate_reg_float(emit, 1.0);
8820          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8821          return true;
8822       }
8823       else if (select_swizzle == PIPE_SWIZZLE_0) {
8824          src = make_immediate_reg_float(emit, 0.0);
8825          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8826          return true;
8827       }
8828 
8829       src = setup_texcoord(emit, unit, &inst->Src[0]);
8830 
8831       /* GATHER4 dst, coord, resource, sampler */
8832       /* GATHER4_C dst, coord, resource, sampler ref */
8833       /* GATHER4_PO dst, coord, offset resource, sampler */
8834       /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8835       begin_emit_instruction(emit);
8836       if (inst->Texture.NumOffsets == 1) {
8837          if (tgsi_is_shadow_target(target)) {
8838             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8839                         inst->Instruction.Saturate);
8840          }
8841          else {
8842             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8843                         inst->Instruction.Saturate);
8844          }
8845       }
8846       else {
8847          if (tgsi_is_shadow_target(target)) {
8848             emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8849                         inst->Instruction.Saturate);
8850          }
8851          else {
8852             emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8853                         inst->Instruction.Saturate);
8854          }
8855       }
8856 
8857       emit_dst_register(emit, &inst->Dst[0]);
8858       emit_src_register(emit, &src);
8859       if (inst->Texture.NumOffsets == 1) {
8860          /* offset */
8861          offset_src = make_src_reg(inst->TexOffsets[0].File,
8862                                    inst->TexOffsets[0].Index);
8863          offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8864                                   inst->TexOffsets[0].SwizzleY,
8865                                   inst->TexOffsets[0].SwizzleZ,
8866                                   TGSI_SWIZZLE_W);
8867          emit_src_register(emit, &offset_src);
8868       }
8869 
8870       /* resource */
8871       emit_resource_register(emit, unit);
8872 
8873       /* sampler */
8874       sampler = make_src_reg(TGSI_FILE_SAMPLER,
8875                              emit->key.tex[unit].sampler_index);
8876       sampler.Register.SwizzleX =
8877       sampler.Register.SwizzleY =
8878       sampler.Register.SwizzleZ =
8879       sampler.Register.SwizzleW = select_swizzle;
8880       emit_src_register(emit, &sampler);
8881 
8882       if (tgsi_is_shadow_target(target)) {
8883          /* ref */
8884          if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8885             ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8886             emit_tex_compare_refcoord(emit, target, &ref);
8887          }
8888          else {
8889             emit_tex_compare_refcoord(emit, target, &src);
8890          }
8891       }
8892 
8893       end_emit_instruction(emit);
8894       free_temp_indexes(emit);
8895    }
8896    else {
8897       /* Only a single channel is supported in SM4_1 and we report
8898        * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8899        * Only the 0th component will be gathered.
8900        */
8901       switch (emit->key.tex[unit].swizzle_r) {
8902       case PIPE_SWIZZLE_X:
8903          get_texel_offsets(emit, inst, offsets);
8904          src = setup_texcoord(emit, unit, &inst->Src[0]);
8905 
8906          /* Gather dst, coord, resource, sampler */
8907          begin_emit_instruction(emit);
8908          emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8909                             inst->Instruction.Saturate, offsets);
8910          emit_dst_register(emit, &inst->Dst[0]);
8911          emit_src_register(emit, &src);
8912          emit_resource_register(emit, unit);
8913 
8914          /* sampler */
8915          sampler = make_src_reg(TGSI_FILE_SAMPLER,
8916                                 emit->key.tex[unit].sampler_index);
8917          sampler.Register.SwizzleX =
8918          sampler.Register.SwizzleY =
8919          sampler.Register.SwizzleZ =
8920          sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8921          emit_src_register(emit, &sampler);
8922 
8923          end_emit_instruction(emit);
8924          break;
8925       case PIPE_SWIZZLE_W:
8926       case PIPE_SWIZZLE_1:
8927          src = make_immediate_reg_float(emit, 1.0);
8928          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8929          break;
8930       case PIPE_SWIZZLE_Y:
8931       case PIPE_SWIZZLE_Z:
8932       case PIPE_SWIZZLE_0:
8933       default:
8934          src = make_immediate_reg_float(emit, 0.0);
8935          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8936          break;
8937       }
8938    }
8939 
8940    return true;
8941 }
8942 
8943 
8944 
8945 /**
8946  * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8947  */
8948 static bool
emit_tex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8949 emit_tex2(struct svga_shader_emitter_v10 *emit,
8950          const struct tgsi_full_instruction *inst)
8951 {
8952    const uint unit = inst->Src[2].Register.Index;
8953    unsigned target = inst->Texture.Texture;
8954    struct tgsi_full_src_register coord, ref;
8955    int offsets[3];
8956    struct tex_swizzle_info swz_info;
8957    VGPU10_OPCODE_TYPE opcode;
8958    bool compare_in_shader;
8959 
8960    /* check that the sampler returns a float */
8961    if (!is_valid_tex_instruction(emit, inst))
8962       return true;
8963 
8964    compare_in_shader = emit->key.tex[unit].compare_in_shader;
8965    if (compare_in_shader)
8966       opcode = VGPU10_OPCODE_SAMPLE;
8967    else
8968       opcode = VGPU10_OPCODE_SAMPLE_C;
8969 
8970    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8971 
8972    get_texel_offsets(emit, inst, offsets);
8973 
8974    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8975    ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8976 
8977    /* SAMPLE_C dst, coord, resource, sampler, ref */
8978    begin_emit_instruction(emit);
8979    emit_sample_opcode(emit, opcode,
8980                       inst->Instruction.Saturate, offsets);
8981    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8982    emit_src_register(emit, &coord);
8983    emit_resource_register(emit, unit);
8984    emit_sampler_register(emit, unit);
8985    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8986       emit_tex_compare_refcoord(emit, target, &ref);
8987    }
8988    end_emit_instruction(emit);
8989 
8990    end_tex_swizzle(emit, &swz_info);
8991 
8992    free_temp_indexes(emit);
8993 
8994    return true;
8995 }
8996 
8997 
8998 /**
8999  * Emit code for TGSI_OPCODE_TXP (projective texture)
9000  */
9001 static bool
emit_txp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9002 emit_txp(struct svga_shader_emitter_v10 *emit,
9003          const struct tgsi_full_instruction *inst)
9004 {
9005    const uint unit = inst->Src[1].Register.Index;
9006    const enum tgsi_texture_type target = inst->Texture.Texture;
9007    VGPU10_OPCODE_TYPE opcode;
9008    int offsets[3];
9009    unsigned tmp = get_temp_index(emit);
9010    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9011    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9012    struct tgsi_full_src_register src0_wwww =
9013       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9014    struct tgsi_full_src_register coord;
9015    struct tex_swizzle_info swz_info;
9016    bool compare_in_shader;
9017 
9018    /* check that the sampler returns a float */
9019    if (!is_valid_tex_instruction(emit, inst))
9020       return true;
9021 
9022    compare_in_shader = tgsi_is_shadow_target(target) &&
9023                        emit->key.tex[unit].compare_in_shader;
9024 
9025    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
9026 
9027    get_texel_offsets(emit, inst, offsets);
9028 
9029    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9030 
9031    /* DIV tmp, coord, coord.wwww */
9032    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
9033                         &coord, &src0_wwww);
9034 
9035    /* SAMPLE dst, coord(tmp), resource, sampler */
9036    begin_emit_instruction(emit);
9037 
9038    if (tgsi_is_shadow_target(target) && !compare_in_shader)
9039       /* NOTE: for non-fragment shaders, we should use
9040        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
9041        */
9042       opcode = VGPU10_OPCODE_SAMPLE_C;
9043    else
9044       opcode = VGPU10_OPCODE_SAMPLE;
9045 
9046    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9047    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9048    emit_src_register(emit, &tmp_src);  /* projected coord */
9049    emit_resource_register(emit, unit);
9050    emit_sampler_register(emit, unit);
9051    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
9052       emit_tex_compare_refcoord(emit, target, &tmp_src);
9053    }
9054    end_emit_instruction(emit);
9055 
9056    end_tex_swizzle(emit, &swz_info);
9057 
9058    free_temp_indexes(emit);
9059 
9060    return true;
9061 }
9062 
9063 
9064 /**
9065  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
9066  */
9067 static bool
emit_txd(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9068 emit_txd(struct svga_shader_emitter_v10 *emit,
9069          const struct tgsi_full_instruction *inst)
9070 {
9071    const uint unit = inst->Src[3].Register.Index;
9072    const enum tgsi_texture_type target = inst->Texture.Texture;
9073    int offsets[3];
9074    struct tgsi_full_src_register coord;
9075    struct tex_swizzle_info swz_info;
9076 
9077    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9078                      &swz_info);
9079 
9080    get_texel_offsets(emit, inst, offsets);
9081 
9082    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9083 
9084    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
9085    begin_emit_instruction(emit);
9086    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
9087                       inst->Instruction.Saturate, offsets);
9088    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9089    emit_src_register(emit, &coord);
9090    emit_resource_register(emit, unit);
9091    emit_sampler_register(emit, unit);
9092    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
9093    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
9094    end_emit_instruction(emit);
9095 
9096    end_tex_swizzle(emit, &swz_info);
9097 
9098    free_temp_indexes(emit);
9099 
9100    return true;
9101 }
9102 
9103 
9104 /**
9105  * Emit code for TGSI_OPCODE_TXF (texel fetch)
9106  */
9107 static bool
emit_txf(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9108 emit_txf(struct svga_shader_emitter_v10 *emit,
9109          const struct tgsi_full_instruction *inst)
9110 {
9111    const uint unit = inst->Src[1].Register.Index;
9112    const bool msaa = tgsi_is_msaa_target(inst->Texture.Texture)
9113       && emit->key.tex[unit].num_samples > 1;
9114    int offsets[3];
9115    struct tex_swizzle_info swz_info;
9116 
9117    begin_tex_swizzle(emit, unit, inst, false, &swz_info);
9118 
9119    get_texel_offsets(emit, inst, offsets);
9120 
9121    if (msaa) {
9122       assert(emit->key.tex[unit].num_samples > 1);
9123 
9124       /* Fetch one sample from an MSAA texture */
9125       struct tgsi_full_src_register sampleIndex =
9126          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9127       /* LD_MS dst, coord(s0), resource, sampleIndex */
9128       begin_emit_instruction(emit);
9129       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
9130                          inst->Instruction.Saturate, offsets);
9131       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9132       emit_src_register(emit, &inst->Src[0]);
9133       emit_resource_register(emit, unit);
9134       emit_src_register(emit, &sampleIndex);
9135       end_emit_instruction(emit);
9136    }
9137    else {
9138       /* Fetch one texel specified by integer coordinate */
9139       /* LD dst, coord(s0), resource */
9140       begin_emit_instruction(emit);
9141       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
9142                          inst->Instruction.Saturate, offsets);
9143       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9144       emit_src_register(emit, &inst->Src[0]);
9145       emit_resource_register(emit, unit);
9146       end_emit_instruction(emit);
9147    }
9148 
9149    end_tex_swizzle(emit, &swz_info);
9150 
9151    free_temp_indexes(emit);
9152 
9153    return true;
9154 }
9155 
9156 
9157 /**
9158  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
9159  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
9160  */
9161 static bool
emit_txl_txb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9162 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
9163              const struct tgsi_full_instruction *inst)
9164 {
9165    const enum tgsi_texture_type target = inst->Texture.Texture;
9166    VGPU10_OPCODE_TYPE opcode;
9167    unsigned unit;
9168    int offsets[3];
9169    struct tgsi_full_src_register coord, lod_bias;
9170    struct tex_swizzle_info swz_info;
9171 
9172    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
9173           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
9174           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
9175 
9176    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
9177       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9178       unit = inst->Src[2].Register.Index;
9179    }
9180    else {
9181       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9182       unit = inst->Src[1].Register.Index;
9183    }
9184 
9185    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9186                      &swz_info);
9187 
9188    get_texel_offsets(emit, inst, offsets);
9189 
9190    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9191 
9192    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
9193    begin_emit_instruction(emit);
9194    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
9195       opcode = VGPU10_OPCODE_SAMPLE_L;
9196    }
9197    else {
9198       opcode = VGPU10_OPCODE_SAMPLE_B;
9199    }
9200    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9201    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9202    emit_src_register(emit, &coord);
9203    emit_resource_register(emit, unit);
9204    emit_sampler_register(emit, unit);
9205    emit_src_register(emit, &lod_bias);
9206    end_emit_instruction(emit);
9207 
9208    end_tex_swizzle(emit, &swz_info);
9209 
9210    free_temp_indexes(emit);
9211 
9212    return true;
9213 }
9214 
9215 
9216 /**
9217  * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
9218  */
9219 static bool
emit_txl2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9220 emit_txl2(struct svga_shader_emitter_v10 *emit,
9221           const struct tgsi_full_instruction *inst)
9222 {
9223    unsigned target = inst->Texture.Texture;
9224    unsigned opcode, unit;
9225    int offsets[3];
9226    struct tgsi_full_src_register coord, lod;
9227    struct tex_swizzle_info swz_info;
9228 
9229    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
9230 
9231    lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9232    unit = inst->Src[2].Register.Index;
9233 
9234    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9235                      &swz_info);
9236 
9237    get_texel_offsets(emit, inst, offsets);
9238 
9239    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9240 
9241    /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
9242    begin_emit_instruction(emit);
9243    opcode = VGPU10_OPCODE_SAMPLE_L;
9244    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9245    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9246    emit_src_register(emit, &coord);
9247    emit_resource_register(emit, unit);
9248    emit_sampler_register(emit, unit);
9249    emit_src_register(emit, &lod);
9250    end_emit_instruction(emit);
9251 
9252    end_tex_swizzle(emit, &swz_info);
9253 
9254    free_temp_indexes(emit);
9255 
9256    return true;
9257 }
9258 
9259 
9260 /**
9261  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
9262  */
9263 static bool
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9264 emit_txq(struct svga_shader_emitter_v10 *emit,
9265          const struct tgsi_full_instruction *inst)
9266 {
9267    const uint unit = inst->Src[1].Register.Index;
9268 
9269    if (emit->key.tex[unit].target == PIPE_BUFFER) {
9270       /* RESINFO does not support querying texture buffers, so we instead
9271        * store texture buffer sizes in shader constants, then copy them to
9272        * implement TXQ instead of emitting RESINFO.
9273        * MOV dst, const[texture_buffer_size_index[unit]]
9274        */
9275       struct tgsi_full_src_register size_src =
9276          make_src_const_reg(emit->texture_buffer_size_index[unit]);
9277       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
9278    } else {
9279       /* RESINFO dst, srcMipLevel, resource */
9280       begin_emit_instruction(emit);
9281       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
9282       emit_dst_register(emit, &inst->Dst[0]);
9283       emit_src_register(emit, &inst->Src[0]);
9284       emit_resource_register(emit, unit);
9285       end_emit_instruction(emit);
9286    }
9287 
9288    free_temp_indexes(emit);
9289 
9290    return true;
9291 }
9292 
9293 
9294 /**
9295  * Does this opcode produce a double-precision result?
9296  * XXX perhaps move this to a TGSI utility.
9297  */
9298 static bool
opcode_has_dbl_dst(unsigned opcode)9299 opcode_has_dbl_dst(unsigned opcode)
9300 {
9301    switch (opcode) {
9302    case TGSI_OPCODE_F2D:
9303    case TGSI_OPCODE_DABS:
9304    case TGSI_OPCODE_DADD:
9305    case TGSI_OPCODE_DFRAC:
9306    case TGSI_OPCODE_DMAX:
9307    case TGSI_OPCODE_DMIN:
9308    case TGSI_OPCODE_DMUL:
9309    case TGSI_OPCODE_DNEG:
9310    case TGSI_OPCODE_I2D:
9311    case TGSI_OPCODE_U2D:
9312    case TGSI_OPCODE_DFMA:
9313       // XXX more TBD
9314       return true;
9315    default:
9316       return false;
9317    }
9318 }
9319 
9320 
9321 /**
9322  * Does this opcode use double-precision source registers?
9323  */
9324 static bool
opcode_has_dbl_src(unsigned opcode)9325 opcode_has_dbl_src(unsigned opcode)
9326 {
9327    switch (opcode) {
9328    case TGSI_OPCODE_D2F:
9329    case TGSI_OPCODE_DABS:
9330    case TGSI_OPCODE_DADD:
9331    case TGSI_OPCODE_DFRAC:
9332    case TGSI_OPCODE_DMAX:
9333    case TGSI_OPCODE_DMIN:
9334    case TGSI_OPCODE_DMUL:
9335    case TGSI_OPCODE_DNEG:
9336    case TGSI_OPCODE_D2I:
9337    case TGSI_OPCODE_D2U:
9338    case TGSI_OPCODE_DFMA:
9339    case TGSI_OPCODE_DSLT:
9340    case TGSI_OPCODE_DSGE:
9341    case TGSI_OPCODE_DSEQ:
9342    case TGSI_OPCODE_DSNE:
9343    case TGSI_OPCODE_DRCP:
9344    case TGSI_OPCODE_DSQRT:
9345    case TGSI_OPCODE_DMAD:
9346    case TGSI_OPCODE_DLDEXP:
9347    case TGSI_OPCODE_DRSQ:
9348    case TGSI_OPCODE_DTRUNC:
9349    case TGSI_OPCODE_DCEIL:
9350    case TGSI_OPCODE_DFLR:
9351    case TGSI_OPCODE_DROUND:
9352    case TGSI_OPCODE_DSSG:
9353       return true;
9354    default:
9355       return false;
9356    }
9357 }
9358 
9359 
9360 /**
9361  * Check that the swizzle for reading from a double-precision register
9362  * is valid. If not valid, move the source to a temporary register first.
9363  */
9364 static struct tgsi_full_src_register
check_double_src(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)9365 check_double_src(struct svga_shader_emitter_v10 *emit,
9366                  const struct tgsi_full_src_register *reg)
9367 {
9368    struct tgsi_full_src_register src;
9369 
9370    if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
9371          reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
9372         (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
9373          reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
9374        ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
9375          reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
9376         (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
9377          reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
9378       src = *reg;
9379    } else {
9380       /* move the src to a temporary to fix the swizzle */
9381       unsigned tmp = get_temp_index(emit);
9382       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9383       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9384       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
9385       src = tmp_src;
9386 
9387       /* The temporary index will be released in the caller */
9388    }
9389    return src;
9390 }
9391 
9392 /**
9393  * Check that the writemask for a double-precision instruction is valid.
9394  */
9395 static void
check_double_dst_writemask(const struct tgsi_full_instruction * inst)9396 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
9397 {
9398    ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
9399 
9400    switch (inst->Instruction.Opcode) {
9401    case TGSI_OPCODE_DABS:
9402    case TGSI_OPCODE_DADD:
9403    case TGSI_OPCODE_DFRAC:
9404    case TGSI_OPCODE_DNEG:
9405    case TGSI_OPCODE_DMAD:
9406    case TGSI_OPCODE_DMAX:
9407    case TGSI_OPCODE_DMIN:
9408    case TGSI_OPCODE_DMUL:
9409    case TGSI_OPCODE_DRCP:
9410    case TGSI_OPCODE_DSQRT:
9411    case TGSI_OPCODE_F2D:
9412    case TGSI_OPCODE_DFMA:
9413       assert(writemask == TGSI_WRITEMASK_XYZW ||
9414              writemask == TGSI_WRITEMASK_XY ||
9415              writemask == TGSI_WRITEMASK_ZW);
9416       break;
9417    case TGSI_OPCODE_DSEQ:
9418    case TGSI_OPCODE_DSGE:
9419    case TGSI_OPCODE_DSNE:
9420    case TGSI_OPCODE_DSLT:
9421    case TGSI_OPCODE_D2I:
9422    case TGSI_OPCODE_D2U:
9423       /* Write to 1 or 2 components only */
9424       assert(util_bitcount(writemask) <= 2);
9425       break;
9426    default:
9427       /* XXX this list may be incomplete */
9428       ;
9429    }
9430 }
9431 
9432 
9433 /**
9434  * Double-precision absolute value.
9435  */
9436 static bool
emit_dabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9437 emit_dabs(struct svga_shader_emitter_v10 *emit,
9438           const struct tgsi_full_instruction *inst)
9439 {
9440    assert(emit->version >= 50);
9441 
9442    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9443    check_double_dst_writemask(inst);
9444 
9445    struct tgsi_full_src_register abs_src = absolute_src(&src);
9446 
9447    /* DMOV dst, |src| */
9448    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
9449 
9450    free_temp_indexes(emit);
9451    return true;
9452 }
9453 
9454 
9455 /**
9456  * Double-precision negation
9457  */
9458 static bool
emit_dneg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9459 emit_dneg(struct svga_shader_emitter_v10 *emit,
9460           const struct tgsi_full_instruction *inst)
9461 {
9462    assert(emit->version >= 50);
9463    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9464    check_double_dst_writemask(inst);
9465 
9466    struct tgsi_full_src_register neg_src = negate_src(&src);
9467 
9468    /* DMOV dst, -src */
9469    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
9470 
9471    free_temp_indexes(emit);
9472    return true;
9473 }
9474 
9475 
9476 /**
9477  * SM5 has no DMAD opcode.  Implement negation with DMUL/DADD.
9478  */
9479 static bool
emit_dmad(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9480 emit_dmad(struct svga_shader_emitter_v10 *emit,
9481           const struct tgsi_full_instruction *inst)
9482 {
9483    assert(emit->version >= 50);
9484    struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
9485    struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
9486    struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
9487    check_double_dst_writemask(inst);
9488 
9489    unsigned tmp = get_temp_index(emit);
9490    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9491    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9492 
9493    /* DMUL tmp, src[0], src[1] */
9494    emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
9495                         &tmp_dst, &src0, &src1, NULL,
9496                         false, inst->Instruction.Precise);
9497 
9498    /* DADD dst, tmp, src[2] */
9499    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9500                         &inst->Dst[0], &tmp_src, &src2, NULL,
9501                         inst->Instruction.Saturate, inst->Instruction.Precise);
9502    free_temp_indexes(emit);
9503 
9504    return true;
9505 }
9506 
9507 
9508 /**
9509  * Double precision reciprocal square root
9510  */
9511 static bool
emit_drsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)9512 emit_drsq(struct svga_shader_emitter_v10 *emit,
9513           const struct tgsi_full_dst_register *dst,
9514           const struct tgsi_full_src_register *src)
9515 {
9516    assert(emit->version >= 50);
9517 
9518    VGPU10OpcodeToken0 token0;
9519    struct tgsi_full_src_register dsrc = check_double_src(emit, src);
9520 
9521    begin_emit_instruction(emit);
9522 
9523    token0.value = 0;
9524    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9525    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
9526    emit_dword(emit, token0.value);
9527    emit_dst_register(emit, dst);
9528    emit_src_register(emit, &dsrc);
9529    end_emit_instruction(emit);
9530 
9531    free_temp_indexes(emit);
9532 
9533    return true;
9534 }
9535 
9536 
9537 /**
9538  * There is no SM5 opcode for double precision square root.
9539  * It will be implemented with DRSQ.
9540  * dst = src * DRSQ(src)
9541  */
9542 static bool
emit_dsqrt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9543 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
9544           const struct tgsi_full_instruction *inst)
9545 {
9546    assert(emit->version >= 50);
9547 
9548    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9549 
9550    /* temporary register to hold the source */
9551    unsigned tmp = get_temp_index(emit);
9552    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9553    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9554 
9555    /* temporary register to hold the DEQ result */
9556    unsigned tmp_cond = get_temp_index(emit);
9557    struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
9558    struct tgsi_full_dst_register tmp_cond_dst_xy =
9559       writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9560    struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
9561    struct tgsi_full_src_register tmp_cond_src_xy =
9562          swizzle_src(&tmp_cond_src,
9563                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9564                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9565 
9566    /* The reciprocal square root of zero yields INF.
9567     * So if the source is 0, we replace it with 1 in the tmp register.
9568     * The later multiplication of zero in the original source will yield 0
9569     * in the result.
9570     */
9571 
9572    /* tmp1 = (src == 0) ? 1 : src;
9573     *   EQ tmp1, 0, src
9574     *   MOVC tmp, tmp1, 1.0, src
9575     */
9576    struct tgsi_full_src_register zero =
9577                make_immediate_reg_double(emit, 0);
9578 
9579    struct tgsi_full_src_register one =
9580                make_immediate_reg_double(emit, 1.0);
9581 
9582    emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
9583                         &zero, &src);
9584    emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
9585                         &tmp_cond_src_xy, &one, &src);
9586 
9587    struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
9588    struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
9589 
9590    /* DRSQ tmp_rsq, tmp */
9591    emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
9592 
9593    /* DMUL dst, tmp_rsq, src[0] */
9594    emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
9595                         &tmp_rsq_src, &src);
9596 
9597    free_temp_indexes(emit);
9598 
9599    return true;
9600 }
9601 
9602 
9603 /**
9604  * glsl-nir path does not lower DTRUNC, so we need to
9605  * add the translation here.
9606  *
9607  * frac = DFRAC(src)
9608  * tmp = src - frac
9609  * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
9610  */
9611 static bool
emit_dtrunc(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9612 emit_dtrunc(struct svga_shader_emitter_v10 *emit,
9613             const struct tgsi_full_instruction *inst)
9614 {
9615    assert(emit->version >= 50);
9616 
9617    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9618 
9619    /* frac = DFRAC(src) */
9620    unsigned frac_index = get_temp_index(emit);
9621    struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
9622    struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);
9623 
9624    VGPU10OpcodeToken0 token0;
9625    begin_emit_instruction(emit);
9626    token0.value = 0;
9627    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9628    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
9629    emit_dword(emit, token0.value);
9630    emit_dst_register(emit, &frac_dst);
9631    emit_src_register(emit, &src);
9632    end_emit_instruction(emit);
9633 
9634    /* tmp = src - frac */
9635    unsigned tmp_index = get_temp_index(emit);
9636    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
9637    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
9638    struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
9639    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9640                         &tmp_dst, &src, &negate_frac_src, NULL,
9641                         inst->Instruction.Saturate, inst->Instruction.Precise);
9642 
9643    /* cond = frac==0 */
9644    unsigned cond_index = get_temp_index(emit);
9645    struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
9646    struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
9647    struct tgsi_full_src_register zero =
9648                make_immediate_reg_double(emit, 0);
9649 
9650    /* Only use one or two components for double opcode */
9651    cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9652 
9653    emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
9654                         &cond_dst, &frac_src, &zero, NULL,
9655                         inst->Instruction.Saturate, inst->Instruction.Precise);
9656 
9657    /* tmp2 = cond ? 0 : 1 */
9658    unsigned tmp2_index = get_temp_index(emit);
9659    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
9660    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
9661    struct tgsi_full_src_register cond_src_xy =
9662       swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9663 		             PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9664    struct tgsi_full_src_register one =
9665                make_immediate_reg_double(emit, 1.0);
9666 
9667    emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9668                         &tmp2_dst, &cond_src_xy, &zero, &one,
9669                         inst->Instruction.Saturate, inst->Instruction.Precise);
9670 
9671    /* tmp2 = tmp + tmp2 */
9672    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9673                         &tmp2_dst, &tmp_src, &tmp2_src, NULL,
9674                         inst->Instruction.Saturate, inst->Instruction.Precise);
9675 
9676    /* cond = src>=0 */
9677    emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
9678                         &cond_dst, &src, &zero, NULL,
9679                         inst->Instruction.Saturate, inst->Instruction.Precise);
9680 
9681    /* dst = cond ? tmp : tmp2 */
9682    emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9683                         &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
9684                         inst->Instruction.Saturate, inst->Instruction.Precise);
9685 
9686    free_temp_indexes(emit);
9687    return true;
9688 }
9689 
9690 
9691 static bool
emit_interp_offset(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9692 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
9693                    const struct tgsi_full_instruction *inst)
9694 {
9695    assert(emit->version >= 50);
9696 
9697    /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
9698     * where (0,0) is the center of the pixel.  We need to translate that
9699     * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
9700     * Also need to flip the Y axis (I think).
9701     */
9702    unsigned tmp = get_temp_index(emit);
9703    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9704    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9705    struct tgsi_full_dst_register tmp_dst_xy =
9706       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9707    struct tgsi_full_src_register const16 =
9708       make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
9709 
9710    /* MUL tmp.xy, src1, {16, -16, 0, 0} */
9711    emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
9712                         &tmp_dst_xy, &inst->Src[1], &const16);
9713 
9714    /* FTOI tmp.xy, tmp */
9715    emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
9716 
9717    /* EVAL_SNAPPED dst, src0, tmp */
9718    emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
9719                         &inst->Dst[0], &inst->Src[0], &tmp_src);
9720 
9721    free_temp_indexes(emit);
9722 
9723    return true;
9724 }
9725 
9726 
9727 /**
9728  * Emit a simple instruction (like ADD, MUL, MIN, etc).
9729  */
9730 static bool
emit_simple(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9731 emit_simple(struct svga_shader_emitter_v10 *emit,
9732             const struct tgsi_full_instruction *inst)
9733 {
9734    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9735    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9736    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9737    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9738    unsigned i;
9739 
9740    struct tgsi_full_src_register src[3];
9741 
9742    if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
9743       emit->current_loop_depth++;
9744    }
9745    else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
9746       emit->current_loop_depth--;
9747    }
9748 
9749    for (i = 0; i < op->num_src; i++) {
9750       if (dbl_src)
9751          src[i] = check_double_src(emit, &inst->Src[i]);
9752       else
9753          src[i] = inst->Src[i];
9754    }
9755 
9756    begin_emit_instruction(emit);
9757    emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
9758                        inst->Instruction.Saturate,
9759                        inst->Instruction.Precise);
9760    for (i = 0; i < op->num_dst; i++) {
9761       if (dbl_dst) {
9762          check_double_dst_writemask(inst);
9763       }
9764       emit_dst_register(emit, &inst->Dst[i]);
9765    }
9766    for (i = 0; i < op->num_src; i++) {
9767       emit_src_register(emit, &src[i]);
9768    }
9769    end_emit_instruction(emit);
9770 
9771    free_temp_indexes(emit);
9772    return true;
9773 }
9774 
9775 
9776 /**
9777  * Emit MSB instruction (like IMSB, UMSB).
9778  *
9779  * GLSL returns the index starting from the LSB;
9780  * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
9781  * To get correct location as per glsl from SM5 device, we should
9782  * return (31 - index) if returned index is not -1.
9783  */
9784 static bool
emit_msb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9785 emit_msb(struct svga_shader_emitter_v10 *emit,
9786          const struct tgsi_full_instruction *inst)
9787 {
9788    const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
9789 
9790    assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
9791 
9792    struct tgsi_full_src_register index_src =
9793       make_src_reg(index_dst->Register.File, index_dst->Register.Index);
9794    struct tgsi_full_src_register imm31 =
9795       make_immediate_reg_int(emit, 31);
9796    imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
9797    struct tgsi_full_src_register neg_one =
9798       make_immediate_reg_int(emit, -1);
9799    neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
9800    unsigned tmp = get_temp_index(emit);
9801    const struct tgsi_full_dst_register tmp_dst =
9802       make_dst_temp_reg(tmp);
9803    const struct tgsi_full_dst_register tmp_dst_x =
9804       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
9805    const struct tgsi_full_src_register tmp_src_x =
9806        make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
9807    int writemask = TGSI_WRITEMASK_X;
9808    int src_swizzle = TGSI_SWIZZLE_X;
9809    int dst_writemask = index_dst->Register.WriteMask;
9810 
9811    emit_simple(emit, inst);
9812 
9813    /* index conversion from SM5 to GLSL */
9814    while (writemask & dst_writemask) {
9815       struct tgsi_full_src_register index_src_comp =
9816          scalar_src(&index_src, src_swizzle);
9817       struct tgsi_full_dst_register index_dst_comp =
9818          writemask_dst(index_dst, writemask);
9819 
9820       /* check if index_src_comp != -1 */
9821       emit_instruction_op2(emit, VGPU10_OPCODE_INE,
9822                            &tmp_dst_x, &index_src_comp, &neg_one);
9823 
9824       /* if */
9825       emit_if(emit, &tmp_src_x);
9826 
9827       index_src_comp = negate_src(&index_src_comp);
9828       /* SUB DST, IMM{31}, DST */
9829       emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
9830                            &index_dst_comp, &imm31, &index_src_comp);
9831 
9832       /* endif */
9833       emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9834 
9835       writemask = writemask << 1;
9836       src_swizzle = src_swizzle + 1;
9837    }
9838    free_temp_indexes(emit);
9839    return true;
9840 }
9841 
9842 
9843 /**
9844  * Emit a BFE instruction (like UBFE, IBFE).
9845  * tgsi representation:
9846  * U/IBFE dst, value, offset, width
9847  * SM5 representation:
9848  * U/IBFE dst, width, offset, value
9849  * Note: SM5 has width & offset range (0-31);
9850  *      whereas GLSL has width & offset range (0-32)
9851  */
9852 static bool
emit_bfe(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9853 emit_bfe(struct svga_shader_emitter_v10 *emit,
9854          const struct tgsi_full_instruction *inst)
9855 {
9856    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9857    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9858    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9859    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9860    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9861 
9862    unsigned tmp1 = get_temp_index(emit);
9863    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9864    const struct tgsi_full_dst_register cond1_dst_x =
9865       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9866    const struct tgsi_full_src_register cond1_src_x =
9867       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9868 
9869    unsigned tmp2 = get_temp_index(emit);
9870    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9871    const struct tgsi_full_dst_register cond2_dst_x =
9872       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9873    const struct tgsi_full_src_register cond2_src_x =
9874       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9875 
9876    /**
9877     * In SM5, when width = 32  and offset = 0, it returns 0.
9878     * On the other hand GLSL, expects value to be copied as it is, to dst.
9879     */
9880 
9881    /* cond1 = width ! = 32 */
9882    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9883                         &cond1_dst_x, &inst->Src[2], &imm32);
9884 
9885    /* cond2 = offset ! = 0 */
9886    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9887                         &cond2_dst_x, &inst->Src[1], &zero);
9888 
9889    /* cond 2 = cond1 & cond 2 */
9890    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9891                         &cond2_src_x,
9892                         &cond1_src_x);
9893    /* IF */
9894    emit_if(emit, &cond2_src_x);
9895 
9896    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9897                         &inst->Src[0]);
9898 
9899    /* ELSE */
9900    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9901 
9902    /* U/IBFE dst, width, offset, value */
9903    emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9904                         &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9905 
9906    /* ENDIF */
9907    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9908 
9909    free_temp_indexes(emit);
9910    return true;
9911 }
9912 
9913 
9914 /**
9915  * Emit BFI  instruction
9916  * tgsi representation:
9917  * BFI dst, base, insert, offset, width
9918  * SM5 representation:
9919  * BFI dst, width, offset, insert, base
9920  * Note: SM5 has width & offset range (0-31);
9921  *      whereas GLSL has width & offset range (0-32)
9922  */
9923 static bool
emit_bfi(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9924 emit_bfi(struct svga_shader_emitter_v10 *emit,
9925          const struct tgsi_full_instruction *inst)
9926 {
9927    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9928    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9929    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9930 
9931    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9932    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9933 
9934    unsigned tmp1 = get_temp_index(emit);
9935    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9936    const struct tgsi_full_dst_register cond1_dst_x =
9937       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9938    const struct tgsi_full_src_register cond1_src_x =
9939       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9940 
9941    unsigned tmp2 = get_temp_index(emit);
9942    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9943    const struct tgsi_full_dst_register cond2_dst_x =
9944       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9945    const struct tgsi_full_src_register cond2_src_x =
9946       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9947 
9948    /**
9949     * In SM5, when width = 32  and offset = 0, it returns 0.
9950     * On the other hand GLSL, expects insert to be copied as it is, to dst.
9951     */
9952 
9953    /* cond1 = width == 32 */
9954    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9955                         &cond1_dst_x, &inst->Src[3], &imm32);
9956 
9957    /* cond1 = offset == 0 */
9958    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9959                         &cond2_dst_x, &inst->Src[2], &zero);
9960 
9961    /* cond2 = cond1 & cond2 */
9962    emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9963                         &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9964 
9965    /* if */
9966    emit_if(emit, &cond2_src_x);
9967 
9968    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9969                         &inst->Src[1]);
9970 
9971    /* else */
9972    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9973 
9974    /* BFI dst, width, offset, insert, base */
9975    begin_emit_instruction(emit);
9976    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9977    emit_dst_register(emit, &inst->Dst[0]);
9978    emit_src_register(emit, &inst->Src[3]);
9979    emit_src_register(emit, &inst->Src[2]);
9980    emit_src_register(emit, &inst->Src[1]);
9981    emit_src_register(emit, &inst->Src[0]);
9982    end_emit_instruction(emit);
9983 
9984    /* endif */
9985    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9986 
9987    free_temp_indexes(emit);
9988    return true;
9989 }
9990 
9991 
9992 /**
9993  * We only special case the MOV instruction to try to detect constant
9994  * color writes in the fragment shader.
9995  */
9996 static bool
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9997 emit_mov(struct svga_shader_emitter_v10 *emit,
9998          const struct tgsi_full_instruction *inst)
9999 {
10000    const struct tgsi_full_src_register *src = &inst->Src[0];
10001    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
10002 
10003    if (emit->unit == PIPE_SHADER_FRAGMENT &&
10004        dst->Register.File == TGSI_FILE_OUTPUT &&
10005        dst->Register.Index == 0 &&
10006        src->Register.File == TGSI_FILE_CONSTANT &&
10007        !src->Register.Indirect) {
10008       emit->constant_color_output = true;
10009    }
10010 
10011    return emit_simple(emit, inst);
10012 }
10013 
10014 
10015 /**
10016  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
10017  * where TGSI only uses one dest register.
10018  */
10019 static bool
emit_simple_1dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned dst_count,unsigned dst_index)10020 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
10021                  const struct tgsi_full_instruction *inst,
10022                  unsigned dst_count,
10023                  unsigned dst_index)
10024 {
10025    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10026    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
10027    unsigned i;
10028 
10029    begin_emit_instruction(emit);
10030    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
10031 
10032    for (i = 0; i < dst_count; i++) {
10033       if (i == dst_index) {
10034          emit_dst_register(emit, &inst->Dst[0]);
10035       } else {
10036          emit_null_dst_register(emit);
10037       }
10038    }
10039 
10040    for (i = 0; i < op->num_src; i++) {
10041       emit_src_register(emit, &inst->Src[i]);
10042    }
10043    end_emit_instruction(emit);
10044 
10045    return true;
10046 }
10047 
10048 
10049 /**
10050  * Emit a vmware specific VGPU10 instruction.
10051  */
10052 static bool
emit_vmware(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_VMWARE_OPCODE_TYPE subopcode)10053 emit_vmware(struct svga_shader_emitter_v10 *emit,
10054             const struct tgsi_full_instruction *inst,
10055             VGPU10_VMWARE_OPCODE_TYPE subopcode)
10056 {
10057    VGPU10OpcodeToken0 token0;
10058    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10059    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
10060    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
10061    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
10062    unsigned i;
10063    struct tgsi_full_src_register src[3];
10064 
10065    for (i = 0; i < op->num_src; i++) {
10066       if (dbl_src)
10067          src[i] = check_double_src(emit, &inst->Src[i]);
10068       else
10069          src[i] = inst->Src[i];
10070    }
10071 
10072    begin_emit_instruction(emit);
10073 
10074    assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
10075 
10076    token0.value = 0;
10077    token0.opcodeType = VGPU10_OPCODE_VMWARE;
10078    token0.vmwareOpcodeType = subopcode;
10079    emit_dword(emit, token0.value);
10080 
10081    if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
10082       /* IDIV only uses the first dest register. */
10083       emit_dst_register(emit, &inst->Dst[0]);
10084       emit_null_dst_register(emit);
10085    } else {
10086       for (i = 0; i < op->num_dst; i++) {
10087          if (dbl_dst) {
10088             check_double_dst_writemask(inst);
10089          }
10090          emit_dst_register(emit, &inst->Dst[i]);
10091       }
10092    }
10093 
10094    for (i = 0; i < op->num_src; i++) {
10095       emit_src_register(emit, &src[i]);
10096    }
10097    end_emit_instruction(emit);
10098 
10099    free_temp_indexes(emit);
10100    return true;
10101 }
10102 
/**
 * Emit a memory register: memory_op selects the kind of access for which
 * emit_memory_register() builds the operand.
 */
10106 
/* Kind of access for which a memory register operand is emitted. */
typedef enum {
   MEM_STORE          = 0,   /* operand is a destination (write mask) */
   MEM_LOAD           = 1,   /* operand is a source (swizzle) */
   MEM_ATOMIC_COUNTER = 2    /* operand has no component selection */
} memory_op;
10112 
10113 static void
emit_memory_register(struct svga_shader_emitter_v10 * emit,memory_op mem_op,const struct tgsi_full_instruction * inst,unsigned regIndex,unsigned writemask)10114 emit_memory_register(struct svga_shader_emitter_v10 *emit,
10115                      memory_op mem_op,
10116                      const struct tgsi_full_instruction *inst,
10117                      unsigned regIndex, unsigned writemask)
10118 {
10119    VGPU10OperandToken0 operand0;
10120    unsigned resIndex = 0;
10121 
10122    operand0.value = 0;
10123    operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
10124    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10125    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10126 
10127    switch (mem_op) {
10128    case MEM_ATOMIC_COUNTER:
10129    {
10130       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10131       resIndex = inst->Src[regIndex].Register.Index;
10132       break;
10133    }
10134    case MEM_STORE:
10135    {
10136       const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex];
10137 
10138       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10139       operand0.mask = writemask;
10140       resIndex = reg->Register.Index;
10141       break;
10142    }
10143    case MEM_LOAD:
10144    {
10145       const struct tgsi_full_src_register *reg = &inst->Src[regIndex];
10146 
10147       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10148       operand0.swizzleX = reg->Register.SwizzleX;
10149       operand0.swizzleY = reg->Register.SwizzleY;
10150       operand0.swizzleZ = reg->Register.SwizzleZ;
10151       operand0.swizzleW = reg->Register.SwizzleW;
10152       resIndex = reg->Register.Index;
10153       break;
10154    }
10155    default:
10156       assert(!"Unexpected memory opcode");
10157       break;
10158    }
10159 
10160    emit_dword(emit, operand0.value);
10161    emit_dword(emit, resIndex);
10162 }
10163 
10164 
/* Kind of UAV access for which a UAV register operand is emitted. */
typedef enum {
   UAV_STORE  = 0,   /* operand encoded with a write mask */
   UAV_LOAD   = 1,   /* operand encoded with an identity swizzle */
   UAV_ATOMIC = 2,   /* operand encoded with no components */
   UAV_RESQ   = 3    /* encoded the same way as UAV_LOAD */
} UAV_OP;
10171 
10172 
10173 /**
10174  * Emit a uav register
10175  * \param uav_index     index of resource register
10176  * \param uav_op        UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode
10177  * \param resourceType  resource file type
10178  * \param writemask     resource writemask
10179  */
10180 
10181 static void
emit_uav_register(struct svga_shader_emitter_v10 * emit,unsigned res_index,UAV_OP uav_op,enum tgsi_file_type resourceType,unsigned writemask)10182 emit_uav_register(struct svga_shader_emitter_v10 *emit,
10183                   unsigned res_index, UAV_OP uav_op,
10184                   enum tgsi_file_type resourceType, unsigned writemask)
10185 {
10186    VGPU10OperandToken0 operand0;
10187    unsigned uav_index = INVALID_INDEX;
10188 
10189    operand0.value = 0;
10190    operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
10191    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10192    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10193 
10194    switch (resourceType) {
10195    case TGSI_FILE_IMAGE:
10196       uav_index = emit->key.images[res_index].uav_index;
10197       break;
10198    case TGSI_FILE_BUFFER:
10199       uav_index = emit->key.shader_buf_uav_index[res_index];
10200       break;
10201    case TGSI_FILE_HW_ATOMIC:
10202       uav_index = emit->key.atomic_buf_uav_index[res_index];
10203       break;
10204    default:
10205       assert(0);
10206    }
10207 
10208    switch (uav_op) {
10209    case UAV_ATOMIC:
10210       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10211       break;
10212 
10213    case UAV_STORE:
10214       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10215       operand0.mask = writemask;
10216       break;
10217 
10218    case UAV_LOAD:
10219    case UAV_RESQ:
10220       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10221       operand0.swizzleX = VGPU10_COMPONENT_X;
10222       operand0.swizzleY = VGPU10_COMPONENT_Y;
10223       operand0.swizzleZ = VGPU10_COMPONENT_Z;
10224       operand0.swizzleW = VGPU10_COMPONENT_W;
10225       break;
10226 
10227    default:
10228       break;
10229    }
10230 
10231    emit_dword(emit, operand0.value);
10232    emit_dword(emit, uav_index);
10233 }
10234 
10235 
10236 /**
10237  * A helper function to emit the uav address.
10238  * For memory, buffer, and image resource, it is set to the specified address.
10239  * For HW atomic counter, the address is the sum of the address offset and the
10240  * offset into the HW atomic buffer as specified by the register index.
10241  * It is also possible to specify the counter index as an indirect address.
10242  * And in this case, the uav address will be the sum of the address offset and the
10243  * counter index specified in the indirect address.
10244  */
10245 static
10246 struct tgsi_full_src_register
emit_uav_addr_offset(struct svga_shader_emitter_v10 * emit,enum tgsi_file_type resourceType,unsigned resourceIndex,unsigned resourceIndirect,unsigned resourceIndirectIndex,const struct tgsi_full_src_register * addr_reg)10247 emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
10248                      enum tgsi_file_type resourceType,
10249                      unsigned resourceIndex,
10250                      unsigned resourceIndirect,
10251                      unsigned resourceIndirectIndex,
10252                      const struct tgsi_full_src_register *addr_reg)
10253 {
10254    unsigned addr_tmp;
10255    struct tgsi_full_dst_register addr_dst;
10256    struct tgsi_full_src_register addr_src;
10257    struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
10258    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
10259 
10260    addr_tmp = get_temp_index(emit);
10261    addr_dst = make_dst_temp_reg(addr_tmp);
10262    addr_src = make_src_temp_reg(addr_tmp);
10263 
10264    /* specified address offset */
10265    if (addr_reg)
10266       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
10267    else
10268       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero);
10269 
10270    /* For HW atomic counter, we need to find the index to the
10271     * HW atomic buffer.
10272     */
10273    if (resourceType == TGSI_FILE_HW_ATOMIC) {
10274       if (resourceIndirect) {
10275 
10276          /**
10277           * uav addr offset  = counter layout offset +
10278           *                    counter indirect index address + address offset
10279           */
10280 
10281          /* counter layout offset */
10282          struct tgsi_full_src_register layout_offset;
10283          layout_offset =
10284             make_immediate_reg_int(emit, resourceIndex);
10285 
10286          /* counter layout offset + address offset */
10287          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10288                               &addr_src, &layout_offset);
10289 
10290          /* counter indirect index address */
10291          unsigned indirect_addr =
10292             emit->address_reg_index[resourceIndirectIndex];
10293 
10294          struct tgsi_full_src_register indirect_addr_src =
10295             make_src_temp_reg(indirect_addr);
10296 
10297          indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10298 
10299          /* counter layout offset + address offset + counter indirect address */
10300          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10301                               &addr_src, &indirect_addr_src);
10302 
10303       } else {
10304          struct tgsi_full_src_register index_src;
10305 
10306          index_src = make_immediate_reg_int(emit, resourceIndex);
10307 
10308          /* uav addr offset  = counter index address + address offset */
10309          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst,
10310                               &addr_src, &index_src);
10311       }
10312 
10313       /* HW atomic buffer is declared as raw buffer, so the buffer address is
10314        * the byte offset, so we need to multiple the counter addr offset by 4.
10315        */
10316       emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
10317                            &addr_src, &two);
10318    }
10319    else if (resourceType == TGSI_FILE_IMAGE) {
10320       if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
10321              && emit->key.images[resourceIndex].is_single_layer) {
10322 
10323          struct tgsi_full_dst_register addr_dst_z =
10324             writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
10325 
10326          /* For non-layered 3D texture image view, we have to make sure the z
10327           * component of the address offset is set to 0.
10328           */
10329          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
10330                               &zero);
10331       }
10332    }
10333 
10334    return addr_src;
10335 }
10336 
10337 
10338 
10339 /**
10340  * A helper function to expand indirect indexing to uav resource
10341  * by looping through the resource array, compare the indirect index and
10342  * emit the instruction for each resource in the array.
10343  */
10344 static void
loop_instruction(unsigned index,unsigned count,struct tgsi_full_src_register * addr_index,void (* fb)(struct svga_shader_emitter_v10 *,const struct tgsi_full_instruction *,unsigned),struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10345 loop_instruction(unsigned index, unsigned count,
10346                  struct tgsi_full_src_register *addr_index,
10347                  void (*fb)(struct svga_shader_emitter_v10 *,
10348                             const struct tgsi_full_instruction *, unsigned),
10349                  struct svga_shader_emitter_v10 *emit,
10350                  const struct tgsi_full_instruction *inst)
10351 {
10352    if (count == 0)
10353       return;
10354 
10355    if (index > 0) {
10356       /* ELSE */
10357       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10358    }
10359 
10360    struct tgsi_full_src_register index_src =
10361                                     make_immediate_reg_int(emit, index);
10362 
10363    unsigned tmp_index = get_temp_index(emit);
10364    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10365    struct tgsi_full_src_register tmp_src_x =
10366                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10367    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
10368 
10369    /* IEQ tmp, addr_tmp_index, index */
10370    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
10371                         addr_index, &index_src);
10372 
10373    /* IF tmp */
10374    emit_if(emit, &tmp_src_x);
10375 
10376    free_temp_indexes(emit);
10377 
10378    (*fb)(emit, inst, index);
10379 
10380    loop_instruction(index+1, count-1, addr_index, fb, emit, inst);
10381 
10382    /* ENDIF */
10383    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10384 }
10385 
10386 
10387 /**
10388  * A helper function to emit the load instruction.
10389  */
10390 static void
emit_load_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10391 emit_load_instruction(struct svga_shader_emitter_v10 *emit,
10392                       const struct tgsi_full_instruction *inst,
10393                       unsigned resourceIndex)
10394 {
10395    VGPU10OpcodeToken0 token0;
10396    struct tgsi_full_src_register addr_src;
10397    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10398 
10399    /* Resolve the resource address for this resource first */
10400    addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
10401                                    inst->Src[0].Register.Indirect,
10402                                    inst->Src[0].Indirect.Index,
10403                                    &inst->Src[1]);
10404 
10405    /* LOAD resource, address, src */
10406    begin_emit_instruction(emit);
10407 
10408    token0.value = 0;
10409 
10410    if (resourceType == TGSI_FILE_MEMORY ||
10411        resourceType == TGSI_FILE_BUFFER ||
10412        resourceType == TGSI_FILE_HW_ATOMIC) {
10413       token0.opcodeType = VGPU10_OPCODE_LD_RAW;
10414       addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10415    }
10416    else {
10417       token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10418    }
10419 
10420    token0.saturate = inst->Instruction.Saturate,
10421    emit_dword(emit, token0.value);
10422 
10423    emit_dst_register(emit, &inst->Dst[0]);
10424    emit_src_register(emit, &addr_src);
10425 
10426    if (resourceType == TGSI_FILE_MEMORY) {
10427       emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
10428    } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
10429       emit_uav_register(emit, inst->Src[0].Dimension.Index,
10430                         UAV_LOAD, inst->Src[0].Register.File, 0);
10431    } else if (resourceType == TGSI_FILE_BUFFER) {
10432       if (emit->raw_shaderbufs & (1 << resourceIndex))
10433          emit_resource_register(emit, resourceIndex +
10434                                       emit->raw_shaderbuf_srv_start_index);
10435       else
10436          emit_uav_register(emit, resourceIndex,
10437                            UAV_LOAD, inst->Src[0].Register.File, 0);
10438    } else {
10439       emit_uav_register(emit, resourceIndex,
10440                         UAV_LOAD, inst->Src[0].Register.File, 0);
10441    }
10442 
10443    end_emit_instruction(emit);
10444 
10445    free_temp_indexes(emit);
10446 }
10447 
10448 
10449 /**
10450  * Emit uav / memory load instruction
10451  */
10452 static bool
emit_load(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10453 emit_load(struct svga_shader_emitter_v10 *emit,
10454           const struct tgsi_full_instruction *inst)
10455 {
10456    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10457    unsigned resourceIndex = inst->Src[0].Register.Index;
10458 
10459    /* If the resource register has indirect index, we will need
10460     * to expand it since SM5 device does not support indirect indexing
10461     * for uav.
10462     */
10463    if (inst->Src[0].Register.Indirect &&
10464        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10465 
10466       unsigned indirect_index = inst->Src[0].Indirect.Index;
10467       unsigned num_resources =
10468          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10469                                             emit->num_images;
10470 
10471       /* indirect index tmp register */
10472       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10473       struct tgsi_full_src_register indirect_addr_src =
10474          make_src_temp_reg(indirect_addr);
10475       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10476 
10477       /* Add offset to the indirect index */
10478       if (inst->Src[0].Register.Index != 0) {
10479          struct tgsi_full_src_register offset =
10480             make_immediate_reg_int(emit, inst->Src[0].Register.Index);
10481          struct tgsi_full_dst_register indirect_addr_dst =
10482             make_dst_temp_reg(indirect_addr);
10483          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10484                               &indirect_addr_src, &offset);
10485       }
10486 
10487       /* Loop through the resource array to find which resource to use.
10488        */
10489       loop_instruction(0, num_resources, &indirect_addr_src,
10490                        emit_load_instruction, emit, inst);
10491    }
10492    else {
10493       emit_load_instruction(emit, inst, resourceIndex);
10494    }
10495 
10496    free_temp_indexes(emit);
10497 
10498    return true;
10499 }
10500 
10501 
10502 /**
10503  * A helper function to emit a store instruction.
10504  */
10505 static void
emit_store_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10506 emit_store_instruction(struct svga_shader_emitter_v10 *emit,
10507                        const struct tgsi_full_instruction *inst,
10508                        unsigned resourceIndex)
10509 {
10510    VGPU10OpcodeToken0 token0;
10511    enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10512    unsigned writemask = inst->Dst[0].Register.WriteMask;
10513    struct tgsi_full_src_register addr_src;
10514 
10515    unsigned tmp_index = get_temp_index(emit);
10516    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10517    struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
10518    struct tgsi_full_dst_register tmp_dst;
10519 
10520    struct tgsi_full_src_register src = inst->Src[1];
10521    struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
10522 
10523    bool needLoad = false;
10524    bool needPerComponentStore = false;
10525    unsigned swizzles = 0;
10526 
10527    /* Resolve the resource address for this resource first */
10528    addr_src = emit_uav_addr_offset(emit, resourceType,
10529                                    inst->Dst[0].Register.Index,
10530                                    inst->Dst[0].Register.Indirect,
10531                                    inst->Dst[0].Indirect.Index,
10532                                    &inst->Src[0]);
10533 
10534    /* First check the writemask to see if it can be supported
10535     * by the store instruction.
10536     * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory,
10537     * we can adjust the address offset, and do a per-component store.
10538     * store_uav_typed only allows .xyzw. In this case, we need to
10539     * do a load first, update the temporary and then issue the
10540     * store. This does have a small risk that if different threads
10541     * update different components of the same address, data might not be
10542     * in sync.
10543     */
10544    if (resourceType == TGSI_FILE_IMAGE) {
10545       needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? false : true;
10546    }
10547    else if (resourceType == TGSI_FILE_BUFFER ||
10548             resourceType == TGSI_FILE_MEMORY) {
10549       if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
10550             writemask == TGSI_WRITEMASK_XYZ ||
10551             writemask == TGSI_WRITEMASK_XYZW)) {
10552          needPerComponentStore = true;
10553       }
10554    }
10555 
10556    if (needLoad) {
10557       assert(resourceType == TGSI_FILE_IMAGE);
10558 
10559       /* LOAD resource, address, src */
10560       begin_emit_instruction(emit);
10561 
10562       token0.value = 0;
10563       token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10564       token0.saturate = inst->Instruction.Saturate,
10565       emit_dword(emit, token0.value);
10566 
10567       emit_dst_register(emit, &tmp_dst_xyzw);
10568       emit_src_register(emit, &addr_src);
10569       emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);
10570 
10571       end_emit_instruction(emit);
10572 
10573       /* MOV tmp(writemask) src */
10574       tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
10575       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);
10576 
10577       /* Now set the writemask to xyzw for the store_uav_typed instruction */
10578       writemask = TGSI_WRITEMASK_XYZW;
10579    }
10580    else if (needPerComponentStore) {
10581       /* Save the src swizzles */
10582       swizzles = src.Register.SwizzleX |
10583                  src.Register.SwizzleY << 2 |
10584                  src.Register.SwizzleZ << 4 |
10585                  src.Register.SwizzleW << 6;
10586    }
10587 
10588    bool storeDone = false;
10589    unsigned perComponentWritemask = writemask;
10590    unsigned shift = 0;
10591    struct tgsi_full_src_register shift_src;
10592 
10593    while (!storeDone) {
10594 
10595       if (needPerComponentStore) {
10596          assert(perComponentWritemask);
10597          while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
10598             shift++;
10599             perComponentWritemask >>= 1;
10600          }
10601 
10602          /* First adjust the addr_src to the next component */
10603          if (shift != 0) {
10604             struct tgsi_full_dst_register addr_dst =
10605                make_dst_temp_reg(addr_src.Register.Index);
10606             shift_src = make_immediate_reg_int(emit, shift);
10607             emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
10608                                  &shift_src, &addr_src);
10609 
10610             /* Adjust the src swizzle as well */
10611             swizzles >>= (shift * 2);
10612          }
10613 
10614          /* Now the address offset is set to the next component,
10615           * we can set the writemask to .x and make sure to set
10616           * the src swizzle as well.
10617           */
10618          src.Register.SwizzleX = swizzles & 0x3;
10619          writemask = TGSI_WRITEMASK_X;
10620 
10621          /* Shift for the next component check */
10622          perComponentWritemask >>= 1;
10623          shift = 1;
10624       }
10625 
10626       /* STORE resource, address, src */
10627       begin_emit_instruction(emit);
10628 
10629       token0.value = 0;
10630       token0.saturate = inst->Instruction.Saturate;
10631 
10632       if (resourceType == TGSI_FILE_MEMORY) {
10633          token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10634          addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10635          emit_dword(emit, token0.value);
10636          emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
10637       }
10638       else if (resourceType == TGSI_FILE_BUFFER ||
10639                resourceType == TGSI_FILE_HW_ATOMIC) {
10640          token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10641          addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10642          emit_dword(emit, token0.value);
10643          emit_uav_register(emit, resourceIndex, UAV_STORE,
10644                            resourceType, writemask);
10645       }
10646       else {
10647          token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
10648          emit_dword(emit, token0.value);
10649          emit_uav_register(emit, resourceIndex, UAV_STORE,
10650                            resourceType, writemask);
10651       }
10652 
10653       emit_src_register(emit, &addr_src);
10654 
10655       if (needLoad)
10656          emit_src_register(emit, &tmp_src);
10657       else
10658          emit_src_register(emit, &src);
10659 
10660       end_emit_instruction(emit);
10661 
10662       if (!needPerComponentStore || !perComponentWritemask)
10663          storeDone = true;
10664    }
10665 
10666    free_temp_indexes(emit);
10667 }
10668 
10669 
10670 /**
10671  * Emit uav / memory store instruction
10672  */
10673 static bool
emit_store(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10674 emit_store(struct svga_shader_emitter_v10 *emit,
10675            const struct tgsi_full_instruction *inst)
10676 {
10677    enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10678    unsigned resourceIndex = inst->Dst[0].Register.Index;
10679 
10680    /* If the resource register has indirect index, we will need
10681     * to expand it since SM5 device does not support indirect indexing
10682     * for uav.
10683     */
10684    if (inst->Dst[0].Register.Indirect &&
10685        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10686 
10687       unsigned indirect_index = inst->Dst[0].Indirect.Index;
10688       unsigned num_resources =
10689          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10690                                             emit->num_images;
10691 
10692       /* Indirect index tmp register */
10693       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10694       struct tgsi_full_src_register indirect_addr_src =
10695          make_src_temp_reg(indirect_addr);
10696       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10697 
10698       /* Add offset to the indirect index */
10699       if (inst->Dst[0].Register.Index != 0) {
10700          struct tgsi_full_src_register offset =
10701             make_immediate_reg_int(emit, inst->Dst[0].Register.Index);
10702          struct tgsi_full_dst_register indirect_addr_dst =
10703             make_dst_temp_reg(indirect_addr);
10704          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10705                               &indirect_addr_src, &offset);
10706       }
10707 
10708       /* Loop through the resource array to find which resource to use.
10709        */
10710       loop_instruction(0, num_resources, &indirect_addr_src,
10711                        emit_store_instruction, emit, inst);
10712    }
10713    else {
10714       emit_store_instruction(emit, inst, resourceIndex);
10715    }
10716 
10717    free_temp_indexes(emit);
10718 
10719    return true;
10720 }
10721 
10722 
10723 /**
10724  * A helper function to emit an atomic instruction.
10725  */
10726 
10727 static void
emit_atomic_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10728 emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
10729                         const struct tgsi_full_instruction *inst,
10730                         unsigned resourceIndex)
10731 {
10732    VGPU10OpcodeToken0 token0;
10733    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10734    struct tgsi_full_src_register addr_src;
10735    VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
10736    const struct tgsi_full_src_register *offset;
10737 
10738    /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */
10739    offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1];
10740 
10741    /* Resolve the resource address */
10742    addr_src = emit_uav_addr_offset(emit, resourceType,
10743                                    inst->Src[0].Register.Index,
10744                                    inst->Src[0].Register.Indirect,
10745                                    inst->Src[0].Indirect.Index,
10746                                    offset);
10747 
10748    /* Emit the atomic operation */
10749    begin_emit_instruction(emit);
10750 
10751    token0.value = 0;
10752    token0.opcodeType = opcode;
10753    token0.saturate = inst->Instruction.Saturate,
10754    emit_dword(emit, token0.value);
10755 
10756    emit_dst_register(emit, &inst->Dst[0]);
10757 
10758    if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
10759       emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
10760    } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) {
10761       assert(inst->Src[0].Register.Dimension == 1);
10762       emit_uav_register(emit, inst->Src[0].Dimension.Index,
10763                         UAV_ATOMIC, inst->Src[0].Register.File, 0);
10764    } else {
10765       emit_uav_register(emit, resourceIndex,
10766                         UAV_ATOMIC, inst->Src[0].Register.File, 0);
10767    }
10768 
10769    /* resource address offset */
10770    emit_src_register(emit, &addr_src);
10771 
10772    struct tgsi_full_src_register src0_x =
10773          swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10774                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10775    emit_src_register(emit, &src0_x);
10776 
10777    if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
10778       struct tgsi_full_src_register src1_x =
10779          swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10780                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10781 
10782       emit_src_register(emit, &src1_x);
10783    }
10784 
10785    end_emit_instruction(emit);
10786 
10787    free_temp_indexes(emit);
10788 }
10789 
10790 
10791 /**
10792  * Emit atomic instruction
10793  */
10794 static bool
emit_atomic(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_OPCODE_TYPE opcode)10795 emit_atomic(struct svga_shader_emitter_v10 *emit,
10796             const struct tgsi_full_instruction *inst,
10797             VGPU10_OPCODE_TYPE opcode)
10798 {
10799    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10800    unsigned resourceIndex = inst->Src[0].Register.Index;
10801 
10802    emit->cur_atomic_opcode = opcode;
10803 
10804    /* If the resource register has indirect index, we will need
10805     * to expand it since SM5 device does not support indirect indexing
10806     * for uav.
10807     */
10808    if (inst->Dst[0].Register.Indirect &&
10809        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10810 
10811       unsigned indirect_index = inst->Dst[0].Indirect.Index;
10812       unsigned num_resources =
10813          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10814                                             emit->num_images;
10815 
10816       /* indirect index tmp register */
10817       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10818       struct tgsi_full_src_register indirect_addr_src =
10819          make_src_temp_reg(indirect_addr);
10820       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10821 
10822       /* Loop through the resource array to find which resource to use.
10823        */
10824       loop_instruction(0, num_resources, &indirect_addr_src,
10825                        emit_atomic_instruction, emit, inst);
10826    }
10827    else {
10828       emit_atomic_instruction(emit, inst, resourceIndex);
10829    }
10830 
10831    free_temp_indexes(emit);
10832 
10833    return true;
10834 }
10835 
10836 
10837 /**
10838  * Emit barrier instruction
10839  */
10840 static bool
emit_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10841 emit_barrier(struct svga_shader_emitter_v10 *emit,
10842              const struct tgsi_full_instruction *inst)
10843 {
10844    VGPU10OpcodeToken0 token0;
10845 
10846    assert(emit->version >= 50);
10847 
10848    token0.value = 0;
10849    token0.opcodeType = VGPU10_OPCODE_SYNC;
10850 
10851    if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
10852       /* SM5 device doesn't support BARRIER in tcs . If barrier is used
10853        * in shader, don't do anything for this opcode and continue rest
10854        * of shader translation
10855        */
10856       util_debug_message(&emit->svga_debug_callback, INFO,
10857                          "barrier instruction is not supported in tessellation control shader\n");
10858       return true;
10859    }
10860    else if (emit->unit == PIPE_SHADER_COMPUTE) {
10861       if (emit->cs.shared_memory_declared)
10862          token0.syncThreadGroupShared = 1;
10863 
10864       if (emit->uav_declared)
10865          token0.syncUAVMemoryGroup = 1;
10866 
10867       token0.syncThreadsInGroup = 1;
10868    } else {
10869       token0.syncUAVMemoryGlobal = 1;
10870    }
10871 
10872    assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10873           token0.syncThreadGroupShared);
10874 
10875    begin_emit_instruction(emit);
10876    emit_dword(emit, token0.value);
10877    end_emit_instruction(emit);
10878 
10879    return true;
10880 }
10881 
10882 /**
10883  * Emit memory barrier instruction
10884  */
10885 static bool
emit_memory_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10886 emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
10887                     const struct tgsi_full_instruction *inst)
10888 {
10889    unsigned index = inst->Src[0].Register.Index;
10890    unsigned swizzle = inst->Src[0].Register.SwizzleX;
10891    unsigned bartype = emit->immediates[index][swizzle].Int;
10892    VGPU10OpcodeToken0 token0;
10893 
10894    token0.value = 0;
10895    token0.opcodeType = VGPU10_OPCODE_SYNC;
10896 
10897    if (emit->unit == PIPE_SHADER_COMPUTE) {
10898 
10899       /* For compute shader, issue sync opcode with different options
10900        * depending on the memory barrier type.
10901        *
10902        * Bit 0: Shader storage buffers
10903        * Bit 1: Atomic buffers
10904        * Bit 2: Images
10905        * Bit 3: Shared memory
10906        * Bit 4: Thread group
10907        */
10908 
10909       if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10910                      TGSI_MEMBAR_SHADER_IMAGE))
10911          token0.syncUAVMemoryGlobal = 1;
10912       else if (bartype & TGSI_MEMBAR_THREAD_GROUP)
10913          token0.syncUAVMemoryGroup = 1;
10914 
10915       if (bartype & TGSI_MEMBAR_SHARED)
10916          token0.syncThreadGroupShared = 1;
10917    }
10918    else {
10919       /**
10920        * For graphics stages, only sync_uglobal is available.
10921        */
10922       if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10923                      TGSI_MEMBAR_SHADER_IMAGE))
10924          token0.syncUAVMemoryGlobal = 1;
10925    }
10926 
10927    assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10928           token0.syncThreadGroupShared);
10929 
10930    begin_emit_instruction(emit);
10931    emit_dword(emit, token0.value);
10932    end_emit_instruction(emit);
10933 
10934    return true;
10935 }
10936 
10937 
10938 /**
10939  * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
10940  */
10941 static bool
emit_resq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10942 emit_resq(struct svga_shader_emitter_v10 *emit,
10943           const struct tgsi_full_instruction *inst)
10944 {
10945    struct tgsi_full_src_register zero =
10946       make_immediate_reg_int(emit, 0);
10947 
10948    unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource;
10949 
10950    if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) {
10951       struct tgsi_full_src_register image_src;
10952 
10953       image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index);
10954 
10955       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src);
10956       return true;
10957    }
10958 
10959    begin_emit_instruction(emit);
10960    if (uav_resource == TGSI_TEXTURE_BUFFER) {
10961       emit_opcode(emit, VGPU10_OPCODE_BUFINFO, false);
10962       emit_dst_register(emit, &inst->Dst[0]);
10963    }
10964    else {
10965       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
10966       emit_dst_register(emit, &inst->Dst[0]);
10967       emit_src_register(emit, &zero);
10968    }
10969    emit_uav_register(emit, inst->Src[0].Register.Index,
10970                      UAV_RESQ, inst->Src[0].Register.File, 0);
10971    end_emit_instruction(emit);
10972 
10973    return true;
10974 }
10975 
10976 
10977 static bool
emit_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)10978 emit_instruction(struct svga_shader_emitter_v10 *emit,
10979                  unsigned inst_number,
10980                  const struct tgsi_full_instruction *inst)
10981 {
10982    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10983 
10984    switch (opcode) {
10985    case TGSI_OPCODE_ADD:
10986    case TGSI_OPCODE_AND:
10987    case TGSI_OPCODE_BGNLOOP:
10988    case TGSI_OPCODE_BRK:
10989    case TGSI_OPCODE_CEIL:
10990    case TGSI_OPCODE_CONT:
10991    case TGSI_OPCODE_DDX:
10992    case TGSI_OPCODE_DDY:
10993    case TGSI_OPCODE_DIV:
10994    case TGSI_OPCODE_DP2:
10995    case TGSI_OPCODE_DP3:
10996    case TGSI_OPCODE_DP4:
10997    case TGSI_OPCODE_ELSE:
10998    case TGSI_OPCODE_ENDIF:
10999    case TGSI_OPCODE_ENDLOOP:
11000    case TGSI_OPCODE_ENDSUB:
11001    case TGSI_OPCODE_F2I:
11002    case TGSI_OPCODE_F2U:
11003    case TGSI_OPCODE_FLR:
11004    case TGSI_OPCODE_FRC:
11005    case TGSI_OPCODE_FSEQ:
11006    case TGSI_OPCODE_FSGE:
11007    case TGSI_OPCODE_FSLT:
11008    case TGSI_OPCODE_FSNE:
11009    case TGSI_OPCODE_I2F:
11010    case TGSI_OPCODE_IMAX:
11011    case TGSI_OPCODE_IMIN:
11012    case TGSI_OPCODE_INEG:
11013    case TGSI_OPCODE_ISGE:
11014    case TGSI_OPCODE_ISHR:
11015    case TGSI_OPCODE_ISLT:
11016    case TGSI_OPCODE_MAD:
11017    case TGSI_OPCODE_MAX:
11018    case TGSI_OPCODE_MIN:
11019    case TGSI_OPCODE_MUL:
11020    case TGSI_OPCODE_NOP:
11021    case TGSI_OPCODE_NOT:
11022    case TGSI_OPCODE_OR:
11023    case TGSI_OPCODE_UADD:
11024    case TGSI_OPCODE_USEQ:
11025    case TGSI_OPCODE_USGE:
11026    case TGSI_OPCODE_USLT:
11027    case TGSI_OPCODE_UMIN:
11028    case TGSI_OPCODE_UMAD:
11029    case TGSI_OPCODE_UMAX:
11030    case TGSI_OPCODE_ROUND:
11031    case TGSI_OPCODE_SQRT:
11032    case TGSI_OPCODE_SHL:
11033    case TGSI_OPCODE_TRUNC:
11034    case TGSI_OPCODE_U2F:
11035    case TGSI_OPCODE_UCMP:
11036    case TGSI_OPCODE_USHR:
11037    case TGSI_OPCODE_USNE:
11038    case TGSI_OPCODE_XOR:
11039    /* Begin SM5 opcodes */
11040    case TGSI_OPCODE_F2D:
11041    case TGSI_OPCODE_D2F:
11042    case TGSI_OPCODE_DADD:
11043    case TGSI_OPCODE_DMUL:
11044    case TGSI_OPCODE_DMAX:
11045    case TGSI_OPCODE_DMIN:
11046    case TGSI_OPCODE_DSGE:
11047    case TGSI_OPCODE_DSLT:
11048    case TGSI_OPCODE_DSEQ:
11049    case TGSI_OPCODE_DSNE:
11050    case TGSI_OPCODE_BREV:
11051    case TGSI_OPCODE_POPC:
11052    case TGSI_OPCODE_LSB:
11053    case TGSI_OPCODE_INTERP_CENTROID:
11054    case TGSI_OPCODE_INTERP_SAMPLE:
11055       /* simple instructions */
11056       return emit_simple(emit, inst);
11057    case TGSI_OPCODE_RET:
11058       if (emit->unit == PIPE_SHADER_TESS_CTRL &&
11059           !emit->tcs.control_point_phase) {
11060 
11061          /* store the tessellation levels in the patch constant phase only */
11062          store_tesslevels(emit);
11063       }
11064       return emit_simple(emit, inst);
11065 
11066    case TGSI_OPCODE_IMSB:
11067    case TGSI_OPCODE_UMSB:
11068       return emit_msb(emit, inst);
11069    case TGSI_OPCODE_IBFE:
11070    case TGSI_OPCODE_UBFE:
11071       return emit_bfe(emit, inst);
11072    case TGSI_OPCODE_BFI:
11073       return emit_bfi(emit, inst);
11074    case TGSI_OPCODE_MOV:
11075       return emit_mov(emit, inst);
11076    case TGSI_OPCODE_EMIT:
11077       return emit_vertex(emit, inst);
11078    case TGSI_OPCODE_ENDPRIM:
11079       return emit_endprim(emit, inst);
11080    case TGSI_OPCODE_IABS:
11081       return emit_iabs(emit, inst);
11082    case TGSI_OPCODE_ARL:
11083       FALLTHROUGH;
11084    case TGSI_OPCODE_UARL:
11085       return emit_arl_uarl(emit, inst);
11086    case TGSI_OPCODE_BGNSUB:
11087       /* no-op */
11088       return true;
11089    case TGSI_OPCODE_CAL:
11090       return emit_cal(emit, inst);
11091    case TGSI_OPCODE_CMP:
11092       return emit_cmp(emit, inst);
11093    case TGSI_OPCODE_COS:
11094       return emit_sincos(emit, inst);
11095    case TGSI_OPCODE_DST:
11096       return emit_dst(emit, inst);
11097    case TGSI_OPCODE_EX2:
11098       return emit_ex2(emit, inst);
11099    case TGSI_OPCODE_EXP:
11100       return emit_exp(emit, inst);
11101    case TGSI_OPCODE_IF:
11102       return emit_if(emit, &inst->Src[0]);
11103    case TGSI_OPCODE_KILL:
11104       return emit_discard(emit, inst);
11105    case TGSI_OPCODE_KILL_IF:
11106       return emit_cond_discard(emit, inst);
11107    case TGSI_OPCODE_LG2:
11108       return emit_lg2(emit, inst);
11109    case TGSI_OPCODE_LIT:
11110       return emit_lit(emit, inst);
11111    case TGSI_OPCODE_LODQ:
11112       return emit_lodq(emit, inst);
11113    case TGSI_OPCODE_LOG:
11114       return emit_log(emit, inst);
11115    case TGSI_OPCODE_LRP:
11116       return emit_lrp(emit, inst);
11117    case TGSI_OPCODE_POW:
11118       return emit_pow(emit, inst);
11119    case TGSI_OPCODE_RCP:
11120       return emit_rcp(emit, inst);
11121    case TGSI_OPCODE_RSQ:
11122       return emit_rsq(emit, inst);
11123    case TGSI_OPCODE_SAMPLE:
11124       return emit_sample(emit, inst);
11125    case TGSI_OPCODE_SEQ:
11126       return emit_seq(emit, inst);
11127    case TGSI_OPCODE_SGE:
11128       return emit_sge(emit, inst);
11129    case TGSI_OPCODE_SGT:
11130       return emit_sgt(emit, inst);
11131    case TGSI_OPCODE_SIN:
11132       return emit_sincos(emit, inst);
11133    case TGSI_OPCODE_SLE:
11134       return emit_sle(emit, inst);
11135    case TGSI_OPCODE_SLT:
11136       return emit_slt(emit, inst);
11137    case TGSI_OPCODE_SNE:
11138       return emit_sne(emit, inst);
11139    case TGSI_OPCODE_SSG:
11140       return emit_ssg(emit, inst);
11141    case TGSI_OPCODE_ISSG:
11142       return emit_issg(emit, inst);
11143    case TGSI_OPCODE_TEX:
11144       return emit_tex(emit, inst);
11145    case TGSI_OPCODE_TG4:
11146       return emit_tg4(emit, inst);
11147    case TGSI_OPCODE_TEX2:
11148       return emit_tex2(emit, inst);
11149    case TGSI_OPCODE_TXP:
11150       return emit_txp(emit, inst);
11151    case TGSI_OPCODE_TXB:
11152    case TGSI_OPCODE_TXB2:
11153    case TGSI_OPCODE_TXL:
11154       return emit_txl_txb(emit, inst);
11155    case TGSI_OPCODE_TXD:
11156       return emit_txd(emit, inst);
11157    case TGSI_OPCODE_TXF:
11158       return emit_txf(emit, inst);
11159    case TGSI_OPCODE_TXL2:
11160       return emit_txl2(emit, inst);
11161    case TGSI_OPCODE_TXQ:
11162       return emit_txq(emit, inst);
11163    case TGSI_OPCODE_UIF:
11164       return emit_if(emit, &inst->Src[0]);
11165    case TGSI_OPCODE_UMUL_HI:
11166    case TGSI_OPCODE_IMUL_HI:
11167    case TGSI_OPCODE_UDIV:
11168       /* These cases use only the FIRST of two destination registers */
11169       return emit_simple_1dst(emit, inst, 2, 0);
11170    case TGSI_OPCODE_IDIV:
11171       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
11172    case TGSI_OPCODE_UMUL:
11173    case TGSI_OPCODE_UMOD:
11174    case TGSI_OPCODE_MOD:
11175       /* These cases use only the SECOND of two destination registers */
11176       return emit_simple_1dst(emit, inst, 2, 1);
11177 
11178    /* Begin SM5 opcodes */
11179    case TGSI_OPCODE_DABS:
11180       return emit_dabs(emit, inst);
11181    case TGSI_OPCODE_DNEG:
11182       return emit_dneg(emit, inst);
11183    case TGSI_OPCODE_DRCP:
11184       return emit_simple(emit, inst);
11185    case TGSI_OPCODE_DSQRT:
11186       return emit_dsqrt(emit, inst);
11187    case TGSI_OPCODE_DMAD:
11188       return emit_dmad(emit, inst);
11189    case TGSI_OPCODE_DFRAC:
11190       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
11191    case TGSI_OPCODE_D2I:
11192    case TGSI_OPCODE_D2U:
11193       return emit_simple(emit, inst);
11194    case TGSI_OPCODE_I2D:
11195    case TGSI_OPCODE_U2D:
11196       return emit_simple(emit, inst);
11197    case TGSI_OPCODE_DRSQ:
11198       return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
11199    case TGSI_OPCODE_DDIV:
11200       return emit_simple(emit, inst);
11201    case TGSI_OPCODE_INTERP_OFFSET:
11202       return emit_interp_offset(emit, inst);
11203    case TGSI_OPCODE_FMA:
11204    case TGSI_OPCODE_DFMA:
11205       return emit_simple(emit, inst);
11206 
11207    case TGSI_OPCODE_DTRUNC:
11208       return emit_dtrunc(emit, inst);
11209 
11210    /* The following opcodes should never be seen here.  We return zero
11211     * for PIPE_CAP_TGSI_DROUND_SUPPORTED.
11212     */
11213    case TGSI_OPCODE_LDEXP:
11214    case TGSI_OPCODE_DSSG:
11215    case TGSI_OPCODE_DLDEXP:
11216    case TGSI_OPCODE_DCEIL:
11217    case TGSI_OPCODE_DFLR:
11218       debug_printf("Unexpected TGSI opcode %s.  "
11219                    "Should have been translated away by the GLSL compiler.\n",
11220                    tgsi_get_opcode_name(opcode));
11221       return false;
11222 
11223    case TGSI_OPCODE_LOAD:
11224       return emit_load(emit, inst);
11225 
11226    case TGSI_OPCODE_STORE:
11227       return emit_store(emit, inst);
11228 
11229    case TGSI_OPCODE_ATOMAND:
11230       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);
11231 
11232    case TGSI_OPCODE_ATOMCAS:
11233       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
11234 
11235    case TGSI_OPCODE_ATOMIMAX:
11236       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);
11237 
11238    case TGSI_OPCODE_ATOMIMIN:
11239       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);
11240 
11241    case TGSI_OPCODE_ATOMOR:
11242       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);
11243 
11244    case TGSI_OPCODE_ATOMUADD:
11245       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);
11246 
11247    case TGSI_OPCODE_ATOMUMAX:
11248       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);
11249 
11250    case TGSI_OPCODE_ATOMUMIN:
11251       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);
11252 
11253    case TGSI_OPCODE_ATOMXCHG:
11254       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);
11255 
11256    case TGSI_OPCODE_ATOMXOR:
11257       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);
11258 
11259    case TGSI_OPCODE_BARRIER:
11260       return emit_barrier(emit, inst);
11261 
11262    case TGSI_OPCODE_MEMBAR:
11263       return emit_memory_barrier(emit, inst);
11264 
11265    case TGSI_OPCODE_RESQ:
11266       return emit_resq(emit, inst);
11267 
11268    case TGSI_OPCODE_END:
11269       if (!emit_post_helpers(emit))
11270          return false;
11271       return emit_simple(emit, inst);
11272 
11273    default:
11274       debug_printf("Unimplemented tgsi instruction %s\n",
11275                    tgsi_get_opcode_name(opcode));
11276       return false;
11277    }
11278 
11279    return true;
11280 }
11281 
11282 
11283 /**
11284  * Translate a single TGSI instruction to VGPU10.
11285  */
11286 static bool
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)11287 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
11288                         unsigned inst_number,
11289                         const struct tgsi_full_instruction *inst)
11290 {
11291    if (emit->skip_instruction)
11292       return true;
11293 
11294    bool ret = true;
11295    unsigned start_token = emit_get_num_tokens(emit);
11296 
11297    emit->reemit_tgsi_instruction = false;
11298 
11299    ret = emit_instruction(emit, inst_number, inst);
11300 
11301    if (emit->reemit_tgsi_instruction) {
11302       /**
11303        * Reset emit->ptr to where the translation of this tgsi instruction
11304        * started.
11305        */
11306       VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
11307       emit->ptr = (char *) (tokens + start_token);
11308 
11309       emit->reemit_tgsi_instruction = false;
11310    }
11311    return ret;
11312 }
11313 
11314 
11315 /**
11316  * Emit the extra instructions to adjust the vertex position.
11317  * There are two possible adjustments:
11318  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
11319  *    "prescale" and "pretranslate" values.
11320  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
11321  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
11322  */
11323 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)11324 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
11325 {
11326    struct tgsi_full_src_register tmp_pos_src;
11327    struct tgsi_full_dst_register pos_dst;
11328    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
11329 
11330    /* Don't bother to emit any extra vertex instructions if vertex position is
11331     * not written out
11332     */
11333    if (emit->vposition.out_index == INVALID_INDEX)
11334       return;
11335 
11336    /**
11337     * Reset the temporary vertex position register index
11338     * so that emit_dst_register() will use the real vertex position output
11339     */
11340    emit->vposition.tmp_index = INVALID_INDEX;
11341 
11342    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
11343    pos_dst = make_dst_output_reg(emit->vposition.out_index);
11344 
11345    /* If non-adjusted vertex position register index
11346     * is valid, copy the vertex position from the temporary
11347     * vertex position register before it is modified by the
11348     * prescale computation.
11349     */
11350    if (emit->vposition.so_index != INVALID_INDEX) {
11351       struct tgsi_full_dst_register pos_so_dst =
11352          make_dst_output_reg(emit->vposition.so_index);
11353 
11354       /* MOV pos_so, tmp_pos */
11355       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
11356    }
11357 
11358    if (emit->vposition.need_prescale) {
11359       /* This code adjusts the vertex position to match the VGPU10 convention.
11360        * If p is the position computed by the shader (usually by applying the
11361        * modelview and projection matrices), the new position q is computed by:
11362        *
11363        * q.x = p.w * trans.x + p.x * scale.x
11364        * q.y = p.w * trans.y + p.y * scale.y
11365        * q.z = p.w * trans.z + p.z * scale.z;
11366        * q.w = p.w * trans.w + p.w;
11367        */
11368       struct tgsi_full_src_register tmp_pos_src_w =
11369          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11370       struct tgsi_full_dst_register tmp_pos_dst =
11371          make_dst_temp_reg(vs_pos_tmp_index);
11372       struct tgsi_full_dst_register tmp_pos_dst_xyz =
11373          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
11374 
11375       struct tgsi_full_src_register prescale_scale =
11376          make_src_temp_reg(emit->vposition.prescale_scale_index);
11377       struct tgsi_full_src_register prescale_trans =
11378          make_src_temp_reg(emit->vposition.prescale_trans_index);
11379 
11380       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
11381       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
11382                            &tmp_pos_src, &prescale_scale);
11383 
11384       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
11385       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
11386                            &prescale_trans, &tmp_pos_src);
11387    }
11388    else if (emit->key.vs.undo_viewport) {
11389       /* This code computes the final vertex position from the temporary
11390        * vertex position by undoing the viewport transformation and the
11391        * divide-by-W operation (we convert window coords back to clip coords).
11392        * This is needed when we use the 'draw' module for fallbacks.
11393        * If p is the temp pos in window coords, then the NDC coord q is:
11394        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
11395        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
11396        *   q.z = p.z * p.w
11397        *   q.w = p.w
11398        * CONST[vs_viewport_index] contains:
11399        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
11400        */
11401       struct tgsi_full_dst_register tmp_pos_dst =
11402          make_dst_temp_reg(vs_pos_tmp_index);
11403       struct tgsi_full_dst_register tmp_pos_dst_xy =
11404          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
11405       struct tgsi_full_src_register tmp_pos_src_wwww =
11406          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11407 
11408       struct tgsi_full_dst_register pos_dst_xyz =
11409          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
11410       struct tgsi_full_dst_register pos_dst_w =
11411          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
11412 
11413       struct tgsi_full_src_register vp_xyzw =
11414          make_src_const_reg(emit->vs.viewport_index);
11415       struct tgsi_full_src_register vp_zwww =
11416          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
11417                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
11418 
11419       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
11420       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
11421                            &tmp_pos_src, &vp_zwww);
11422 
11423       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
11424       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
11425                            &tmp_pos_src, &vp_xyzw);
11426 
11427       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
11428       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
11429                            &tmp_pos_src, &tmp_pos_src_wwww);
11430 
11431       /* MOV pos.w, tmp_pos.w */
11432       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
11433    }
11434    else if (vs_pos_tmp_index != INVALID_INDEX) {
11435       /* This code is to handle the case where the temporary vertex
11436        * position register is created when the vertex shader has stream
11437        * output and prescale is disabled because rasterization is to be
11438        * discarded.
11439        */
11440       struct tgsi_full_dst_register pos_dst =
11441          make_dst_output_reg(emit->vposition.out_index);
11442 
11443       /* MOV pos, tmp_pos */
11444       begin_emit_instruction(emit);
11445       emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11446       emit_dst_register(emit, &pos_dst);
11447       emit_src_register(emit, &tmp_pos_src);
11448       end_emit_instruction(emit);
11449    }
11450 
11451    /* Restore original vposition.tmp_index value for the next GS vertex.
11452     * It doesn't matter for VS.
11453     */
11454    emit->vposition.tmp_index = vs_pos_tmp_index;
11455 }
11456 
11457 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)11458 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
11459 {
11460    if (emit->clip_mode == CLIP_DISTANCE) {
11461       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
11462       emit_clip_distance_instructions(emit);
11463 
11464    } else if (emit->clip_mode == CLIP_VERTEX &&
11465               emit->key.last_vertex_stage) {
11466       /* Convert TGSI CLIPVERTEX to CLIPDIST */
11467       emit_clip_vertex_instructions(emit);
11468    }
11469 
11470    /**
11471     * Emit vertex position and take care of legacy user planes only if
11472     * there is a valid vertex position register index.
11473     * This is to take care of the case
11474     * where the shader doesn't output vertex position. Then in
11475     * this case, don't bother to emit more vertex instructions.
11476     */
11477    if (emit->vposition.out_index == INVALID_INDEX)
11478       return;
11479 
11480    /**
11481     * Emit per-vertex clipping instructions for legacy user defined clip planes.
11482     * NOTE: we must emit the clip distance instructions before the
11483     * emit_vpos_instructions() call since the later function will change
11484     * the TEMP[vs_pos_tmp_index] value.
11485     */
11486    if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
11487       /* Emit CLIPDIST for legacy user defined clip planes */
11488       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
11489    }
11490 }
11491 
11492 
/**
 * Emit the extra per-vertex instructions: clip distance computation
 * followed by the vertex position adjustment.  This is called for each
 * GS emit-vertex instruction and at the end of VS translation.
 *
 * The clipping code goes first; emit_vpos_instructions() runs second.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Clipping, according to the current clipping mode */
   emit_clipping_instructions(emit);

   /* Final vertex position */
   emit_vpos_instructions(emit);
}
11507 
11508 
11509 /**
11510  * Translate the TGSI_OPCODE_EMIT GS instruction.
11511  */
11512 static bool
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)11513 emit_vertex(struct svga_shader_emitter_v10 *emit,
11514             const struct tgsi_full_instruction *inst)
11515 {
11516    unsigned ret = true;
11517 
11518    assert(emit->unit == PIPE_SHADER_GEOMETRY);
11519 
11520    /**
11521     * Emit the viewport array index for the first vertex.
11522     */
11523    if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
11524       struct tgsi_full_dst_register viewport_index_out =
11525          make_dst_output_reg(emit->gs.viewport_index_out_index);
11526       struct tgsi_full_dst_register viewport_index_out_x =
11527          writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
11528       struct tgsi_full_src_register viewport_index_tmp =
11529          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11530 
11531       /* Set the out index to INVALID_INDEX, so it will not
11532        * be assigned to a temp again in emit_dst_register, and
11533        * the viewport index will not be assigned again in the
11534        * subsequent vertices.
11535        */
11536       emit->gs.viewport_index_out_index = INVALID_INDEX;
11537       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11538                            &viewport_index_out_x, &viewport_index_tmp);
11539    }
11540 
11541    /**
11542     * Find the stream index associated with this emit vertex instruction.
11543     */
11544    assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
11545    unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
11546 
11547    /**
11548     * According to the ARB_gpu_shader5 spec, the built-in geometry shader
11549     * outputs are always associated with vertex stream zero.
11550     * So emit the extra vertex instructions for position or clip distance
11551     * for stream zero only.
11552     */
11553    if (streamIndex == 0) {
11554       /**
11555        * Before emitting vertex instructions, emit the temporaries for
11556        * the prescale constants based on the viewport index if needed.
11557        */
11558       if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
11559          emit_temp_prescale_instructions(emit);
11560 
11561       emit_vertex_instructions(emit);
11562    }
11563 
11564    begin_emit_instruction(emit);
11565    if (emit->version >= 50) {
11566       if (emit->info.num_stream_output_components[streamIndex] == 0) {
11567          /**
11568           * If there is no output for this stream, discard this instruction.
11569           */
11570          emit->discard_instruction = true;
11571       }
11572       else {
11573          emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, false);
11574          emit_stream_register(emit, streamIndex);
11575       }
11576    }
11577    else {
11578       emit_opcode(emit, VGPU10_OPCODE_EMIT, false);
11579    }
11580    end_emit_instruction(emit);
11581 
11582    return ret;
11583 }
11584 
11585 
11586 /**
11587  * Emit the extra code to convert from VGPU10's boolean front-face
11588  * register to TGSI's signed front-face register.
11589  *
11590  * TODO: Make temporary front-face register a scalar.
11591  */
11592 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)11593 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
11594 {
11595    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11596 
11597    if (emit->fs.face_input_index != INVALID_INDEX) {
11598       /* convert vgpu10 boolean face register to gallium +/-1 value */
11599       struct tgsi_full_dst_register tmp_dst =
11600          make_dst_temp_reg(emit->fs.face_tmp_index);
11601       struct tgsi_full_src_register one =
11602          make_immediate_reg_float(emit, 1.0f);
11603       struct tgsi_full_src_register neg_one =
11604          make_immediate_reg_float(emit, -1.0f);
11605 
11606       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
11607       begin_emit_instruction(emit);
11608       emit_opcode(emit, VGPU10_OPCODE_MOVC, false);
11609       emit_dst_register(emit, &tmp_dst);
11610       emit_face_register(emit);
11611       emit_src_register(emit, &one);
11612       emit_src_register(emit, &neg_one);
11613       end_emit_instruction(emit);
11614    }
11615 }
11616 
11617 
11618 /**
11619  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
11620  */
11621 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)11622 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
11623 {
11624    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11625 
11626    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
11627       struct tgsi_full_dst_register tmp_dst =
11628          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
11629       struct tgsi_full_dst_register tmp_dst_xyz =
11630          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
11631       struct tgsi_full_dst_register tmp_dst_w =
11632          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11633       struct tgsi_full_src_register one =
11634          make_immediate_reg_float(emit, 1.0f);
11635       struct tgsi_full_src_register fragcoord =
11636          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
11637 
11638       /* save the input index */
11639       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
11640       /* set to invalid to prevent substitution in emit_src_register() */
11641       emit->fs.fragcoord_input_index = INVALID_INDEX;
11642 
11643       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
11644       begin_emit_instruction(emit);
11645       emit_opcode(emit, VGPU10_OPCODE_MOV, false);
11646       emit_dst_register(emit, &tmp_dst_xyz);
11647       emit_src_register(emit, &fragcoord);
11648       end_emit_instruction(emit);
11649 
11650       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
11651       begin_emit_instruction(emit);
11652       emit_opcode(emit, VGPU10_OPCODE_DIV, false);
11653       emit_dst_register(emit, &tmp_dst_w);
11654       emit_src_register(emit, &one);
11655       emit_src_register(emit, &fragcoord);
11656       end_emit_instruction(emit);
11657 
11658       /* restore saved value */
11659       emit->fs.fragcoord_input_index = fragcoord_input_index;
11660    }
11661 }
11662 
11663 
11664 /**
11665  * Emit the extra code to get the current sample position value and
11666  * put it into a temp register.
11667  */
11668 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)11669 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
11670 {
11671    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11672 
11673    if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
11674       assert(emit->version >= 41);
11675 
11676       struct tgsi_full_dst_register tmp_dst =
11677          make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
11678       struct tgsi_full_src_register half =
11679          make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
11680 
11681       struct tgsi_full_src_register tmp_src =
11682          make_src_temp_reg(emit->fs.sample_pos_tmp_index);
11683       struct tgsi_full_src_register sample_index_reg =
11684          make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
11685                              emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
11686 
11687       /* The first src register is a shader resource (if we want a
11688        * multisampled resource sample position) or the rasterizer register
11689        * (if we want the current sample position in the color buffer).  We
11690        * want the later.
11691        */
11692 
11693       /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
11694       begin_emit_instruction(emit);
11695       emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, false);
11696       emit_dst_register(emit, &tmp_dst);
11697       emit_rasterizer_register(emit);
11698       emit_src_register(emit, &sample_index_reg);
11699       end_emit_instruction(emit);
11700 
11701       /* Convert from D3D coords to GL coords by adding 0.5 bias */
11702       /* ADD dst, dst, half */
11703       begin_emit_instruction(emit);
11704       emit_opcode(emit, VGPU10_OPCODE_ADD, false);
11705       emit_dst_register(emit, &tmp_dst);
11706       emit_src_register(emit, &tmp_src);
11707       emit_src_register(emit, &half);
11708       end_emit_instruction(emit);
11709    }
11710 }
11711 
11712 
11713 /**
11714  * Emit extra instructions to adjust VS inputs/attributes.  This can
11715  * mean casting a vertex attribute from int to float or setting the
11716  * W component to 1, or both.
11717  */
11718 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)11719 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
11720 {
11721    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
11722    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
11723    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
11724    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
11725    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
11726    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
11727    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
11728 
11729    unsigned adjust_mask = (save_w_1_mask |
11730                            save_itof_mask |
11731                            save_utof_mask |
11732                            save_is_bgra_mask |
11733                            save_puint_to_snorm_mask |
11734                            save_puint_to_uscaled_mask |
11735                            save_puint_to_sscaled_mask);
11736 
11737    assert(emit->unit == PIPE_SHADER_VERTEX);
11738 
11739    if (adjust_mask) {
11740       struct tgsi_full_src_register one =
11741          make_immediate_reg_float(emit, 1.0f);
11742 
11743       struct tgsi_full_src_register one_int =
11744          make_immediate_reg_int(emit, 1);
11745 
11746       /* We need to turn off these bitmasks while emitting the
11747        * instructions below, then restore them afterward.
11748        */
11749       emit->key.vs.adjust_attrib_w_1 = 0;
11750       emit->key.vs.adjust_attrib_itof = 0;
11751       emit->key.vs.adjust_attrib_utof = 0;
11752       emit->key.vs.attrib_is_bgra = 0;
11753       emit->key.vs.attrib_puint_to_snorm = 0;
11754       emit->key.vs.attrib_puint_to_uscaled = 0;
11755       emit->key.vs.attrib_puint_to_sscaled = 0;
11756 
11757       while (adjust_mask) {
11758          unsigned index = u_bit_scan(&adjust_mask);
11759 
11760          /* skip the instruction if this vertex attribute is not being used */
11761          if (emit->info.input_usage_mask[index] == 0)
11762             continue;
11763 
11764          unsigned tmp = emit->vs.adjusted_input[index];
11765          struct tgsi_full_src_register input_src =
11766             make_src_reg(TGSI_FILE_INPUT, index);
11767 
11768          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11769          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11770          struct tgsi_full_dst_register tmp_dst_w =
11771             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11772 
11773          /* ITOF/UTOF/MOV tmp, input[index] */
11774          if (save_itof_mask & (1 << index)) {
11775             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
11776                                  &tmp_dst, &input_src);
11777          }
11778          else if (save_utof_mask & (1 << index)) {
11779             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
11780                                  &tmp_dst, &input_src);
11781          }
11782          else if (save_puint_to_snorm_mask & (1 << index)) {
11783             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
11784          }
11785          else if (save_puint_to_uscaled_mask & (1 << index)) {
11786             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
11787          }
11788          else if (save_puint_to_sscaled_mask & (1 << index)) {
11789             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
11790          }
11791          else {
11792             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
11793             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11794                                  &tmp_dst, &input_src);
11795          }
11796 
11797          if (save_is_bgra_mask & (1 << index)) {
11798             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
11799          }
11800 
11801          if (save_w_1_mask & (1 << index)) {
11802             /* MOV tmp.w, 1.0 */
11803             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
11804                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11805                                     &tmp_dst_w, &one_int);
11806             }
11807             else {
11808                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11809                                     &tmp_dst_w, &one);
11810             }
11811          }
11812       }
11813 
11814       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
11815       emit->key.vs.adjust_attrib_itof = save_itof_mask;
11816       emit->key.vs.adjust_attrib_utof = save_utof_mask;
11817       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
11818       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
11819       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
11820       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
11821    }
11822 }
11823 
11824 
11825 /* Find zero-value immedate for default layer index */
11826 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)11827 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
11828 {
11829    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11830 
11831    /* immediate for default layer index 0 */
11832    if (emit->fs.layer_input_index != INVALID_INDEX) {
11833       union tgsi_immediate_data imm;
11834       imm.Int = 0;
11835       emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
11836    }
11837 }
11838 
11839 
11840 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)11841 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11842                              unsigned cbuf_index,
11843                              struct tgsi_full_dst_register *scale,
11844                              struct tgsi_full_dst_register *translate)
11845 {
11846    struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
11847    struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
11848 
11849    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
11850    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
11851 }
11852 
11853 
11854 /**
11855  * A recursive helper function to find the prescale from the constant buffer
11856  */
11857 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)11858 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11859                         unsigned index, unsigned num_prescale,
11860                         struct tgsi_full_src_register *vp_index,
11861                         struct tgsi_full_dst_register *scale,
11862                         struct tgsi_full_dst_register *translate,
11863                         struct tgsi_full_src_register *tmp_src,
11864                         struct tgsi_full_dst_register *tmp_dst)
11865 {
11866    if (num_prescale == 0)
11867       return;
11868 
11869    if (index > 0) {
11870       /* ELSE */
11871       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
11872    }
11873 
11874    struct tgsi_full_src_register index_src =
11875 	                            make_immediate_reg_int(emit, index);
11876 
11877    if (index == 0) {
11878       /* GE tmp, vp_index, index */
11879       emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
11880                            vp_index, &index_src);
11881    } else {
11882       /* EQ tmp, vp_index, index */
11883       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
11884                            vp_index, &index_src);
11885    }
11886 
11887    /* IF tmp */
11888    emit_if(emit, tmp_src);
11889    emit_temp_prescale_from_cbuf(emit,
11890                                 emit->vposition.prescale_cbuf_index + 2 * index,
11891                                 scale, translate);
11892 
11893    find_prescale_from_cbuf(emit, index+1, num_prescale-1,
11894                            vp_index, scale, translate,
11895                            tmp_src, tmp_dst);
11896 
11897    /* ENDIF */
11898    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
11899 }
11900 
11901 
11902 /**
11903  * This helper function emits instructions to set the prescale
11904  * and translate temporaries to the correct constants from the
11905  * constant buffer according to the designated viewport.
11906  */
11907 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)11908 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
11909 {
11910    struct tgsi_full_dst_register prescale_scale =
11911          make_dst_temp_reg(emit->vposition.prescale_scale_index);
11912    struct tgsi_full_dst_register prescale_translate =
11913          make_dst_temp_reg(emit->vposition.prescale_trans_index);
11914 
11915    unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
11916 
11917    if (emit->vposition.num_prescale == 1) {
11918       emit_temp_prescale_from_cbuf(emit,
11919                                    prescale_cbuf_index,
11920                                    &prescale_scale, &prescale_translate);
11921    } else {
11922       /**
11923        * Since SM5 device does not support dynamic indexing, we need
11924        * to do the if-else to find the prescale constants for the
11925        * specified viewport.
11926        */
11927       struct tgsi_full_src_register vp_index_src =
11928          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11929 
11930       struct tgsi_full_src_register vp_index_src_x =
11931          scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
11932 
11933       unsigned tmp = get_temp_index(emit);
11934       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11935       struct tgsi_full_src_register tmp_src_x =
11936                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
11937       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11938 
11939       find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
11940                               &vp_index_src_x,
11941 		              &prescale_scale, &prescale_translate,
11942                               &tmp_src_x, &tmp_dst);
11943    }
11944 
11945    /* Mark prescale temporaries are emitted */
11946    emit->vposition.have_prescale = 1;
11947 }
11948 
11949 
11950 /**
11951  * A helper function to emit an instruction in a vertex shader to add a bias
11952  * to the VertexID system value. This patches the VertexID in the SVGA vertex
11953  * shader to include the base vertex of an indexed primitive or the start index
11954  * of a non-indexed primitive.
11955  */
11956 static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 * emit)11957 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
11958 {
11959    struct tgsi_full_src_register vertex_id_bias_index =
11960       make_src_const_reg(emit->vs.vertex_id_bias_index);
11961    struct tgsi_full_src_register vertex_id_sys_src =
11962       make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
11963    struct tgsi_full_src_register vertex_id_sys_src_x =
11964       scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
11965    struct tgsi_full_dst_register vertex_id_tmp_dst =
11966       make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
11967 
11968    /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
11969    unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
11970    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11971    emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
11972                         &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, false,
11973                         false);
11974    emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
11975 }
11976 
11977 /**
11978  * Hull Shader must have control point outputs. But tessellation
11979  * control shader can return without writing to control point output.
11980  * In this case, the control point output is assumed to be passthrough
11981  * from the control point input.
11982  * This helper function is to write out a control point output first in case
11983  * the tessellation control shader returns before writing a
11984  * control point output.
11985  */
11986 static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 * emit)11987 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
11988 {
11989    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
11990    assert(emit->tcs.control_point_phase);
11991    assert(emit->tcs.control_point_out_index != INVALID_INDEX);
11992    assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
11993 
11994    struct tgsi_full_dst_register output_control_point;
11995    output_control_point =
11996       make_dst_output_reg(emit->tcs.control_point_out_index);
11997 
11998    if (emit->tcs.control_point_input_index == INVALID_INDEX) {
11999       /* MOV OUTPUT 0.0f */
12000       struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
12001       begin_emit_instruction(emit);
12002       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12003       emit_dst_register(emit, &output_control_point);
12004       emit_src_register(emit, &zero);
12005       end_emit_instruction(emit);
12006    }
12007    else {
12008       /* UARL ADDR[INDEX].x INVOCATION.xxxx */
12009 
12010       struct tgsi_full_src_register invocation_src;
12011       struct tgsi_full_dst_register addr_dst;
12012       struct tgsi_full_dst_register addr_dst_x;
12013       unsigned addr_tmp;
12014 
12015       addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
12016       addr_dst = make_dst_temp_reg(addr_tmp);
12017       addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
12018 
12019       invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
12020                                     emit->tcs.invocation_id_sys_index);
12021 
12022       begin_emit_instruction(emit);
12023       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12024       emit_dst_register(emit, &addr_dst_x);
12025       emit_src_register(emit, &invocation_src);
12026       end_emit_instruction(emit);
12027 
12028 
12029       /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
12030 
12031       struct tgsi_full_src_register input_control_point;
12032       input_control_point = make_src_reg(TGSI_FILE_INPUT,
12033                                          emit->tcs.control_point_input_index);
12034       input_control_point.Register.Dimension = 1;
12035       input_control_point.Dimension.Indirect = 1;
12036       input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
12037       input_control_point.DimIndirect.Index =
12038          emit->tcs.control_point_addr_index;
12039 
12040       begin_emit_instruction(emit);
12041       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, false, false);
12042       emit_dst_register(emit, &output_control_point);
12043       emit_src_register(emit, &input_control_point);
12044       end_emit_instruction(emit);
12045    }
12046 }
12047 
12048 /**
12049  * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
12050  * values in domain shader. SM5 has tessfactors as floating point values where
12051  * as tgsi emit them as vector. This function allows to construct temp
12052  * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
12053  * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
12054  * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
12055  */
12056 static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 * emit)12057 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
12058 {
12059    struct tgsi_full_src_register src;
12060    struct tgsi_full_dst_register dst;
12061 
12062    if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
12063       dst = make_dst_temp_reg(emit->tes.inner.temp_index);
12064 
12065       switch (emit->tes.prim_mode) {
12066       case MESA_PRIM_QUADS:
12067          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12068                   emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
12069          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12070          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12071          FALLTHROUGH;
12072       case MESA_PRIM_TRIANGLES:
12073          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12074                   emit->tes.inner.in_index, TGSI_SWIZZLE_X);
12075          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12076          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12077          break;
12078       case MESA_PRIM_LINES:
12079          /**
12080           * As per SM5 spec, InsideTessFactor for isolines are unused.
12081           * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
12082           * any application try to read tessInnerLevel in TES when primitive type
12083           * is isolines, then instead of driver throwing segfault for accesing it,
12084           * return atleast vec(1.0f)
12085           */
12086          src = make_immediate_reg_float(emit, 1.0f);
12087          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12088          break;
12089       default:
12090          break;
12091       }
12092    }
12093 
12094    if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
12095       dst = make_dst_temp_reg(emit->tes.outer.temp_index);
12096 
12097       switch (emit->tes.prim_mode) {
12098       case MESA_PRIM_QUADS:
12099          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12100                   emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
12101          dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
12102          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12103          FALLTHROUGH;
12104       case MESA_PRIM_TRIANGLES:
12105          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12106                   emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
12107          dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
12108          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12109          FALLTHROUGH;
12110       case MESA_PRIM_LINES:
12111          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12112                   emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
12113          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12114          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12115 
12116          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12117                   emit->tes.outer.in_index , TGSI_SWIZZLE_X);
12118          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12119          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12120 
12121          break;
12122       default:
12123          break;
12124       }
12125    }
12126 }
12127 
12128 
12129 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)12130 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
12131 {
12132    struct tgsi_full_src_register src;
12133    struct tgsi_full_dst_register dst;
12134    unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
12135                                                  emit->initialize_temp_index);
12136    src = make_immediate_reg_float(emit, 0.0f);
12137    dst = make_dst_temp_reg(vgpu10_temp_index);
12138    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12139    emit->temp_map[emit->initialize_temp_index].initialized = true;
12140    emit->initialize_temp_index = INVALID_INDEX;
12141 }
12142 
12143 
12144 /**
12145  * Emit any extra/helper declarations/code that we might need between
12146  * the declaration section and code section.
12147  */
12148 static bool
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)12149 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
12150 {
12151    /* Properties */
12152    if (emit->unit == PIPE_SHADER_GEOMETRY)
12153       emit_property_instructions(emit);
12154    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12155       emit_hull_shader_declarations(emit);
12156 
12157       /* Save the position of the first instruction token so that we can
12158        * do a second pass of the instructions for the patch constant phase.
12159        */
12160       emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
12161       emit->tcs.fork_phase_add_signature = false;
12162 
12163       if (!emit_hull_shader_control_point_phase(emit)) {
12164          emit->skip_instruction = true;
12165          return true;
12166       }
12167 
12168       /* Set the current tcs phase to control point phase */
12169       emit->tcs.control_point_phase = true;
12170    }
12171    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12172       emit_domain_shader_declarations(emit);
12173    }
12174    else if (emit->unit == PIPE_SHADER_COMPUTE) {
12175       emit_compute_shader_declarations(emit);
12176    }
12177 
12178    /* Declare inputs */
12179    if (!emit_input_declarations(emit))
12180       return false;
12181 
12182    /* Declare outputs */
12183    if (!emit_output_declarations(emit))
12184       return false;
12185 
12186    /* Declare temporary registers */
12187    emit_temporaries_declaration(emit);
12188 
12189    /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
12190     * will already be declared in hs_decls (emit_hull_shader_declarations)
12191     */
12192    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12193 
12194       alloc_common_immediates(emit);
12195 
12196       /* Declare constant registers */
12197       emit_constant_declaration(emit);
12198 
12199       /* Declare samplers and resources */
12200       emit_sampler_declarations(emit);
12201       emit_resource_declarations(emit);
12202 
12203       /* Declare images */
12204       emit_image_declarations(emit);
12205 
12206       /* Declare shader buffers */
12207       emit_shader_buf_declarations(emit);
12208 
12209       /* Declare atomic buffers */
12210       emit_atomic_buf_declarations(emit);
12211    }
12212 
12213    if (emit->unit != PIPE_SHADER_FRAGMENT &&
12214        emit->unit != PIPE_SHADER_COMPUTE) {
12215       /*
12216        * Declare clip distance output registers for ClipVertex or
12217        * user defined planes
12218        */
12219       emit_clip_distance_declarations(emit);
12220    }
12221 
12222    if (emit->unit == PIPE_SHADER_COMPUTE) {
12223       emit_memory_declarations(emit);
12224 
12225       if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
12226          emit->cs.grid_size.imm_index =
12227             alloc_immediate_int4(emit,
12228                                  emit->key.cs.grid_size[0],
12229                                  emit->key.cs.grid_size[1],
12230                                  emit->key.cs.grid_size[2], 0);
12231       }
12232    }
12233 
12234    if (emit->unit == PIPE_SHADER_FRAGMENT &&
12235        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12236       float alpha = emit->key.fs.alpha_ref;
12237       emit->fs.alpha_ref_index =
12238          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
12239    }
12240 
12241    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12242       /**
12243        * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
12244        * hs_decls
12245        */
12246       emit_vgpu10_immediates_block(emit);
12247    }
12248    else {
12249       emit_tcs_default_control_point_output(emit);
12250    }
12251 
12252    if (emit->unit == PIPE_SHADER_FRAGMENT) {
12253       emit_frontface_instructions(emit);
12254       emit_fragcoord_instructions(emit);
12255       emit_sample_position_instructions(emit);
12256       emit_default_layer_instructions(emit);
12257    }
12258    else if (emit->unit == PIPE_SHADER_VERTEX) {
12259       emit_vertex_attrib_instructions(emit);
12260 
12261       if (emit->info.uses_vertexid)
12262          emit_vertex_id_nobase_instruction(emit);
12263    }
12264    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12265       emit_temp_tessfactor_instructions(emit);
12266    }
12267 
12268    /**
12269     * For geometry shader that writes to viewport index, the prescale
12270     * temporaries will be done at the first vertex emission.
12271     */
12272    if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
12273       emit_temp_prescale_instructions(emit);
12274 
12275    return true;
12276 }
12277 
12278 
12279 /**
12280  * The device has no direct support for the pipe_blend_state::alpha_to_one
12281  * option so we implement it here with shader code.
12282  *
12283  * Note that this is kind of pointless, actually.  Here we're clobbering
12284  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
12285  * up with 100% coverage.  That's almost certainly not what the user wants.
12286  * The work-around is to add extra shader code to compute coverage from alpha
12287  * and write it to the coverage output register (if the user's shader doesn't
12288  * do so already).  We'll probably do that in the future.
12289  */
12290 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12291 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
12292                                unsigned fs_color_tmp_index)
12293 {
12294    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
12295    unsigned i;
12296 
12297    /* Note: it's not 100% clear from the spec if we're supposed to clobber
12298     * the alpha for all render targets.  But that's what NVIDIA does and
12299     * that's what Piglit tests.
12300     */
12301    for (i = 0; i < emit->fs.num_color_outputs; i++) {
12302       struct tgsi_full_dst_register color_dst;
12303 
12304       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
12305          /* write to the temp color register */
12306          color_dst = make_dst_temp_reg(fs_color_tmp_index);
12307       }
12308       else {
12309          /* write directly to the color[i] output */
12310          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
12311       }
12312 
12313       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
12314 
12315       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
12316    }
12317 }
12318 
12319 
12320 /**
12321  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
12322  * against the alpha reference value and discards the fragment if the
12323  * comparison fails.
12324  */
12325 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12326 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
12327                              unsigned fs_color_tmp_index)
12328 {
12329    /* compare output color's alpha to alpha ref and discard if comparison
12330     * fails.
12331     */
12332    unsigned tmp = get_temp_index(emit);
12333    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
12334    struct tgsi_full_src_register tmp_src_x =
12335       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
12336    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
12337    struct tgsi_full_src_register color_src =
12338       make_src_temp_reg(fs_color_tmp_index);
12339    struct tgsi_full_src_register color_src_w =
12340       scalar_src(&color_src, TGSI_SWIZZLE_W);
12341    struct tgsi_full_src_register ref_src =
12342       make_src_immediate_reg(emit->fs.alpha_ref_index);
12343    struct tgsi_full_dst_register color_dst =
12344       make_dst_output_reg(emit->fs.color_out_index[0]);
12345 
12346    assert(emit->unit == PIPE_SHADER_FRAGMENT);
12347 
12348    /* dst = src0 'alpha_func' src1 */
12349    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
12350                    &color_src_w, &ref_src);
12351 
12352    /* DISCARD if dst.x == 0 */
12353    begin_emit_instruction(emit);
12354    emit_discard_opcode(emit, false);  /* discard if src0.x is zero */
12355    emit_src_register(emit, &tmp_src_x);
12356    end_emit_instruction(emit);
12357 
12358    /* If we don't need to broadcast the color below, emit the final color here.
12359     */
12360    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
12361       /* MOV output.color, tempcolor */
12362       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12363    }
12364 
12365    free_temp_indexes(emit);
12366 }
12367 
12368 
12369 /**
12370  * Emit instructions for writing a single color output to multiple
12371  * color buffers.
12372  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
12373  * when key.fs.white_fragments is true).
12374  * property is set and the number of render targets is greater than one.
12375  * \param fs_color_tmp_index  index of the temp register that holds the
12376  *                            color to broadcast.
12377  */
12378 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12379 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
12380                                  unsigned fs_color_tmp_index)
12381 {
12382    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
12383    unsigned i;
12384    struct tgsi_full_src_register color_src;
12385 
12386    if (emit->key.fs.white_fragments) {
12387       /* set all color outputs to white */
12388       color_src = make_immediate_reg_float(emit, 1.0f);
12389    }
12390    else {
12391       /* set all color outputs to TEMP[fs_color_tmp_index] */
12392       assert(fs_color_tmp_index != INVALID_INDEX);
12393       color_src = make_src_temp_reg(fs_color_tmp_index);
12394    }
12395 
12396    assert(emit->unit == PIPE_SHADER_FRAGMENT);
12397 
12398    for (i = 0; i < n; i++) {
12399       unsigned output_reg = emit->fs.color_out_index[i];
12400       struct tgsi_full_dst_register color_dst =
12401          make_dst_output_reg(output_reg);
12402 
12403       /* Fill in this semantic here since we'll use it later in
12404        * emit_dst_register().
12405        */
12406       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
12407 
12408       /* MOV output.color[i], tempcolor */
12409       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12410    }
12411 }
12412 
12413 
12414 /**
12415  * Emit extra helper code after the original shader code, but before the
12416  * last END/RET instruction.
12417  * For vertex shaders this means emitting the extra code to apply the
12418  * prescale scale/translation.
12419  */
12420 static bool
emit_post_helpers(struct svga_shader_emitter_v10 * emit)12421 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
12422 {
12423    if (emit->unit == PIPE_SHADER_VERTEX) {
12424       emit_vertex_instructions(emit);
12425    }
12426    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
12427       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
12428 
12429       assert(!(emit->key.fs.white_fragments &&
12430                emit->key.fs.write_color0_to_n_cbufs == 0));
12431 
12432       /* We no longer want emit_dst_register() to substitute the
12433        * temporary fragment color register for the real color output.
12434        */
12435       emit->fs.color_tmp_index = INVALID_INDEX;
12436 
12437       if (emit->key.fs.alpha_to_one) {
12438          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
12439       }
12440       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12441          emit_alpha_test_instructions(emit, fs_color_tmp_index);
12442       }
12443       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
12444           emit->key.fs.white_fragments) {
12445          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
12446       }
12447    }
12448    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12449       if (!emit->tcs.control_point_phase) {
12450          /* store the tessellation levels in the patch constant phase only */
12451          store_tesslevels(emit);
12452       }
12453       else {
12454          emit_clipping_instructions(emit);
12455       }
12456    }
12457    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12458       emit_vertex_instructions(emit);
12459    }
12460 
12461    return true;
12462 }
12463 
12464 
12465 /**
12466  * Reemit rawbuf instruction
12467  */
12468 static bool
emit_rawbuf_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)12469 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
12470                         unsigned inst_number,
12471                         const struct tgsi_full_instruction *inst)
12472 {
12473    bool ret;
12474 
12475    /* For all the rawbuf references in this instruction,
12476     * load the rawbuf reference and assign to the designated temporary.
12477     * Then reeemit the instruction.
12478     */
12479    emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;
12480 
12481    unsigned offset_tmp = get_temp_index(emit);
12482    struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
12483    struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
12484    struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
12485 
12486    for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
12487       struct tgsi_full_src_register element_src;
12488 
12489       /* First get the element index register. */
12490 
12491       if (emit->raw_buf_tmp[i].indirect) {
12492          unsigned tmp = get_temp_index(emit);
12493          struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
12494          struct tgsi_full_src_register element_index =
12495             make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12496          struct tgsi_full_src_register element_rel =
12497             make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);
12498 
12499          element_src = make_src_temp_reg(tmp);
12500          element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
12501          element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);
12502 
12503          /* element index from the indirect register */
12504          element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12505          element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);
12506 
12507          /* IADD element_src element_index element_index_relative */
12508          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
12509                               &element_index, &element_rel);
12510       }
12511       else {
12512          unsigned element_index = emit->raw_buf_tmp[i].element_index;
12513          union tgsi_immediate_data imm;
12514          imm.Int = element_index;
12515          int immpos = find_immediate(emit, imm, 0);
12516          if (immpos < 0) {
12517             UNUSED unsigned element_index_imm =
12518 		                add_immediate_int(emit, element_index);
12519          }
12520          element_src = make_immediate_reg_int(emit, element_index);
12521       }
12522 
12523       /* byte offset = element index << 4 */
12524       emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
12525                            &element_src, &four);
12526 
12527       struct tgsi_full_dst_register dst_tmp =
12528          make_dst_temp_reg(i + emit->raw_buf_tmp_index);
12529 
12530       /* LD_RAW tmp, rawbuf byte offset, rawbuf */
12531 
12532       begin_emit_instruction(emit);
12533       emit_opcode(emit, VGPU10_OPCODE_LD_RAW, false);
12534       emit_dst_register(emit, &dst_tmp);
12535 
12536       struct tgsi_full_src_register offset_x =
12537             scalar_src(&offset_src, TGSI_SWIZZLE_X);
12538       emit_src_register(emit, &offset_x);
12539 
12540       emit_resource_register(emit,
12541          emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
12542       end_emit_instruction(emit);
12543    }
12544 
12545    emit->raw_buf_cur_tmp_index = 0;
12546 
12547    ret = emit_vgpu10_instruction(emit, inst_number, inst);
12548 
12549    /* reset raw buf state */
12550    emit->raw_buf_cur_tmp_index = 0;
12551    emit->reemit_rawbuf_instruction = REEMIT_FALSE;
12552 
12553    free_temp_indexes(emit);
12554 
12555    return ret;
12556 }
12557 
12558 
12559 /**
12560  * Translate the TGSI tokens into VGPU10 tokens.
12561  */
12562 static bool
emit_vgpu10_instructions(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12563 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
12564                          const struct tgsi_token *tokens)
12565 {
12566    struct tgsi_parse_context parse;
12567    bool ret = true;
12568    bool pre_helpers_emitted = false;
12569    unsigned inst_number = 0;
12570 
12571    tgsi_parse_init(&parse, tokens);
12572 
12573    while (!tgsi_parse_end_of_tokens(&parse)) {
12574 
12575       /* Save the current tgsi token starting position */
12576       emit->cur_tgsi_token = parse.Position;
12577 
12578       tgsi_parse_token(&parse);
12579 
12580       switch (parse.FullToken.Token.Type) {
12581       case TGSI_TOKEN_TYPE_IMMEDIATE:
12582          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
12583          if (!ret)
12584             goto done;
12585          break;
12586 
12587       case TGSI_TOKEN_TYPE_DECLARATION:
12588          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
12589          if (!ret)
12590             goto done;
12591          break;
12592 
12593       case TGSI_TOKEN_TYPE_INSTRUCTION:
12594          if (!pre_helpers_emitted) {
12595             ret = emit_pre_helpers(emit);
12596             if (!ret)
12597                goto done;
12598             pre_helpers_emitted = true;
12599          }
12600          ret = emit_vgpu10_instruction(emit, inst_number++,
12601                                        &parse.FullToken.FullInstruction);
12602 
12603          /* Usually this applies to TCS only. If shader is reading control
12604           * point outputs in control point phase, we should reemit all
12605           * instructions which are writting into control point output in
12606           * control phase to store results into temporaries.
12607           */
12608          if (emit->reemit_instruction) {
12609             assert(emit->unit == PIPE_SHADER_TESS_CTRL);
12610             ret = emit_vgpu10_instruction(emit, inst_number,
12611                                           &parse.FullToken.FullInstruction);
12612          }
12613          else if (emit->initialize_temp_index != INVALID_INDEX) {
12614             emit_initialize_temp_instruction(emit);
12615             emit->initialize_temp_index = INVALID_INDEX;
12616             ret = emit_vgpu10_instruction(emit, inst_number - 1,
12617                                           &parse.FullToken.FullInstruction);
12618          }
12619          else if (emit->reemit_rawbuf_instruction) {
12620             ret = emit_rawbuf_instruction(emit, inst_number - 1,
12621                                           &parse.FullToken.FullInstruction);
12622          }
12623 
12624          if (!ret)
12625             goto done;
12626          break;
12627 
12628       case TGSI_TOKEN_TYPE_PROPERTY:
12629          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
12630          if (!ret)
12631             goto done;
12632          break;
12633 
12634       default:
12635          break;
12636       }
12637    }
12638 
12639    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12640       ret = emit_hull_shader_patch_constant_phase(emit, &parse);
12641    }
12642 
12643 done:
12644    tgsi_parse_free(&parse);
12645    return ret;
12646 }
12647 
12648 
12649 /**
12650  * Emit the first VGPU10 shader tokens.
12651  */
12652 static bool
emit_vgpu10_header(struct svga_shader_emitter_v10 * emit)12653 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
12654 {
12655    VGPU10ProgramToken ptoken;
12656 
12657    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
12658 
12659    /* Maximum supported shader version is 50 */
12660    unsigned version = MIN2(emit->version, 50);
12661 
12662    ptoken.value = 0; /* init whole token to zero */
12663    ptoken.majorVersion = version / 10;
12664    ptoken.minorVersion = version % 10;
12665    ptoken.programType = translate_shader_type(emit->unit);
12666    if (!emit_dword(emit, ptoken.value))
12667       return false;
12668 
12669    /* Second token: total length of shader, in tokens.  We can't fill this
12670     * in until we're all done.  Emit zero for now.
12671     */
12672    if (!emit_dword(emit, 0))
12673       return false;
12674 
12675    if (emit->version >= 50) {
12676       VGPU10OpcodeToken0 token;
12677 
12678       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12679          /* For hull shader, we need to start the declarations phase first before
12680           * emitting any declarations including the global flags.
12681           */
12682          token.value = 0;
12683          token.opcodeType = VGPU10_OPCODE_HS_DECLS;
12684          begin_emit_instruction(emit);
12685          emit_dword(emit, token.value);
12686          end_emit_instruction(emit);
12687       }
12688 
12689       /* Emit global flags */
12690       token.value = 0;    /* init whole token to zero */
12691       token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12692       token.enableDoublePrecisionFloatOps = 1;  /* set bit */
12693       token.instructionLength = 1;
12694       if (!emit_dword(emit, token.value))
12695          return false;
12696    }
12697 
12698    if (emit->version >= 40) {
12699       VGPU10OpcodeToken0 token;
12700 
12701       /* Reserved for global flag such as refactoringAllowed.
12702        * If the shader does not use the precise qualifier, we will set the
12703        * refactoringAllowed global flag; otherwise, we will leave the reserved
12704        * token to NOP.
12705        */
12706       emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
12707       token.value = 0;
12708       token.opcodeType = VGPU10_OPCODE_NOP;
12709       token.instructionLength = 1;
12710       if (!emit_dword(emit, token.value))
12711          return false;
12712    }
12713 
12714    return true;
12715 }
12716 
12717 
12718 static bool
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)12719 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
12720 {
12721    VGPU10ProgramToken *tokens;
12722 
12723    /* Replace the second token with total shader length */
12724    tokens = (VGPU10ProgramToken *) emit->buf;
12725    tokens[1].value = emit_get_num_tokens(emit);
12726 
12727    if (emit->version >= 40 && !emit->uses_precise_qualifier) {
12728       /* Replace the reserved token with the RefactoringAllowed global flag */
12729       VGPU10OpcodeToken0 *ptoken;
12730 
12731       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12732       assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
12733       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12734       ptoken->refactoringAllowed = 1;
12735    }
12736 
12737    if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
12738       /* Replace the reserved token with the forceEarlyDepthStencil  global flag */
12739       VGPU10OpcodeToken0 *ptoken;
12740 
12741       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12742       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12743       ptoken->forceEarlyDepthStencil = 1;
12744    }
12745 
12746    return true;
12747 }
12748 
12749 
/**
 * Rewrite a fragment shader so that it reads the BCOLORs and uses the
 * FACE register to choose between the front and back colors.
 *
 * \param tokens  the fragment shader tokens to transform
 * \return the token array returned by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *twoside_tokens;

   /* debug aid, disabled */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }

   twoside_tokens = tgsi_add_two_side(tokens);

   /* debug aid, disabled */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(twoside_tokens, 0);
   }

   return twoside_tokens;
}
12768 
12769 
12770 /**
12771  * Modify the FS to do polygon stipple.
12772  */
12773 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12774 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
12775                       const struct tgsi_token *tokens)
12776 {
12777    const struct tgsi_token *new_tokens;
12778    unsigned unit;
12779 
12780    if (0) {
12781       debug_printf("Before pstipple ------------------\n");
12782       tgsi_dump(tokens,0);
12783    }
12784 
12785    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
12786                                                      TGSI_FILE_INPUT);
12787 
12788    emit->fs.pstipple_sampler_unit = unit;
12789 
12790    /* The new sampler state is appended to the end of the samplers list */
12791    emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
12792 
12793    /* Setup texture state for stipple */
12794    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
12795    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
12796    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
12797    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
12798    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
12799    emit->key.tex[unit].target = PIPE_TEXTURE_2D;
12800    emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
12801 
12802    if (0) {
12803       debug_printf("After pstipple ------------------\n");
12804       tgsi_dump(new_tokens, 0);
12805    }
12806 
12807    return new_tokens;
12808 }
12809 
12810 /**
12811  * Modify the FS to support anti-aliasing point.
12812  */
12813 static const struct tgsi_token *
transform_fs_aapoint(struct svga_context * svga,const struct tgsi_token * tokens,int aa_coord_index)12814 transform_fs_aapoint(struct svga_context *svga,
12815 		     const struct tgsi_token *tokens,
12816                      int aa_coord_index)
12817 {
12818    bool need_texcoord_semantic =
12819       svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD);
12820 
12821    if (0) {
12822       debug_printf("Before tgsi_add_aa_point ------------------\n");
12823       tgsi_dump(tokens,0);
12824    }
12825    tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic);
12826    if (0) {
12827       debug_printf("After tgsi_add_aa_point ------------------\n");
12828       tgsi_dump(tokens, 0);
12829    }
12830    return tokens;
12831 }
12832 
12833 
12834 /**
12835  * A helper function to determine the shader in the previous stage and
12836  * then call the linker function to determine the input mapping for this
12837  * shader to match the output indices from the shader in the previous stage.
12838  */
12839 static void
compute_input_mapping(struct svga_context * svga,struct svga_shader_emitter_v10 * emit,enum pipe_shader_type unit)12840 compute_input_mapping(struct svga_context *svga,
12841                       struct svga_shader_emitter_v10 *emit,
12842                       enum pipe_shader_type unit)
12843 {
12844    struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
12845 
12846    if (unit == PIPE_SHADER_FRAGMENT) {
12847       prevShader = svga->curr.gs ?
12848          &svga->curr.gs->base : (svga->curr.tes ?
12849          &svga->curr.tes->base : &svga->curr.vs->base);
12850    } else if (unit == PIPE_SHADER_GEOMETRY) {
12851       prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
12852    } else if (unit == PIPE_SHADER_TESS_EVAL) {
12853       assert(svga->curr.tcs);
12854       prevShader = &svga->curr.tcs->base;
12855    } else if (unit == PIPE_SHADER_TESS_CTRL) {
12856       assert(svga->curr.vs);
12857       prevShader = &svga->curr.vs->base;
12858    }
12859 
12860    if (prevShader != NULL) {
12861       svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage);
12862       emit->prevShaderInfo = &prevShader->tgsi_info;
12863    }
12864    else {
12865       /**
12866        * Since vertex shader does not need to go through the linker to
12867        * establish the input map, we need to make sure the highest index
12868        * of input registers is set properly here.
12869        */
12870       emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
12871                                          emit->info.file_max[TGSI_FILE_INPUT]);
12872    }
12873 }
12874 
12875 
12876 /**
12877  * Copies the shader signature info to the shader variant
12878  */
12879 static void
copy_shader_signature(struct svga_shader_signature * sgn,struct svga_shader_variant * variant)12880 copy_shader_signature(struct svga_shader_signature *sgn,
12881                       struct svga_shader_variant *variant)
12882 {
12883    SVGA3dDXShaderSignatureHeader *header = &sgn->header;
12884 
12885    /* Calculate the signature length */
12886    variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
12887                            (header->numInputSignatures +
12888                             header->numOutputSignatures +
12889                             header->numPatchConstantSignatures) *
12890                            sizeof(SVGA3dDXShaderSignatureEntry);
12891 
12892    /* Allocate buffer for the signature info */
12893    variant->signature =
12894       (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
12895 
12896    char *sgnBuf = (char *)variant->signature;
12897    unsigned sgnLen;
12898 
12899    /* Copy the signature info to the shader variant structure */
12900    memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
12901    sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
12902 
12903    if (header->numInputSignatures) {
12904       sgnLen =
12905          header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12906       memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
12907       sgnBuf += sgnLen;
12908    }
12909 
12910    if (header->numOutputSignatures) {
12911       sgnLen =
12912          header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12913       memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
12914       sgnBuf += sgnLen;
12915    }
12916 
12917    if (header->numPatchConstantSignatures) {
12918       sgnLen =
12919          header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12920       memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
12921    }
12922 }
12923 
12924 
12925 /**
12926  * This is the main entrypoint for the TGSI -> VPGU10 translator.
12927  */
12928 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,enum pipe_shader_type unit)12929 svga_tgsi_vgpu10_translate(struct svga_context *svga,
12930                            const struct svga_shader *shader,
12931                            const struct svga_compile_key *key,
12932                            enum pipe_shader_type unit)
12933 {
12934    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
12935    struct svga_shader_variant *variant = NULL;
12936    struct svga_shader_emitter_v10 *emit;
12937    const struct tgsi_token *tokens = shader->tokens;
12938 
12939    (void) make_immediate_reg_double;   /* unused at this time */
12940 
12941    assert(unit == PIPE_SHADER_VERTEX ||
12942           unit == PIPE_SHADER_GEOMETRY ||
12943           unit == PIPE_SHADER_FRAGMENT ||
12944           unit == PIPE_SHADER_TESS_CTRL ||
12945           unit == PIPE_SHADER_TESS_EVAL ||
12946           unit == PIPE_SHADER_COMPUTE);
12947 
12948    /* These two flags cannot be used together */
12949    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
12950 
12951    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
12952    /*
12953     * Setup the code emitter
12954     */
12955    emit = alloc_emitter();
12956    if (!emit)
12957       goto done;
12958 
12959    emit->unit = unit;
12960    if (svga_have_gl43(svga)) {
12961       emit->version = 51;
12962    } else if (svga_have_sm5(svga)) {
12963       emit->version = 50;
12964    } else if (svga_have_sm4_1(svga)) {
12965       emit->version = 41;
12966    } else {
12967       emit->version = 40;
12968    }
12969 
12970    emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
12971 
12972    emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
12973 
12974    emit->key = *key;
12975 
12976    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
12977                                     emit->key.gs.need_prescale ||
12978                                     emit->key.tes.need_prescale);
12979 
12980    /* Determine how many prescale factors in the constant buffer */
12981    emit->vposition.num_prescale = 1;
12982    if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
12983       assert(emit->unit == PIPE_SHADER_GEOMETRY);
12984       emit->vposition.num_prescale = emit->key.gs.num_prescale;
12985    }
12986 
12987    emit->vposition.tmp_index = INVALID_INDEX;
12988    emit->vposition.so_index = INVALID_INDEX;
12989    emit->vposition.out_index = INVALID_INDEX;
12990 
12991    emit->vs.vertex_id_sys_index = INVALID_INDEX;
12992    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
12993    emit->vs.vertex_id_bias_index = INVALID_INDEX;
12994 
12995    emit->fs.color_tmp_index = INVALID_INDEX;
12996    emit->fs.face_input_index = INVALID_INDEX;
12997    emit->fs.fragcoord_input_index = INVALID_INDEX;
12998    emit->fs.sample_id_sys_index = INVALID_INDEX;
12999    emit->fs.sample_pos_sys_index = INVALID_INDEX;
13000    emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
13001    emit->fs.layer_input_index = INVALID_INDEX;
13002    emit->fs.layer_imm_index = INVALID_INDEX;
13003 
13004    emit->gs.prim_id_index = INVALID_INDEX;
13005    emit->gs.invocation_id_sys_index = INVALID_INDEX;
13006    emit->gs.viewport_index_out_index = INVALID_INDEX;
13007    emit->gs.viewport_index_tmp_index = INVALID_INDEX;
13008 
13009    emit->tcs.vertices_per_patch_index = INVALID_INDEX;
13010    emit->tcs.invocation_id_sys_index = INVALID_INDEX;
13011    emit->tcs.control_point_input_index = INVALID_INDEX;
13012    emit->tcs.control_point_addr_index = INVALID_INDEX;
13013    emit->tcs.control_point_out_index = INVALID_INDEX;
13014    emit->tcs.control_point_tmp_index = INVALID_INDEX;
13015    emit->tcs.control_point_out_count = 0;
13016    emit->tcs.inner.out_index = INVALID_INDEX;
13017    emit->tcs.inner.temp_index = INVALID_INDEX;
13018    emit->tcs.inner.tgsi_index = INVALID_INDEX;
13019    emit->tcs.outer.out_index = INVALID_INDEX;
13020    emit->tcs.outer.temp_index = INVALID_INDEX;
13021    emit->tcs.outer.tgsi_index = INVALID_INDEX;
13022    emit->tcs.patch_generic_out_count = 0;
13023    emit->tcs.patch_generic_out_index = INVALID_INDEX;
13024    emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
13025    emit->tcs.prim_id_index = INVALID_INDEX;
13026 
13027    emit->tes.tesscoord_sys_index = INVALID_INDEX;
13028    emit->tes.inner.in_index = INVALID_INDEX;
13029    emit->tes.inner.temp_index = INVALID_INDEX;
13030    emit->tes.inner.tgsi_index = INVALID_INDEX;
13031    emit->tes.outer.in_index = INVALID_INDEX;
13032    emit->tes.outer.temp_index = INVALID_INDEX;
13033    emit->tes.outer.tgsi_index = INVALID_INDEX;
13034    emit->tes.prim_id_index = INVALID_INDEX;
13035 
13036    emit->cs.thread_id_index = INVALID_INDEX;
13037    emit->cs.block_id_index = INVALID_INDEX;
13038    emit->cs.grid_size.tgsi_index = INVALID_INDEX;
13039    emit->cs.grid_size.imm_index = INVALID_INDEX;
13040    emit->cs.block_width = 1;
13041    emit->cs.block_height = 1;
13042    emit->cs.block_depth = 1;
13043 
13044    emit->clip_dist_out_index = INVALID_INDEX;
13045    emit->clip_dist_tmp_index = INVALID_INDEX;
13046    emit->clip_dist_so_index = INVALID_INDEX;
13047    emit->clip_vertex_out_index = INVALID_INDEX;
13048    emit->clip_vertex_tmp_index = INVALID_INDEX;
13049    emit->svga_debug_callback = svga->debug.callback;
13050 
13051    emit->index_range.start_index = INVALID_INDEX;
13052    emit->index_range.count = 0;
13053    emit->index_range.required = false;
13054    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
13055    emit->index_range.dim = 0;
13056    emit->index_range.size = 0;
13057 
13058    emit->current_loop_depth = 0;
13059 
13060    emit->initialize_temp_index = INVALID_INDEX;
13061    emit->image_size_index = INVALID_INDEX;
13062 
13063    emit->max_vs_inputs  = svgascreen->max_vs_inputs;
13064    emit->max_vs_outputs = svgascreen->max_vs_outputs;
13065    emit->max_gs_inputs  = svgascreen->max_gs_inputs;
13066 
13067    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
13068       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
13069    }
13070 
13071    if (unit == PIPE_SHADER_FRAGMENT) {
13072       if (key->fs.light_twoside) {
13073          tokens = transform_fs_twoside(tokens);
13074       }
13075       if (key->fs.pstipple) {
13076          const struct tgsi_token *new_tokens =
13077             transform_fs_pstipple(emit, tokens);
13078          if (tokens != shader->tokens) {
13079             /* free the two-sided shader tokens */
13080             tgsi_free_tokens(tokens);
13081          }
13082          tokens = new_tokens;
13083       }
13084       if (key->fs.aa_point) {
13085          tokens = transform_fs_aapoint(svga, tokens,
13086 			               key->fs.aa_point_coord_index);
13087       }
13088    }
13089 
13090    if (SVGA_DEBUG & DEBUG_TGSI) {
13091       debug_printf("#####################################\n");
13092       debug_printf("### TGSI Shader %u\n", shader->id);
13093       tgsi_dump(tokens, 0);
13094    }
13095 
13096    /**
13097     * Rescan the header if the token string is different from the one
13098     * included in the shader; otherwise, the header info is already up-to-date
13099     */
13100    if (tokens != shader->tokens) {
13101       tgsi_scan_shader(tokens, &emit->info);
13102    } else {
13103       emit->info = shader->tgsi_info;
13104    }
13105 
13106    emit->num_outputs = emit->info.num_outputs;
13107 
13108    /**
13109     * Compute input mapping to match the outputs from shader
13110     * in the previous stage
13111     */
13112    compute_input_mapping(svga, emit, unit);
13113 
13114    determine_clipping_mode(emit);
13115 
13116    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
13117        unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
13118       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
13119          /* if there is stream output declarations associated
13120           * with this shader or the shader writes to ClipDistance
13121           * then reserve extra registers for the non-adjusted vertex position
13122           * and the ClipDistance shadow copy.
13123           */
13124          emit->vposition.so_index = emit->num_outputs++;
13125 
13126          if (emit->clip_mode == CLIP_DISTANCE) {
13127             emit->clip_dist_so_index = emit->num_outputs++;
13128             if (emit->info.num_written_clipdistance > 4)
13129                emit->num_outputs++;
13130          }
13131       }
13132    }
13133 
13134    /* Determine if constbuf to rawbuf translation is needed */
13135    emit->raw_buf_srv_start_index = emit->key.srv_raw_constbuf_index;
13136    if (emit->info.const_buffers_declared)
13137       emit->raw_bufs = emit->key.raw_constbufs;
13138 
13139    emit->raw_shaderbuf_srv_start_index = emit->key.srv_raw_shaderbuf_index;
13140    if (emit->info.shader_buffers_declared)
13141       emit->raw_shaderbufs = emit->key.raw_shaderbufs;
13142 
13143    /*
13144     * Do actual shader translation.
13145     */
13146    if (!emit_vgpu10_header(emit)) {
13147       debug_printf("svga: emit VGPU10 header failed\n");
13148       goto cleanup;
13149    }
13150 
13151    if (!emit_vgpu10_instructions(emit, tokens)) {
13152       debug_printf("svga: emit VGPU10 instructions failed\n");
13153       goto cleanup;
13154    }
13155 
13156    if (emit->num_new_immediates > 0) {
13157       reemit_immediates_block(emit);
13158    }
13159 
13160    if (!emit_vgpu10_tail(emit)) {
13161       debug_printf("svga: emit VGPU10 tail failed\n");
13162       goto cleanup;
13163    }
13164 
13165    if (emit->register_overflow) {
13166       goto cleanup;
13167    }
13168 
13169    /*
13170     * Create, initialize the 'variant' object.
13171     */
13172    variant = svga_new_shader_variant(svga, unit);
13173    if (!variant)
13174       goto cleanup;
13175 
13176    variant->shader = shader;
13177    variant->nr_tokens = emit_get_num_tokens(emit);
13178    variant->tokens = (const unsigned *)emit->buf;
13179 
13180    /* Copy shader signature info to the shader variant */
13181    if (svga_have_sm5(svga)) {
13182       copy_shader_signature(&emit->signature, variant);
13183    }
13184 
13185    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
13186    memcpy(&variant->key, key, sizeof(*key));
13187    variant->id = UTIL_BITMASK_INVALID_INDEX;
13188 
13189    /* The extra constant starting offset starts with the number of
13190     * shader constants declared in the shader.
13191     */
13192    variant->extra_const_start = emit->num_shader_consts[0];
13193    if (key->gs.wide_point) {
13194       /**
13195        * The extra constant added in the transformed shader
13196        * for inverse viewport scale is to be supplied by the driver.
13197        * So the extra constant starting offset needs to be reduced by 1.
13198        */
13199       assert(variant->extra_const_start > 0);
13200       variant->extra_const_start--;
13201    }
13202 
13203    if (unit == PIPE_SHADER_FRAGMENT) {
13204       struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
13205 
13206       fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
13207       fs_variant->pstipple_sampler_state_index =
13208          emit->fs.pstipple_sampler_state_index;
13209 
13210       /* If there was exactly one write to a fragment shader output register
13211        * and it came from a constant buffer, we know all fragments will have
13212        * the same color (except for blending).
13213        */
13214       fs_variant->constant_color_output =
13215          emit->constant_color_output && emit->num_output_writes == 1;
13216 
13217       /** keep track in the variant if flat interpolation is used
13218        *  for any of the varyings.
13219        */
13220       fs_variant->uses_flat_interp = emit->uses_flat_interp;
13221 
13222       fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
13223    }
13224    else if (unit == PIPE_SHADER_TESS_EVAL) {
13225       struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
13226 
13227       /* Keep track in the tes variant some of the layout parameters.
13228        * These parameters will be referenced by the tcs to emit
13229        * the necessary declarations for the hull shader.
13230        */
13231       tes_variant->prim_mode = emit->tes.prim_mode;
13232       tes_variant->spacing = emit->tes.spacing;
13233       tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
13234       tes_variant->point_mode = emit->tes.point_mode;
13235    }
13236 
13237 
13238    if (tokens != shader->tokens) {
13239       tgsi_free_tokens(tokens);
13240    }
13241 
13242 cleanup:
13243    free_emitter(emit);
13244 
13245 done:
13246    SVGA_STATS_TIME_POP(svga_sws(svga));
13247    return variant;
13248 }
13249