• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************
2  * Copyright 1998-2022 VMware, Inc.  All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **********************************************************/
25 
26 /**
27  * @file svga_tgsi_vgpu10.c
28  *
29  * TGSI -> VGPU10 shader translation.
30  *
31  * \author Mingcheng Chen
32  * \author Brian Paul
33  */
34 
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
52 
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
58 
59 #include "VGPU10ShaderTokens.h"
60 
61 
/* Sentinel index value.  NOTE(review): presumably means "no register
 * assigned"; the use sites are outside this part of the file -- confirm.
 */
#define INVALID_INDEX 99999

#define MAX_INTERNAL_TEMPS 4

/* Number of entries in the emitter's system_value_indexes[] table */
#define MAX_SYSTEM_VALUES 4

/* Number of rows in the emitter's immediates[][4] table */
#define MAX_IMMEDIATE_COUNT \
   (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT / 4)

/* Number of entries in the emitter's temp_arrays[] table.  Enough? */
#define MAX_TEMP_ARRAYS 64
68 
/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   /** No clipping enabled. */
   CLIP_NONE,

   /**
    * The shader has no clipping declarations or code, but one or more
    * user-defined clip planes are enabled.  We generate extra code to
    * emit clip distances.
    */
   CLIP_LEGACY,

   /**
    * The shader already declares clip distance output registers and
    * has code to write to them.
    */
   CLIP_DISTANCE,

   /**
    * The shader declares a clip vertex output register and has code
    * that writes to the register.  We convert the clipvertex position
    * into one or more clip distances.
    */
   CLIP_VERTEX
};
88 
89 
90 /* Shader signature info */
91 struct svga_shader_signature
92 {
93    SVGA3dDXShaderSignatureHeader header;
94    SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
95    SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
96    SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
97 };
98 
99 static inline void
set_shader_signature_entry(SVGA3dDXShaderSignatureEntry * e,unsigned index,SVGA3dDXSignatureSemanticName sgnName,unsigned mask,SVGA3dDXSignatureRegisterComponentType compType,SVGA3dDXSignatureMinPrecision minPrecision)100 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
101                            unsigned index,
102                            SVGA3dDXSignatureSemanticName sgnName,
103                            unsigned mask,
104                            SVGA3dDXSignatureRegisterComponentType compType,
105                            SVGA3dDXSignatureMinPrecision minPrecision)
106 {
107    e->registerIndex = index;
108    e->semanticName = sgnName;
109    e->mask = mask;
110    e->componentType = compType;
111    e->minPrecision = minPrecision;
112 };
113 
114 static const SVGA3dDXSignatureSemanticName
115 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
116    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
117    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
118    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123    SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
124    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
125    SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
126    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
127    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
128    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
129    SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
130    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
131    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137    SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
138    SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
139    SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
140    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
141    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
143    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
144    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
145    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
161 };
162 
163 
164 /**
165  * Map tgsi semantic name to SVGA signature semantic name
166  */
167 static inline SVGA3dDXSignatureSemanticName
map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)168 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
169 {
170    assert(name < TGSI_SEMANTIC_COUNT);
171 
172    /* Do a few asserts here to spot check the mapping */
173    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
174           SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
175    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
176           SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
177    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
178           SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
179 
180    return tgsi_semantic_to_sgn_name[name];
181 }
182 
/**
 * Tri-state value; it is the type of the emitter's
 * reemit_rawbuf_instruction field.
 */
enum reemit_mode
{
   REEMIT_FALSE = 0,
   REEMIT_TRUE = 1,
   REEMIT_IN_PROGRESS = 2
};
188 
/**
 * One entry of the emitter's raw_buf_tmp[] array ("temporaries for raw
 * buf source").  The three index fields are 8-bit bitfields.
 * NOTE(review): field meanings below are read off the names -- confirm
 * against the code that fills them in.
 */
struct svga_raw_buf_tmp
{
   bool indirect;
   unsigned buffer_index:8;   /* presumably the buffer being read */
   unsigned element_index:8;  /* presumably the element within the buffer */
   unsigned element_rel:8;    /* presumably the relative-addressing part */
};
195 
196 struct svga_shader_emitter_v10
197 {
198    /* The token output buffer */
199    unsigned size;
200    char *buf;
201    char *ptr;
202 
203    /* Information about the shader and state (does not change) */
204    struct svga_compile_key key;
205    struct tgsi_shader_info info;
206    unsigned unit;
207    unsigned version; /**< Either 40, 41, 50 or 51 at this time */
208 
209    unsigned cur_tgsi_token;     /**< current tgsi token position */
210    unsigned inst_start_token;
211    boolean discard_instruction; /**< throw away current instruction? */
212    boolean reemit_instruction;  /**< reemit current instruction */
213    boolean reemit_tgsi_instruction;  /**< reemit current tgsi instruction */
214    boolean skip_instruction;    /**< skip current instruction */
215    boolean use_sampler_state_mapping; /* use sampler state mapping */
216    enum reemit_mode reemit_rawbuf_instruction;
217 
218    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
219    double (*immediates_dbl)[2];
220    unsigned num_immediates;      /**< Number of immediates emitted */
221    unsigned common_immediate_pos[20];  /**< literals for common immediates */
222    unsigned num_common_immediates;
223    boolean immediates_emitted;
224 
225    unsigned num_outputs;      /**< include any extra outputs */
226                               /**  The first extra output is reserved for
227                                *   non-adjusted vertex position for
228                                *   stream output purpose
229                                */
230 
231    /* Temporary Registers */
232    unsigned num_shader_temps; /**< num of temps used by original shader */
233    unsigned internal_temp_count;  /**< currently allocated internal temps */
234    struct {
235       unsigned start, size;
236    } temp_arrays[MAX_TEMP_ARRAYS];
237    unsigned num_temp_arrays;
238 
239    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
240    struct {
241       unsigned arrayId, index;
242       boolean initialized;
243    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
244 
245    unsigned initialize_temp_index;
246 
247    /** Number of constants used by original shader for each constant buffer.
248     * The size should probably always match with that of svga_state.constbufs.
249     */
250    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
251 
252    /* Raw constant buffers */
253    unsigned raw_buf_srv_start_index;  /* starting srv index for raw buffers */
254    unsigned raw_bufs;                 /* raw buffers bitmask */
255    unsigned raw_buf_tmp_index;        /* starting temp index for raw buffers */
256    unsigned raw_buf_cur_tmp_index;    /* current temp index for raw buffers */
257    struct svga_raw_buf_tmp raw_buf_tmp[3]; /* temporaries for raw buf source */
258 
259    /* Samplers */
260    unsigned num_samplers;
261    boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
262    ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
263    ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
264 
265    /* Images */
266    unsigned num_images;
267    unsigned image_mask;
268    struct tgsi_declaration_image image[PIPE_MAX_SHADER_IMAGES];
269    unsigned image_size_index;  /* starting index to cbuf for image size */
270 
271    /* Shader buffers */
272    unsigned num_shader_bufs;
273 
274    /* HW atomic buffers */
275    unsigned num_atomic_bufs;
276    unsigned atomic_bufs_mask;
277    unsigned max_atomic_counter_index;
278    VGPU10_OPCODE_TYPE cur_atomic_opcode;    /* current atomic opcode */
279 
280    boolean uav_declared;  /* True if uav is declared */
281 
282    /* Index Range declaration */
283    struct {
284       unsigned start_index;
285       unsigned count;
286       boolean required;
287       unsigned operandType;
288       unsigned size;
289       unsigned dim;
290    } index_range;
291 
292    /* Address regs (really implemented with temps) */
293    unsigned num_address_regs;
294    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
295 
296    /* Output register usage masks */
297    ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
298 
299    /* To map TGSI system value index to VGPU shader input indexes */
300    ubyte system_value_indexes[MAX_SYSTEM_VALUES];
301 
302    struct {
303       /* vertex position scale/translation */
304       unsigned out_index;  /**< the real position output reg */
305       unsigned tmp_index;  /**< the fake/temp position output reg */
306       unsigned so_index;   /**< the non-adjusted position output reg */
307       unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
308       unsigned prescale_scale_index, prescale_trans_index;
309       unsigned num_prescale;      /* number of prescale factor in const buf */
310       unsigned viewport_index;
311       unsigned need_prescale:1;
312       unsigned have_prescale:1;
313    } vposition;
314 
315    /* Shader limits */
316    unsigned max_vs_inputs;
317    unsigned max_vs_outputs;
318    unsigned max_gs_inputs;
319 
320    /* For vertex shaders only */
321    struct {
322       /* viewport constant */
323       unsigned viewport_index;
324 
325       unsigned vertex_id_bias_index;
326       unsigned vertex_id_sys_index;
327       unsigned vertex_id_tmp_index;
328 
329       /* temp index of adjusted vertex attributes */
330       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
331    } vs;
332 
333    /* For fragment shaders only */
334    struct {
335       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
336       unsigned num_color_outputs;
337       unsigned color_tmp_index;  /**< fake/temp color output reg */
338       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
339 
340       /* front-face */
341       unsigned face_input_index; /**< real fragment shader face reg (bool) */
342       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
343 
344       unsigned pstipple_sampler_unit;
345       unsigned pstipple_sampler_state_index;
346 
347       unsigned fragcoord_input_index;  /**< real fragment position input reg */
348       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
349 
350       unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
351 
352       unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
353       unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
354 
355       /** TGSI index of sample mask input sys value */
356       unsigned sample_mask_in_sys_index;
357 
358       /* layer */
359       unsigned layer_input_index;    /**< TGSI index of layer */
360       unsigned layer_imm_index;      /**< immediate for default layer 0 */
361 
362       boolean forceEarlyDepthStencil;  /**< true if Early Depth stencil test is enabled */
363    } fs;
364 
365    /* For geometry shaders only */
366    struct {
367       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
368       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
369       unsigned input_size;       /**< size of input arrays */
370       unsigned prim_id_index;    /**< primitive id register index */
371       unsigned max_out_vertices; /**< maximum number of output vertices */
372       unsigned invocations;
373       unsigned invocation_id_sys_index;
374 
375       unsigned viewport_index_out_index;
376       unsigned viewport_index_tmp_index;
377    } gs;
378 
379    /* For tessellation control shaders only */
380    struct {
381       unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
382       unsigned imm_index;                    /**< immediate for tcs */
383       unsigned invocation_id_sys_index;      /**< invocation id */
384       unsigned invocation_id_tmp_index;
385       unsigned instruction_token_pos;        /* token pos for the first instruction */
386       unsigned control_point_input_index;    /* control point input register index */
387       unsigned control_point_addr_index;     /* control point input address register */
388       unsigned control_point_out_index;      /* control point output register index */
389       unsigned control_point_tmp_index;      /* control point temporary register */
390       unsigned control_point_out_count;      /* control point output count */
391       boolean  control_point_phase;          /* true if in control point phase */
392       boolean  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
393       unsigned patch_generic_out_count;      /* per-patch generic output count */
394       unsigned patch_generic_out_index;      /* per-patch generic output register index*/
395       unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
396       unsigned prim_id_index;                /* primitive id */
397       struct {
398          unsigned out_index;      /* real tessinner output register */
399          unsigned temp_index;     /* tessinner temp register */
400          unsigned tgsi_index;     /* tgsi tessinner output register */
401       } inner;
402       struct {
403          unsigned out_index;      /* real tessouter output register */
404          unsigned temp_index;     /* tessouter temp register */
405          unsigned tgsi_index;     /* tgsi tessouter output register */
406       } outer;
407    } tcs;
408 
409    /* For tessellation evaluation shaders only */
410    struct {
411       enum pipe_prim_type prim_mode;
412       enum pipe_tess_spacing spacing;
413       boolean vertices_order_cw;
414       boolean point_mode;
415       unsigned tesscoord_sys_index;
416       unsigned swizzle_max;
417       unsigned prim_id_index;                /* primitive id */
418       struct {
419          unsigned in_index;       /* real tessinner input register */
420          unsigned temp_index;     /* tessinner temp register */
421          unsigned tgsi_index;     /* tgsi tessinner input register */
422       } inner;
423       struct {
424          unsigned in_index;       /* real tessouter input register */
425          unsigned temp_index;     /* tessouter temp register */
426          unsigned tgsi_index;     /* tgsi tessouter input register */
427       } outer;
428    } tes;
429 
430    struct {
431       unsigned block_width;       /* thread group size in x dimension */
432       unsigned block_height;      /* thread group size in y dimension */
433       unsigned block_depth;       /* thread group size in z dimension */
434       unsigned thread_id_index;   /* thread id tgsi index */
435       unsigned block_id_index;    /* block id tgsi index */
436       bool shared_memory_declared;    /* set if shared memory is declared */
437       struct {
438          unsigned tgsi_index;   /* grid size tgsi index */
439          unsigned imm_index;    /* grid size imm index */
440       } grid_size;
441    } cs;
442 
443    /* For vertex or geometry shaders */
444    enum clipping_mode clip_mode;
445    unsigned clip_dist_out_index; /**< clip distance output register index */
446    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
447    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
448 
449    /** Index of temporary holding the clipvertex coordinate */
450    unsigned clip_vertex_out_index; /**< clip vertex output register index */
451    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
452 
453    /* user clip plane constant slot indexes */
454    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
455 
456    unsigned num_output_writes;
457    boolean constant_color_output;
458 
459    boolean uses_flat_interp;
460 
461    unsigned reserved_token;        /* index to the reserved token */
462    boolean uses_precise_qualifier;
463 
464    /* For all shaders: const reg index for RECT coord scaling */
465    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
466 
467    /* For all shaders: const reg index for texture buffer size */
468    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
469 
470    /** Which texture units are doing shadow comparison in the shader code */
471    unsigned shadow_compare_units;
472 
473    /* VS/TCS/TES/GS/FS Linkage info */
474    struct shader_linkage linkage;
475    struct tgsi_shader_info *prevShaderInfo;
476 
477    /* Shader signature */
478    struct svga_shader_signature signature;
479 
480    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
481 
482    /* For util_debug_message */
483    struct util_debug_callback svga_debug_callback;
484 
485    /* current loop depth in shader */
486    unsigned current_loop_depth;
487 };
488 
489 
490 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
491 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
492 static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
493 static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
494 static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
495 static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
496 static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
497 static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
498 static void emit_image_declarations(struct svga_shader_emitter_v10 *emit);
499 static void emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit);
500 static void emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit);
501 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
502 
503 static boolean
504 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
505 
506 static boolean
507 emit_vertex(struct svga_shader_emitter_v10 *emit,
508             const struct tgsi_full_instruction *inst);
509 
510 static boolean
511 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
512                         unsigned inst_number,
513                         const struct tgsi_full_instruction *inst);
514 
515 static void
516 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
517                        unsigned opcodeType, unsigned operandType,
518                        unsigned dim, unsigned index, unsigned size,
519                        unsigned name, unsigned numComp,
520                        unsigned selMode, unsigned usageMask,
521                        unsigned interpMode,
522                        boolean addSignature,
523                        SVGA3dDXSignatureSemanticName sgnName);
524 
525 static boolean
526 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
527                         unsigned inst_number,
528                         const struct tgsi_full_instruction *inst);
529 
530 static void
531 create_temp_array(struct svga_shader_emitter_v10 *emit,
532                   unsigned arrayID, unsigned first, unsigned count,
533                   unsigned startIndex);
534 
535 static char err_buf[128];
536 
537 static boolean
expand(struct svga_shader_emitter_v10 * emit)538 expand(struct svga_shader_emitter_v10 *emit)
539 {
540    char *new_buf;
541    unsigned newsize = emit->size * 2;
542 
543    if (emit->buf != err_buf)
544       new_buf = REALLOC(emit->buf, emit->size, newsize);
545    else
546       new_buf = NULL;
547 
548    if (!new_buf) {
549       emit->ptr = err_buf;
550       emit->buf = err_buf;
551       emit->size = sizeof(err_buf);
552       return FALSE;
553    }
554 
555    emit->size = newsize;
556    emit->ptr = new_buf + (emit->ptr - emit->buf);
557    emit->buf = new_buf;
558    return TRUE;
559 }
560 
561 /**
562  * Create and initialize a new svga_shader_emitter_v10 object.
563  */
564 static struct svga_shader_emitter_v10 *
alloc_emitter(void)565 alloc_emitter(void)
566 {
567    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
568 
569    if (!emit)
570       return NULL;
571 
572    /* to initialize the output buffer */
573    emit->size = 512;
574    if (!expand(emit)) {
575       FREE(emit);
576       return NULL;
577    }
578    return emit;
579 }
580 
581 /**
582  * Free an svga_shader_emitter_v10 object.
583  */
584 static void
free_emitter(struct svga_shader_emitter_v10 * emit)585 free_emitter(struct svga_shader_emitter_v10 *emit)
586 {
587    assert(emit);
588    FREE(emit->buf);    /* will be NULL if translation succeeded */
589    FREE(emit);
590 }
591 
592 static inline boolean
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)593 reserve(struct svga_shader_emitter_v10 *emit,
594         unsigned nr_dwords)
595 {
596    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
597       if (!expand(emit))
598          return FALSE;
599    }
600 
601    return TRUE;
602 }
603 
604 static boolean
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)605 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
606 {
607    if (!reserve(emit, 1))
608       return FALSE;
609 
610    *(uint32 *)emit->ptr = dword;
611    emit->ptr += sizeof dword;
612    return TRUE;
613 }
614 
615 static boolean
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)616 emit_dwords(struct svga_shader_emitter_v10 *emit,
617             const uint32 *dwords,
618             unsigned nr)
619 {
620    if (!reserve(emit, nr))
621       return FALSE;
622 
623    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
624    emit->ptr += nr * sizeof *dwords;
625    return TRUE;
626 }
627 
628 /** Return the number of tokens in the emitter's buffer */
629 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)630 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
631 {
632    return (emit->ptr - emit->buf) / sizeof(unsigned);
633 }
634 
635 
636 /**
637  * Check for register overflow.  If we overflow we'll set an
638  * error flag.  This function can be called for register declarations
639  * or use as src/dst instruction operands.
640  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
641                 or VGPU10_OPCODE_DCL_x
642  * \param index  the register index
643  */
644 static void
check_register_index(struct svga_shader_emitter_v10 * emit,unsigned operandType,unsigned index)645 check_register_index(struct svga_shader_emitter_v10 *emit,
646                      unsigned operandType, unsigned index)
647 {
648    bool overflow_before = emit->register_overflow;
649 
650    switch (operandType) {
651    case VGPU10_OPERAND_TYPE_TEMP:
652    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
653    case VGPU10_OPCODE_DCL_TEMPS:
654       if (index >= VGPU10_MAX_TEMPS) {
655          emit->register_overflow = TRUE;
656       }
657       break;
658    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
659    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
660       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
661          emit->register_overflow = TRUE;
662       }
663       break;
664    case VGPU10_OPERAND_TYPE_INPUT:
665    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
666    case VGPU10_OPCODE_DCL_INPUT:
667    case VGPU10_OPCODE_DCL_INPUT_SGV:
668    case VGPU10_OPCODE_DCL_INPUT_SIV:
669    case VGPU10_OPCODE_DCL_INPUT_PS:
670    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
671    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
672       if ((emit->unit == PIPE_SHADER_VERTEX &&
673            index >= emit->max_vs_inputs) ||
674           (emit->unit == PIPE_SHADER_GEOMETRY &&
675            index >= emit->max_gs_inputs) ||
676           (emit->unit == PIPE_SHADER_FRAGMENT &&
677            index >= VGPU10_MAX_FS_INPUTS) ||
678           (emit->unit == PIPE_SHADER_TESS_CTRL &&
679            index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
680           (emit->unit == PIPE_SHADER_TESS_EVAL &&
681            index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
682          emit->register_overflow = TRUE;
683       }
684       break;
685    case VGPU10_OPERAND_TYPE_OUTPUT:
686    case VGPU10_OPCODE_DCL_OUTPUT:
687    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
688    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
689       /* Note: we are skipping two output indices in tcs for
690        * tessinner/outer levels. Implementation will not exceed
691        * number of output count but it allows index to go beyond
692        * VGPU11_MAX_HS_OUTPUTS.
693        * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
694        */
695       if ((emit->unit == PIPE_SHADER_VERTEX &&
696            index >= emit->max_vs_outputs) ||
697           (emit->unit == PIPE_SHADER_GEOMETRY &&
698            index >= VGPU10_MAX_GS_OUTPUTS) ||
699           (emit->unit == PIPE_SHADER_FRAGMENT &&
700            index >= VGPU10_MAX_FS_OUTPUTS) ||
701           (emit->unit == PIPE_SHADER_TESS_CTRL &&
702            index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
703           (emit->unit == PIPE_SHADER_TESS_EVAL &&
704            index >= VGPU11_MAX_DS_OUTPUTS)) {
705          emit->register_overflow = TRUE;
706       }
707       break;
708    case VGPU10_OPERAND_TYPE_SAMPLER:
709    case VGPU10_OPCODE_DCL_SAMPLER:
710       if (index >= VGPU10_MAX_SAMPLERS) {
711          emit->register_overflow = TRUE;
712       }
713       break;
714    case VGPU10_OPERAND_TYPE_RESOURCE:
715    case VGPU10_OPCODE_DCL_RESOURCE:
716       if (index >= VGPU10_MAX_RESOURCES) {
717          emit->register_overflow = TRUE;
718       }
719       break;
720    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
721       if (index >= MAX_IMMEDIATE_COUNT) {
722          emit->register_overflow = TRUE;
723       }
724       break;
725    case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
726    case VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
727    case VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
728    case VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT:
729    case VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT:
730    case VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT:
731    case VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
732    case VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
733       /* nothing */
734       break;
735    default:
736       assert(0);
737       ; /* nothing */
738    }
739 
740    if (emit->register_overflow && !overflow_before) {
741       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
742                    operandType, index);
743    }
744 }
745 
746 
747 /**
748  * Examine misc state to determine the clipping mode.
749  */
750 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)751 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
752 {
753    /* num_written_clipdistance in the shader info for tessellation
754     * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
755     * is not defined for this shader. So we go through all the output declarations
756     * to set the num_written_clipdistance. This is just to determine the
757     * clipping mode.
758     */
759    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
760       unsigned i;
761       for (i = 0; i < emit->info.num_outputs; i++) {
762          if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
763             emit->info.num_written_clipdistance =
764                4 * (emit->info.output_semantic_index[i] + 1);
765          }
766       }
767    }
768 
769    if (emit->info.num_written_clipdistance > 0) {
770       emit->clip_mode = CLIP_DISTANCE;
771    }
772    else if (emit->info.writes_clipvertex) {
773       emit->clip_mode = CLIP_VERTEX;
774    }
775    else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
776       /*
777        * Only the last shader in the vertex processing stage needs to
778        * handle the legacy clip mode.
779        */
780       emit->clip_mode = CLIP_LEGACY;
781    }
782    else {
783       emit->clip_mode = CLIP_NONE;
784    }
785 }
786 
787 
788 /**
789  * For clip distance register declarations and clip distance register
790  * writes we need to mask the declaration usage or instruction writemask
791  * (respectively) against the set of the really-enabled clipping planes.
792  *
793  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
794  * has a VS that writes to all 8 clip distance registers, but the plane enable
795  * flags are a subset of that.
796  *
797  * This function is used to apply the plane enable flags to the register
798  * declaration or instruction writemask.
799  *
800  * \param writemask  the declaration usage mask or instruction writemask
801  * \param clip_reg_index  which clip plane register is being declared/written.
802  *                        The legal values are 0 and 1 (two clip planes per
803  *                        register, for a total of 8 clip planes)
804  */
805 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)806 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
807                       unsigned writemask, unsigned clip_reg_index)
808 {
809    unsigned shift;
810 
811    assert(clip_reg_index < 2);
812 
813    /* four clip planes per clip register: */
814    shift = clip_reg_index * 4;
815    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
816 
817    return writemask;
818 }
819 
820 
821 /**
822  * Translate gallium shader type into VGPU10 type.
823  */
824 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)825 translate_shader_type(unsigned type)
826 {
827    switch (type) {
828    case PIPE_SHADER_VERTEX:
829       return VGPU10_VERTEX_SHADER;
830    case PIPE_SHADER_GEOMETRY:
831       return VGPU10_GEOMETRY_SHADER;
832    case PIPE_SHADER_FRAGMENT:
833       return VGPU10_PIXEL_SHADER;
834    case PIPE_SHADER_TESS_CTRL:
835       return VGPU10_HULL_SHADER;
836    case PIPE_SHADER_TESS_EVAL:
837       return VGPU10_DOMAIN_SHADER;
838    case PIPE_SHADER_COMPUTE:
839       return VGPU10_COMPUTE_SHADER;
840    default:
841       assert(!"Unexpected shader type");
842       return VGPU10_VERTEX_SHADER;
843    }
844 }
845 
846 
847 /**
848  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
849  * Note: we only need to translate the opcodes for "simple" instructions,
850  * as seen below.  All other opcodes are handled/translated specially.
851  */
852 static VGPU10_OPCODE_TYPE
translate_opcode(enum tgsi_opcode opcode)853 translate_opcode(enum tgsi_opcode opcode)
854 {
855    switch (opcode) {
856    case TGSI_OPCODE_MOV:
857       return VGPU10_OPCODE_MOV;
858    case TGSI_OPCODE_MUL:
859       return VGPU10_OPCODE_MUL;
860    case TGSI_OPCODE_ADD:
861       return VGPU10_OPCODE_ADD;
862    case TGSI_OPCODE_DP3:
863       return VGPU10_OPCODE_DP3;
864    case TGSI_OPCODE_DP4:
865       return VGPU10_OPCODE_DP4;
866    case TGSI_OPCODE_MIN:
867       return VGPU10_OPCODE_MIN;
868    case TGSI_OPCODE_MAX:
869       return VGPU10_OPCODE_MAX;
870    case TGSI_OPCODE_MAD:
871       return VGPU10_OPCODE_MAD;
872    case TGSI_OPCODE_SQRT:
873       return VGPU10_OPCODE_SQRT;
874    case TGSI_OPCODE_FRC:
875       return VGPU10_OPCODE_FRC;
876    case TGSI_OPCODE_FLR:
877       return VGPU10_OPCODE_ROUND_NI;
878    case TGSI_OPCODE_FSEQ:
879       return VGPU10_OPCODE_EQ;
880    case TGSI_OPCODE_FSGE:
881       return VGPU10_OPCODE_GE;
882    case TGSI_OPCODE_FSNE:
883       return VGPU10_OPCODE_NE;
884    case TGSI_OPCODE_DDX:
885       return VGPU10_OPCODE_DERIV_RTX;
886    case TGSI_OPCODE_DDY:
887       return VGPU10_OPCODE_DERIV_RTY;
888    case TGSI_OPCODE_RET:
889       return VGPU10_OPCODE_RET;
890    case TGSI_OPCODE_DIV:
891       return VGPU10_OPCODE_DIV;
892    case TGSI_OPCODE_IDIV:
893       return VGPU10_OPCODE_VMWARE;
894    case TGSI_OPCODE_DP2:
895       return VGPU10_OPCODE_DP2;
896    case TGSI_OPCODE_BRK:
897       return VGPU10_OPCODE_BREAK;
898    case TGSI_OPCODE_IF:
899       return VGPU10_OPCODE_IF;
900    case TGSI_OPCODE_ELSE:
901       return VGPU10_OPCODE_ELSE;
902    case TGSI_OPCODE_ENDIF:
903       return VGPU10_OPCODE_ENDIF;
904    case TGSI_OPCODE_CEIL:
905       return VGPU10_OPCODE_ROUND_PI;
906    case TGSI_OPCODE_I2F:
907       return VGPU10_OPCODE_ITOF;
908    case TGSI_OPCODE_NOT:
909       return VGPU10_OPCODE_NOT;
910    case TGSI_OPCODE_TRUNC:
911       return VGPU10_OPCODE_ROUND_Z;
912    case TGSI_OPCODE_SHL:
913       return VGPU10_OPCODE_ISHL;
914    case TGSI_OPCODE_AND:
915       return VGPU10_OPCODE_AND;
916    case TGSI_OPCODE_OR:
917       return VGPU10_OPCODE_OR;
918    case TGSI_OPCODE_XOR:
919       return VGPU10_OPCODE_XOR;
920    case TGSI_OPCODE_CONT:
921       return VGPU10_OPCODE_CONTINUE;
922    case TGSI_OPCODE_EMIT:
923       return VGPU10_OPCODE_EMIT;
924    case TGSI_OPCODE_ENDPRIM:
925       return VGPU10_OPCODE_CUT;
926    case TGSI_OPCODE_BGNLOOP:
927       return VGPU10_OPCODE_LOOP;
928    case TGSI_OPCODE_ENDLOOP:
929       return VGPU10_OPCODE_ENDLOOP;
930    case TGSI_OPCODE_ENDSUB:
931       return VGPU10_OPCODE_RET;
932    case TGSI_OPCODE_NOP:
933       return VGPU10_OPCODE_NOP;
934    case TGSI_OPCODE_END:
935       return VGPU10_OPCODE_RET;
936    case TGSI_OPCODE_F2I:
937       return VGPU10_OPCODE_FTOI;
938    case TGSI_OPCODE_IMAX:
939       return VGPU10_OPCODE_IMAX;
940    case TGSI_OPCODE_IMIN:
941       return VGPU10_OPCODE_IMIN;
942    case TGSI_OPCODE_UDIV:
943    case TGSI_OPCODE_UMOD:
944    case TGSI_OPCODE_MOD:
945       return VGPU10_OPCODE_UDIV;
946    case TGSI_OPCODE_IMUL_HI:
947       return VGPU10_OPCODE_IMUL;
948    case TGSI_OPCODE_INEG:
949       return VGPU10_OPCODE_INEG;
950    case TGSI_OPCODE_ISHR:
951       return VGPU10_OPCODE_ISHR;
952    case TGSI_OPCODE_ISGE:
953       return VGPU10_OPCODE_IGE;
954    case TGSI_OPCODE_ISLT:
955       return VGPU10_OPCODE_ILT;
956    case TGSI_OPCODE_F2U:
957       return VGPU10_OPCODE_FTOU;
958    case TGSI_OPCODE_UADD:
959       return VGPU10_OPCODE_IADD;
960    case TGSI_OPCODE_U2F:
961       return VGPU10_OPCODE_UTOF;
962    case TGSI_OPCODE_UCMP:
963       return VGPU10_OPCODE_MOVC;
964    case TGSI_OPCODE_UMAD:
965       return VGPU10_OPCODE_UMAD;
966    case TGSI_OPCODE_UMAX:
967       return VGPU10_OPCODE_UMAX;
968    case TGSI_OPCODE_UMIN:
969       return VGPU10_OPCODE_UMIN;
970    case TGSI_OPCODE_UMUL:
971    case TGSI_OPCODE_UMUL_HI:
972       return VGPU10_OPCODE_UMUL;
973    case TGSI_OPCODE_USEQ:
974       return VGPU10_OPCODE_IEQ;
975    case TGSI_OPCODE_USGE:
976       return VGPU10_OPCODE_UGE;
977    case TGSI_OPCODE_USHR:
978       return VGPU10_OPCODE_USHR;
979    case TGSI_OPCODE_USLT:
980       return VGPU10_OPCODE_ULT;
981    case TGSI_OPCODE_USNE:
982       return VGPU10_OPCODE_INE;
983    case TGSI_OPCODE_SWITCH:
984       return VGPU10_OPCODE_SWITCH;
985    case TGSI_OPCODE_CASE:
986       return VGPU10_OPCODE_CASE;
987    case TGSI_OPCODE_DEFAULT:
988       return VGPU10_OPCODE_DEFAULT;
989    case TGSI_OPCODE_ENDSWITCH:
990       return VGPU10_OPCODE_ENDSWITCH;
991    case TGSI_OPCODE_FSLT:
992       return VGPU10_OPCODE_LT;
993    case TGSI_OPCODE_ROUND:
994       return VGPU10_OPCODE_ROUND_NE;
995    /* Begin SM5 opcodes */
996    case TGSI_OPCODE_F2D:
997       return VGPU10_OPCODE_FTOD;
998    case TGSI_OPCODE_D2F:
999       return VGPU10_OPCODE_DTOF;
1000    case TGSI_OPCODE_DMUL:
1001       return VGPU10_OPCODE_DMUL;
1002    case TGSI_OPCODE_DADD:
1003       return VGPU10_OPCODE_DADD;
1004    case TGSI_OPCODE_DMAX:
1005       return VGPU10_OPCODE_DMAX;
1006    case TGSI_OPCODE_DMIN:
1007       return VGPU10_OPCODE_DMIN;
1008    case TGSI_OPCODE_DSEQ:
1009       return VGPU10_OPCODE_DEQ;
1010    case TGSI_OPCODE_DSGE:
1011       return VGPU10_OPCODE_DGE;
1012    case TGSI_OPCODE_DSLT:
1013       return VGPU10_OPCODE_DLT;
1014    case TGSI_OPCODE_DSNE:
1015       return VGPU10_OPCODE_DNE;
1016    case TGSI_OPCODE_IBFE:
1017       return VGPU10_OPCODE_IBFE;
1018    case TGSI_OPCODE_UBFE:
1019       return VGPU10_OPCODE_UBFE;
1020    case TGSI_OPCODE_BFI:
1021       return VGPU10_OPCODE_BFI;
1022    case TGSI_OPCODE_BREV:
1023       return VGPU10_OPCODE_BFREV;
1024    case TGSI_OPCODE_POPC:
1025       return VGPU10_OPCODE_COUNTBITS;
1026    case TGSI_OPCODE_LSB:
1027       return VGPU10_OPCODE_FIRSTBIT_LO;
1028    case TGSI_OPCODE_IMSB:
1029       return VGPU10_OPCODE_FIRSTBIT_SHI;
1030    case TGSI_OPCODE_UMSB:
1031       return VGPU10_OPCODE_FIRSTBIT_HI;
1032    case TGSI_OPCODE_INTERP_CENTROID:
1033       return VGPU10_OPCODE_EVAL_CENTROID;
1034    case TGSI_OPCODE_INTERP_SAMPLE:
1035       return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
1036    case TGSI_OPCODE_BARRIER:
1037       return VGPU10_OPCODE_SYNC;
1038    case TGSI_OPCODE_DFMA:
1039       return VGPU10_OPCODE_DFMA;
1040    case TGSI_OPCODE_FMA:
1041       return VGPU10_OPCODE_MAD;
1042 
1043    /* DX11.1 Opcodes */
1044    case TGSI_OPCODE_DDIV:
1045       return VGPU10_OPCODE_DDIV;
1046    case TGSI_OPCODE_DRCP:
1047       return VGPU10_OPCODE_DRCP;
1048    case TGSI_OPCODE_D2I:
1049       return VGPU10_OPCODE_DTOI;
1050    case TGSI_OPCODE_D2U:
1051       return VGPU10_OPCODE_DTOU;
1052    case TGSI_OPCODE_I2D:
1053       return VGPU10_OPCODE_ITOD;
1054    case TGSI_OPCODE_U2D:
1055       return VGPU10_OPCODE_UTOD;
1056 
1057    case TGSI_OPCODE_SAMPLE_POS:
1058       /* Note: we never actually get this opcode because there's no GLSL
1059        * function to query multisample resource sample positions.  There's
1060        * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
1061        * position of the current sample in the render target.
1062        */
1063       FALLTHROUGH;
1064    case TGSI_OPCODE_SAMPLE_INFO:
1065       /* NOTE: we never actually get this opcode because the GLSL compiler
1066        * implements the gl_NumSamples variable with a simple constant in the
1067        * constant buffer.
1068        */
1069       FALLTHROUGH;
1070    default:
1071       assert(!"Unexpected TGSI opcode in translate_opcode()");
1072       return VGPU10_OPCODE_NOP;
1073    }
1074 }
1075 
1076 
1077 /**
1078  * Translate a TGSI register file type into a VGPU10 operand type.
1079  * \param array  is the TGSI_FILE_TEMPORARY register an array?
1080  */
1081 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,boolean array)1082 translate_register_file(enum tgsi_file_type file, boolean array)
1083 {
1084    switch (file) {
1085    case TGSI_FILE_CONSTANT:
1086       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1087    case TGSI_FILE_INPUT:
1088       return VGPU10_OPERAND_TYPE_INPUT;
1089    case TGSI_FILE_OUTPUT:
1090       return VGPU10_OPERAND_TYPE_OUTPUT;
1091    case TGSI_FILE_TEMPORARY:
1092       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1093                    : VGPU10_OPERAND_TYPE_TEMP;
1094    case TGSI_FILE_IMMEDIATE:
1095       /* all immediates are 32-bit values at this time so
1096        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1097        */
1098       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1099    case TGSI_FILE_SAMPLER:
1100       return VGPU10_OPERAND_TYPE_SAMPLER;
1101    case TGSI_FILE_SYSTEM_VALUE:
1102       return VGPU10_OPERAND_TYPE_INPUT;
1103 
1104    /* XXX TODO more cases to finish */
1105 
1106    default:
1107       assert(!"Bad tgsi register file!");
1108       return VGPU10_OPERAND_TYPE_NULL;
1109    }
1110 }
1111 
1112 
1113 /**
1114  * Emit a null dst register
1115  */
1116 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)1117 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1118 {
1119    VGPU10OperandToken0 operand;
1120 
1121    operand.value = 0;
1122    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1123    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1124 
1125    emit_dword(emit, operand.value);
1126 }
1127 
1128 
1129 /**
1130  * If the given register is a temporary, return the array ID.
1131  * Else return zero.
1132  */
1133 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1134 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1135                   enum tgsi_file_type file, unsigned index)
1136 {
1137    if (file == TGSI_FILE_TEMPORARY) {
1138       return emit->temp_map[index].arrayId;
1139    }
1140    else {
1141       return 0;
1142    }
1143 }
1144 
1145 
1146 /**
1147  * If the given register is a temporary, convert the index from a TGSI
1148  * TEMPORARY index to a VGPU10 temp index.
1149  */
1150 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1151 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1152                  enum tgsi_file_type file, unsigned index)
1153 {
1154    if (file == TGSI_FILE_TEMPORARY) {
1155       return emit->temp_map[index].index;
1156    }
1157    else {
1158       return index;
1159    }
1160 }
1161 
1162 
1163 /**
1164  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1165  * Note: the operandType field must already be initialized.
1166  * \param file  the register file being accessed
1167  * \param indirect  using indirect addressing of the register file?
1168  * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1169  * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1170  */
1171 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,boolean indirect,boolean index2D,bool indirect2D)1172 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1173                         VGPU10OperandToken0 operand0,
1174                         enum tgsi_file_type file,
1175                         boolean indirect,
1176                         boolean index2D, bool indirect2D)
1177 {
1178    VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1179    VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1180 
1181    /*
1182     * Compute index dimensions
1183     */
1184    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1185        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1186        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1187        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1188        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1189        operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1190       /* there's no swizzle for in-line immediates */
1191       indexDim = VGPU10_OPERAND_INDEX_0D;
1192       assert(operand0.selectionMode == 0);
1193    }
1194    else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1195       indexDim = VGPU10_OPERAND_INDEX_0D;
1196    }
1197    else {
1198       indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1199    }
1200 
1201    /*
1202     * Compute index representation(s) (immediate vs relative).
1203     */
1204    if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1205       index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1206          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1207 
1208       index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1209          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1210    }
1211    else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1212       index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1213          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1214 
1215       index1Rep = 0;
1216    }
1217    else {
1218       index0Rep = 0;
1219       index1Rep = 0;
1220    }
1221 
1222    operand0.indexDimension = indexDim;
1223    operand0.index0Representation = index0Rep;
1224    operand0.index1Representation = index1Rep;
1225 
1226    return operand0;
1227 }
1228 
1229 
1230 /**
1231  * Emit the operand for expressing an address register for indirect indexing.
1232  * Note that the address register is really just a temp register.
1233  * \param addr_reg_index  which address register to use
1234  */
1235 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)1236 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1237                        unsigned addr_reg_index)
1238 {
1239    unsigned tmp_reg_index;
1240    VGPU10OperandToken0 operand0;
1241 
1242    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1243 
1244    tmp_reg_index = emit->address_reg_index[addr_reg_index];
1245 
1246    /* operand0 is a simple temporary register, selecting one component */
1247    operand0.value = 0;
1248    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1249    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1250    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1251    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1252    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1253    operand0.swizzleX = 0;
1254    operand0.swizzleY = 1;
1255    operand0.swizzleZ = 2;
1256    operand0.swizzleW = 3;
1257 
1258    emit_dword(emit, operand0.value);
1259    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1260 }
1261 
1262 
1263 /**
1264  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 *
 * Before the operand is built, writes to certain OUTPUT registers are
 * redirected to temporaries (vertex position, clip distance, clip vertex,
 * viewport index, fragment color, tessellation factors and reemitted
 * tessellation control shader outputs).  For tessellation control shaders
 * this function may also set emit->discard_instruction or toggle
 * emit->reemit_instruction.  Fragment depth and sample mask writes are
 * emitted as dedicated single-token operands and return early.
 *
 * In the general case the emitted dwords are, in order: operand token 0,
 * the temp array ID (only when the register belongs to a temp array), the
 * register index (remapped for temporaries) and, for indirect addressing,
 * the address register operand.
 *
1265  * \param emit  the emitter context
1266  * \param reg  the TGSI dst register to translate
1267  */
1268 static void
emit_dst_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * reg)1269 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1270                   const struct tgsi_full_dst_register *reg)
1271 {
1272    enum tgsi_file_type file = reg->Register.File;
1273    unsigned index = reg->Register.Index;
   /* NOTE(review): the semantic name/index are looked up with the register
    * index before the file is known to be TGSI_FILE_OUTPUT; they are only
    * used in the OUTPUT branch below.  Confirm that indexing the output
    * semantic arrays with a non-output register index stays in bounds.
    */
1274    const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1275    const unsigned sem_index = emit->info.output_semantic_index[index];
1276    unsigned writemask = reg->Register.WriteMask;
1277    const boolean indirect = reg->Register.Indirect;
1278    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1279    boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1280    VGPU10OperandToken0 operand0;
1281 
   /* record that this temporary has been written */
1282    if (file == TGSI_FILE_TEMPORARY) {
1283       emit->temp_map[index].initialized = TRUE;
1284    }
1285 
1286    if (file == TGSI_FILE_OUTPUT) {
1287       if (emit->unit == PIPE_SHADER_VERTEX ||
1288           emit->unit == PIPE_SHADER_GEOMETRY ||
1289           emit->unit == PIPE_SHADER_TESS_EVAL) {
1290          if (index == emit->vposition.out_index &&
1291              emit->vposition.tmp_index != INVALID_INDEX) {
1292             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
1293              * vertex position result in a temporary so that we can modify
1294              * it in the post_helper() code.
1295              */
1296             file = TGSI_FILE_TEMPORARY;
1297             index = emit->vposition.tmp_index;
1298          }
1299          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1300                   emit->clip_dist_tmp_index != INVALID_INDEX) {
1301             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1302              * We store the clip distance in a temporary first, then
1303              * we'll copy it to the shadow copy and to CLIPDIST with the
1304              * enabled planes mask in emit_clip_distance_instructions().
1305              */
1306             file = TGSI_FILE_TEMPORARY;
1307             index = emit->clip_dist_tmp_index + sem_index;
1308          }
1309          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1310                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
1311             /* replace the CLIPVERTEX output register with a temporary */
1312             assert(emit->clip_mode == CLIP_VERTEX);
1313             assert(sem_index == 0);
1314             file = TGSI_FILE_TEMPORARY;
1315             index = emit->clip_vertex_tmp_index;
1316          }
1317          else if (sem_name == TGSI_SEMANTIC_COLOR &&
1318                   emit->key.clamp_vertex_color) {
1319 
1320             /* set the saturate modifier of the instruction
1321              * to clamp the vertex color.
1322              */
1323             VGPU10OpcodeToken0 *token =
1324                (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1325             token->saturate = TRUE;
1326          }
1327          else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1328                   emit->gs.viewport_index_out_index != INVALID_INDEX) {
            /* replace the VIEWPORT_INDEX output register with a temporary */
1329             file = TGSI_FILE_TEMPORARY;
1330             index = emit->gs.viewport_index_tmp_index;
1331          }
1332       }
1333       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1334          if (sem_name == TGSI_SEMANTIC_POSITION) {
1335             /* Fragment depth output register */
1336             operand0.value = 0;
1337             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1338             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1339             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1340             emit_dword(emit, operand0.value);
1341             return;
1342          }
1343          else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1344             /* Fragment sample mask output */
1345             operand0.value = 0;
1346             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1347             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1348             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1349             emit_dword(emit, operand0.value);
1350             return;
1351          }
1352          else if (index == emit->fs.color_out_index[0] &&
1353              emit->fs.color_tmp_index != INVALID_INDEX) {
1354             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
1355              * fragment color result in a temporary so that we can read
1356              * it in the post_helper() code.
1357              */
1358             file = TGSI_FILE_TEMPORARY;
1359             index = emit->fs.color_tmp_index;
1360          }
1361          else {
1362             /* Typically, for fragment shaders, the output register index
1363              * matches the color semantic index.  But not when we write to
1364              * the fragment depth register.  In that case, OUT[0] will be
1365              * fragdepth and OUT[1] will be the 0th color output.  We need
1366              * to use the semantic index for color outputs.
1367              */
1368             assert(sem_name == TGSI_SEMANTIC_COLOR);
1369             index = emit->info.output_semantic_index[index];
1370 
1371             emit->num_output_writes++;
1372          }
1373       }
1374       else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1375          if (index == emit->tcs.inner.tgsi_index) {
1376             /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1377              * in a temporary for now so that it will be stored into the
1378              * appropriate registers in post_helper() in patch constant phase.
1379              */
1380             if (emit->tcs.control_point_phase) {
1381                /* Discard writing into tessfactor in control point phase */
1382                emit->discard_instruction =  TRUE;
1383             }
1384             else {
1385                file = TGSI_FILE_TEMPORARY;
1386                index = emit->tcs.inner.temp_index;
1387             }
1388          }
1389          else if (index == emit->tcs.outer.tgsi_index) {
1390             /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1391              * in a temporary for now so that it will be stored into the
1392              * appropriate registers in post_helper().
1393              */
1394             if (emit->tcs.control_point_phase) {
1395                /* Discard writing into tessfactor in control point phase */
1396                emit->discard_instruction =  TRUE;
1397             }
1398             else {
1399                file = TGSI_FILE_TEMPORARY;
1400                index = emit->tcs.outer.temp_index;
1401             }
1402          }
1403          else if (index >= emit->tcs.patch_generic_out_index &&
1404                   index < (emit->tcs.patch_generic_out_index +
1405                           emit->tcs.patch_generic_out_count)) {
1406             if (emit->tcs.control_point_phase) {
1407                /* Discard writing into generic patch constant outputs in
1408                   control point phase */
1409                emit->discard_instruction =  TRUE;
1410             }
1411             else {
1412                if (emit->reemit_instruction) {
1413                   /* Store results of reemitted instruction in temporary register. */
1414                   file = TGSI_FILE_TEMPORARY;
1415                   index = emit->tcs.patch_generic_tmp_index +
1416                           (index - emit->tcs.patch_generic_out_index);
1417                   /**
1418                    * Temporaries for patch constant data can be done
1419                    * as indexable temporaries.
1420                    */
1421                   tempArrayId = get_temp_array_id(emit, file, index);
1422                   index2d = tempArrayId > 0;
1423 
1424                   emit->reemit_instruction = FALSE;
1425                }
1426                else {
1427                   /* If per-patch outputs are read in the shader, we
1428                    * reemit the instruction and store results in temporaries
1429                    * in patch constant phase. */
1430                   if (emit->info.reads_perpatch_outputs) {
1431                      emit->reemit_instruction = TRUE;
1432                   }
1433                }
1434             }
1435          }
1436          else if (reg->Register.Dimension) {
1437             /* Only control point outputs are declared 2D in tgsi */
1438             if (emit->tcs.control_point_phase) {
1439                if (emit->reemit_instruction) {
1440                   /* Store results of reemitted instruction in temporary register. */
1441                   index2d = FALSE;
1442                   file = TGSI_FILE_TEMPORARY;
1443                   index = emit->tcs.control_point_tmp_index +
1444                           (index - emit->tcs.control_point_out_index);
1445                   emit->reemit_instruction = FALSE;
1446                }
1447                else {
1448                   /* The mapped control point outputs are 1-D */
1449                   index2d = FALSE;
1450                   if (emit->info.reads_pervertex_outputs) {
1451                      /* If per-vertex outputs are read in the shader, we
1452                       * reemit the instruction and store results in temporaries
1453                       * in control point phase. */
1454                      emit->reemit_instruction = TRUE;
1455                   }
1456                }
1457 
1458                if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1459                    emit->clip_dist_tmp_index != INVALID_INDEX) {
1460                   /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1461                    * We store the clip distance in a temporary first, then
1462                    * we'll copy it to the shadow copy and to CLIPDIST with the
1463                    * enabled planes mask in emit_clip_distance_instructions().
1464                    */
1465                   file = TGSI_FILE_TEMPORARY;
1466                   index = emit->clip_dist_tmp_index + sem_index;
1467                }
1468                else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1469                         emit->clip_vertex_tmp_index != INVALID_INDEX) {
1470                   /* replace the CLIPVERTEX output register with a temporary */
1471                   assert(emit->clip_mode == CLIP_VERTEX);
1472                   assert(sem_index == 0);
1473                   file = TGSI_FILE_TEMPORARY;
1474                   index = emit->clip_vertex_tmp_index;
1475                }
1476             }
1477             else {
1478                /* Discard writing into control point outputs in
1479                   patch constant phase */
1480                emit->discard_instruction =  TRUE;
1481             }
1482          }
1483       }
1484    }
1485 
1486    /* init operand tokens to all zero */
1487    operand0.value = 0;
1488 
1489    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1490 
1491    /* the operand has a writemask */
1492    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1493 
1494    /* Which of the four dest components to write to. Note that we can use a
1495     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1496     */
1497    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1498    operand0.mask = writemask;
1499 
1500    /* translate TGSI register file type to VGPU10 operand type */
1501    operand0.operandType = translate_register_file(file, tempArrayId > 0);
1502 
1503    check_register_index(emit, operand0.operandType, index);
1504 
   /* no 2-D indirect indexing for dst registers, hence the FALSE */
1505    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1506                                       index2d, FALSE);
1507 
1508    /* Emit tokens */
1509    emit_dword(emit, operand0.value);
   /* for temp arrays the array ID is emitted before the register index */
1510    if (tempArrayId > 0) {
1511       emit_dword(emit, tempArrayId);
1512    }
1513 
1514    emit_dword(emit, remap_temp_index(emit, file, index));
1515 
1516    if (indirect) {
1517       emit_indirect_register(emit, reg->Indirect.Index);
1518    }
1519 }
1520 
1521 
1522 /**
1523  * Check if temporary register needs to be initialize when
1524  * shader is not using indirect addressing for temporary and uninitialized
1525  * temporary is not used in loop. In these two scenarios, we cannot
1526  * determine if temporary is initialized or not.
1527  */
1528 static boolean
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1529 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1530                              unsigned index)
1531 {
1532    if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY))
1533        && emit->current_loop_depth == 0) {
1534       if (!emit->temp_map[index].initialized &&
1535           emit->temp_map[index].index < emit->num_shader_temps) {
1536          return TRUE;
1537       }
1538    }
1539 
1540    return FALSE;
1541 }
1542 
1543 
1544 /**
1545  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1546  * In quite a few cases, we do register substitution.  For example, if
1547  * the TGSI register is the front/back-face register, we replace that with
1548  * a temp register containing a value we computed earlier.
1549  */
1550 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1551 emit_src_register(struct svga_shader_emitter_v10 *emit,
1552                   const struct tgsi_full_src_register *reg)
1553 {
1554    enum tgsi_file_type file = reg->Register.File;
1555    unsigned index = reg->Register.Index;
1556    boolean indirect = reg->Register.Indirect;
1557    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1558    boolean index2d = (reg->Register.Dimension ||
1559                             tempArrayId > 0 ||
1560                             file == TGSI_FILE_CONSTANT);
1561    unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1562    boolean indirect2d = reg->Dimension.Indirect;
1563    unsigned swizzleX = reg->Register.SwizzleX;
1564    unsigned swizzleY = reg->Register.SwizzleY;
1565    unsigned swizzleZ = reg->Register.SwizzleZ;
1566    unsigned swizzleW = reg->Register.SwizzleW;
1567    const boolean absolute = reg->Register.Absolute;
1568    const boolean negate = reg->Register.Negate;
1569    VGPU10OperandToken0 operand0;
1570    VGPU10OperandToken1 operand1;
1571 
1572    operand0.value = operand1.value = 0;
1573 
1574    if (emit->unit == PIPE_SHADER_FRAGMENT){
1575       if (file == TGSI_FILE_INPUT) {
1576          if (index == emit->fs.face_input_index) {
1577             /* Replace INPUT[FACE] with TEMP[FACE] */
1578             file = TGSI_FILE_TEMPORARY;
1579             index = emit->fs.face_tmp_index;
1580          }
1581          else if (index == emit->fs.fragcoord_input_index) {
1582             /* Replace INPUT[POSITION] with TEMP[POSITION] */
1583             file = TGSI_FILE_TEMPORARY;
1584             index = emit->fs.fragcoord_tmp_index;
1585          }
1586          else if (index == emit->fs.layer_input_index) {
1587             /* Replace INPUT[LAYER] with zero.x */
1588             file = TGSI_FILE_IMMEDIATE;
1589             index = emit->fs.layer_imm_index;
1590             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1591          }
1592          else {
1593             /* We remap fragment shader inputs to that FS input indexes
1594              * match up with VS/GS output indexes.
1595              */
1596             index = emit->linkage.input_map[index];
1597          }
1598       }
1599       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1600          if (index == emit->fs.sample_pos_sys_index) {
1601             assert(emit->version >= 41);
1602             /* Current sample position is in a temp register */
1603             file = TGSI_FILE_TEMPORARY;
1604             index = emit->fs.sample_pos_tmp_index;
1605          }
1606          else if (index == emit->fs.sample_mask_in_sys_index) {
1607             /* Emitted as vCoverage0.x */
1608             /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1609              * elements where s is the maximum number of color samples supported
1610              * by the implementation.
1611              */
1612             operand0.value = 0;
1613             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1614             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1615             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1616             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1617             emit_dword(emit, operand0.value);
1618             return;
1619          }
1620          else {
1621             /* Map the TGSI system value to a VGPU10 input register */
1622             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1623             file = TGSI_FILE_INPUT;
1624             index = emit->system_value_indexes[index];
1625          }
1626       }
1627    }
1628    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1629       if (file == TGSI_FILE_INPUT) {
1630          if (index == emit->gs.prim_id_index) {
1631             operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1632             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1633          }
1634          index = emit->linkage.input_map[index];
1635       }
1636       else if (file == TGSI_FILE_SYSTEM_VALUE &&
1637                index == emit->gs.invocation_id_sys_index) {
1638          /* Emitted as vGSInstanceID0.x */
1639          operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1640          operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1641          index = 0;
1642       }
1643    }
1644    else if (emit->unit == PIPE_SHADER_VERTEX) {
1645       if (file == TGSI_FILE_INPUT) {
1646          /* if input is adjusted... */
1647          if ((emit->key.vs.adjust_attrib_w_1 |
1648               emit->key.vs.adjust_attrib_itof |
1649               emit->key.vs.adjust_attrib_utof |
1650               emit->key.vs.attrib_is_bgra |
1651               emit->key.vs.attrib_puint_to_snorm |
1652               emit->key.vs.attrib_puint_to_uscaled |
1653               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1654             file = TGSI_FILE_TEMPORARY;
1655             index = emit->vs.adjusted_input[index];
1656          }
1657       }
1658       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1659          if (index == emit->vs.vertex_id_sys_index &&
1660              emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1661             file = TGSI_FILE_TEMPORARY;
1662             index = emit->vs.vertex_id_tmp_index;
1663             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1664          }
1665          else {
1666             /* Map the TGSI system value to a VGPU10 input register */
1667             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1668             file = TGSI_FILE_INPUT;
1669             index = emit->system_value_indexes[index];
1670          }
1671       }
1672    }
1673    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1674 
1675       if (file == TGSI_FILE_SYSTEM_VALUE) {
1676          if (index == emit->tcs.vertices_per_patch_index) {
1677             /**
1678              * if source register is the system value for vertices_per_patch,
1679              * replace it with the immediate.
1680              */
1681             file = TGSI_FILE_IMMEDIATE;
1682             index = emit->tcs.imm_index;
1683             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1684          }
1685          else if (index == emit->tcs.invocation_id_sys_index) {
1686             if (emit->tcs.control_point_phase) {
1687                /**
1688                 * Emitted as vOutputControlPointID.x
1689                 */
1690                operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1691                operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1692                operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1693                operand0.mask = 0;
1694                emit_dword(emit, operand0.value);
1695                return;
1696             }
1697             else {
1698                /* There is no control point ID input declaration in
1699                 * the patch constant phase in hull shader.
1700                 * Since for now we are emitting all instructions in
1701                 * the patch constant phase, we are replacing the
1702                 * control point ID reference with the immediate 0.
1703                 */
1704                file = TGSI_FILE_IMMEDIATE;
1705                index = emit->tcs.imm_index;
1706                swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1707             }
1708          }
1709          else if (index == emit->tcs.prim_id_index) {
1710             /**
1711              * Emitted as vPrim.x
1712              */
1713             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1714             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1715             index = 0;
1716          }
1717       }
1718       else if (file == TGSI_FILE_INPUT) {
1719          index = emit->linkage.input_map[index];
1720          if (!emit->tcs.control_point_phase) {
1721             /* Emitted as vicp */
1722             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1723             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1724             assert(reg->Register.Dimension);
1725          }
1726       }
1727       else if (file == TGSI_FILE_OUTPUT) {
1728          if ((index >= emit->tcs.patch_generic_out_index &&
1729              index < (emit->tcs.patch_generic_out_index +
1730                       emit->tcs.patch_generic_out_count)) ||
1731              index == emit->tcs.inner.tgsi_index ||
1732              index == emit->tcs.outer.tgsi_index) {
1733             if (emit->tcs.control_point_phase) {
1734                emit->discard_instruction = TRUE;
1735             }
1736             else {
1737                /* Device doesn't allow reading from output so
1738                 * use corresponding temporary register as source */
1739                file = TGSI_FILE_TEMPORARY;
1740                if (index == emit->tcs.inner.tgsi_index) {
1741                   index = emit->tcs.inner.temp_index;
1742                }
1743                else if (index == emit->tcs.outer.tgsi_index) {
1744                   index = emit->tcs.outer.temp_index;
1745                }
1746                else {
1747                   index = emit->tcs.patch_generic_tmp_index +
1748                           (index - emit->tcs.patch_generic_out_index);
1749                }
1750 
1751                /**
1752                 * Temporaries for patch constant data can be done
1753                 * as indexable temporaries.
1754                 */
1755                tempArrayId = get_temp_array_id(emit, file, index);
1756                index2d = tempArrayId > 0;
1757                index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1758             }
1759          }
1760          else if (index2d) {
1761             if (emit->tcs.control_point_phase) {
1762                /* Device doesn't allow reading from output so
1763                 * use corresponding temporary register as source */
1764                file = TGSI_FILE_TEMPORARY;
1765                index2d = FALSE;
1766                index = emit->tcs.control_point_tmp_index +
1767                        (index - emit->tcs.control_point_out_index);
1768             }
1769             else {
1770                emit->discard_instruction = TRUE;
1771             }
1772          }
1773       }
1774    }
1775    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1776       if (file == TGSI_FILE_SYSTEM_VALUE) {
1777          if (index == emit->tes.tesscoord_sys_index) {
1778             /**
1779              * Emitted as vDomain
1780              */
1781             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1782             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1783             index = 0;
1784 
1785             /* Make sure swizzles are of those components allowed according
1786              * to the tessellator domain.
1787              */
1788             swizzleX = MIN2(swizzleX, emit->tes.swizzle_max);
1789             swizzleY = MIN2(swizzleY, emit->tes.swizzle_max);
1790             swizzleZ = MIN2(swizzleZ, emit->tes.swizzle_max);
1791             swizzleW = MIN2(swizzleW, emit->tes.swizzle_max);
1792          }
1793          else if (index == emit->tes.inner.tgsi_index) {
1794             file = TGSI_FILE_TEMPORARY;
1795             index = emit->tes.inner.temp_index;
1796          }
1797          else if (index == emit->tes.outer.tgsi_index) {
1798             file = TGSI_FILE_TEMPORARY;
1799             index = emit->tes.outer.temp_index;
1800          }
1801          else if (index == emit->tes.prim_id_index) {
1802             /**
1803              * Emitted as vPrim.x
1804              */
1805             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1806             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1807             index = 0;
1808          }
1809 
1810       }
1811       else if (file == TGSI_FILE_INPUT) {
1812          if (index2d) {
1813             /* 2D input is emitted as vcp (input control point). */
1814             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1815             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1816 
1817             /* index specifies the element index and is remapped
1818              * to align with the tcs output index.
1819              */
1820             index = emit->linkage.input_map[index];
1821 
1822             assert(index2 < emit->key.tes.vertices_per_patch);
1823          }
1824          else {
1825             if (index < emit->key.tes.tessfactor_index)
1826                /* index specifies the generic patch index.
1827                 * Remapped to match up with the tcs output index.
1828                 */
1829                index = emit->linkage.input_map[index];
1830 
1831             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1832             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1833          }
1834       }
1835    }
1836    else if (emit->unit == PIPE_SHADER_COMPUTE) {
1837       if (file == TGSI_FILE_SYSTEM_VALUE) {
1838          if (index == emit->cs.thread_id_index) {
1839             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1840             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP;
1841             index = 0;
1842          } else if (index == emit->cs.block_id_index) {
1843             operand0.value = 0;
1844             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1845             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID;
1846             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1847             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1848             operand0.swizzleX = swizzleX;
1849             operand0.swizzleY = swizzleY;
1850             operand0.swizzleZ = swizzleZ;
1851             operand0.swizzleW = swizzleW;
1852             emit_dword(emit, operand0.value);
1853             return;
1854          } else if (index == emit->cs.grid_size.tgsi_index) {
1855             file = TGSI_FILE_IMMEDIATE;
1856             index = emit->cs.grid_size.imm_index;
1857          }
1858       }
1859    }
1860 
1861    if (file == TGSI_FILE_ADDRESS) {
1862       index = emit->address_reg_index[index];
1863       file = TGSI_FILE_TEMPORARY;
1864    }
1865 
1866    if (file == TGSI_FILE_CONSTANT) {
1867       /**
1868        * If this constant buffer is to be bound as srv raw buffer,
1869        * then we have to load the constant to a temp first before
1870        * it can be used as a source in the instruction.
1871        * This is accomplished in two passes. The first pass is to
1872        * identify if there is any constbuf to rawbuf translation.
1873        * If there isn't, emit the instruction as usual.
1874        * If there is, then we save the constant buffer reference info,
1875        * and then instead of emitting the instruction at the end
1876        * of the instruction, it will trigger a second pass of parsing
1877        * this instruction. Before it starts the parsing, it will
1878        * load the referenced raw buffer elements to temporaries.
1879        * Then it will emit the instruction that replaces the
1880        * constant buffer replaces with the corresponding temporaries.
1881        */
1882       if (emit->raw_bufs & (1 << index2)) {
1883          if (emit->reemit_rawbuf_instruction != REEMIT_IN_PROGRESS) {
1884             unsigned tmpIdx = emit->raw_buf_cur_tmp_index;
1885 
1886             emit->raw_buf_tmp[tmpIdx].buffer_index = index2;
1887 
1888             /* Save whether the element index is indirect indexing */
1889             emit->raw_buf_tmp[tmpIdx].indirect = indirect;
1890 
1891             /* If it is indirect index, save the temporary
1892              * address index, otherwise, save the immediate index.
1893              */
1894             if (indirect) {
1895                emit->raw_buf_tmp[tmpIdx].element_index =
1896                   emit->address_reg_index[reg->Indirect.Index];
1897                emit->raw_buf_tmp[tmpIdx].element_rel =
1898                   reg->Register.Index;
1899             }
1900             else {
1901                emit->raw_buf_tmp[tmpIdx].element_index = index;
1902                emit->raw_buf_tmp[tmpIdx].element_rel = 0;
1903             }
1904 
1905             emit->raw_buf_cur_tmp_index++;
1906             emit->reemit_rawbuf_instruction = REEMIT_TRUE;
1907             emit->discard_instruction = TRUE;
1908             emit->reemit_tgsi_instruction = TRUE;
1909          }
1910          else {
1911             /* In the reemitting process, replace the constant buffer
1912              * reference with temporary.
1913              */
1914             file = TGSI_FILE_TEMPORARY;
1915             index = emit->raw_buf_cur_tmp_index + emit->raw_buf_tmp_index;
1916             index2d = FALSE;
1917             indirect = FALSE;
1918             emit->raw_buf_cur_tmp_index++;
1919          }
1920       }
1921    }
1922 
1923    if (file == TGSI_FILE_TEMPORARY) {
1924       if (need_temp_reg_initialization(emit, index)) {
1925          emit->initialize_temp_index = index;
1926          emit->discard_instruction = TRUE;
1927       }
1928    }
1929 
1930    if (operand0.value == 0) {
1931       /* if operand0 was not set above for a special case, do the general
1932        * case now.
1933        */
1934       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1935       operand0.operandType = translate_register_file(file, tempArrayId > 0);
1936    }
1937    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1938                                       index2d, indirect2d);
1939 
1940    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1941        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1942       /* there's no swizzle for in-line immediates */
1943       if (swizzleX == swizzleY &&
1944           swizzleX == swizzleZ &&
1945           swizzleX == swizzleW) {
1946          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1947       }
1948       else {
1949          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1950       }
1951 
1952       operand0.swizzleX = swizzleX;
1953       operand0.swizzleY = swizzleY;
1954       operand0.swizzleZ = swizzleZ;
1955       operand0.swizzleW = swizzleW;
1956 
1957       if (absolute || negate) {
1958          operand0.extended = 1;
1959          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1960          if (absolute && !negate)
1961             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1962          if (!absolute && negate)
1963             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1964          if (absolute && negate)
1965             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1966       }
1967    }
1968 
1969    check_register_index(emit, operand0.operandType, index);
1970 
1971    /* Emit the operand tokens */
1972    emit_dword(emit, operand0.value);
1973    if (operand0.extended)
1974       emit_dword(emit, operand1.value);
1975 
1976    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1977       /* Emit the four float/int in-line immediate values */
1978       unsigned *c;
1979       assert(index < ARRAY_SIZE(emit->immediates));
1980       assert(file == TGSI_FILE_IMMEDIATE);
1981       assert(swizzleX < 4);
1982       assert(swizzleY < 4);
1983       assert(swizzleZ < 4);
1984       assert(swizzleW < 4);
1985       c = (unsigned *) emit->immediates[index];
1986       emit_dword(emit, c[swizzleX]);
1987       emit_dword(emit, c[swizzleY]);
1988       emit_dword(emit, c[swizzleZ]);
1989       emit_dword(emit, c[swizzleW]);
1990    }
1991    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1992       /* Emit the register index(es) */
1993       if (index2d) {
1994          emit_dword(emit, index2);
1995 
1996          if (indirect2d) {
1997             emit_indirect_register(emit, reg->DimIndirect.Index);
1998          }
1999       }
2000 
2001       emit_dword(emit, remap_temp_index(emit, file, index));
2002 
2003       if (indirect) {
2004          assert(operand0.operandType != VGPU10_OPERAND_TYPE_TEMP);
2005          emit_indirect_register(emit, reg->Indirect.Index);
2006       }
2007    }
2008 }
2009 
2010 
2011 /**
2012  * Emit a resource operand (for use with a SAMPLE instruction).
2013  */
2014 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)2015 emit_resource_register(struct svga_shader_emitter_v10 *emit,
2016                        unsigned resource_number)
2017 {
2018    VGPU10OperandToken0 operand0;
2019 
2020    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
2021 
2022    /* init */
2023    operand0.value = 0;
2024 
2025    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
2026    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2027    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2028    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2029    operand0.swizzleX = VGPU10_COMPONENT_X;
2030    operand0.swizzleY = VGPU10_COMPONENT_Y;
2031    operand0.swizzleZ = VGPU10_COMPONENT_Z;
2032    operand0.swizzleW = VGPU10_COMPONENT_W;
2033 
2034    emit_dword(emit, operand0.value);
2035    emit_dword(emit, resource_number);
2036 }
2037 
2038 
2039 /**
2040  * Emit a sampler operand (for use with a SAMPLE instruction).
2041  */
2042 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned unit)2043 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
2044                       unsigned unit)
2045 {
2046    VGPU10OperandToken0 operand0;
2047    unsigned sampler_number;
2048 
2049    sampler_number = emit->key.tex[unit].sampler_index;
2050 
2051    if ((emit->shadow_compare_units & (1 << unit)) && emit->use_sampler_state_mapping)
2052       sampler_number++;
2053 
2054    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
2055 
2056    /* init */
2057    operand0.value = 0;
2058 
2059    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
2060    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2061 
2062    emit_dword(emit, operand0.value);
2063    emit_dword(emit, sampler_number);
2064 }
2065 
2066 
2067 /**
2068  * Emit an operand which reads the IS_FRONT_FACING register.
2069  */
2070 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)2071 emit_face_register(struct svga_shader_emitter_v10 *emit)
2072 {
2073    VGPU10OperandToken0 operand0;
2074    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
2075 
2076    /* init */
2077    operand0.value = 0;
2078 
2079    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
2080    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2081    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
2082    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2083 
2084    operand0.swizzleX = VGPU10_COMPONENT_X;
2085    operand0.swizzleY = VGPU10_COMPONENT_X;
2086    operand0.swizzleZ = VGPU10_COMPONENT_X;
2087    operand0.swizzleW = VGPU10_COMPONENT_X;
2088 
2089    emit_dword(emit, operand0.value);
2090    emit_dword(emit, index);
2091 }
2092 
2093 
2094 /**
2095  * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
2096  * instruction.
2097  */
2098 static void
emit_rasterizer_register(struct svga_shader_emitter_v10 * emit)2099 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
2100 {
2101    VGPU10OperandToken0 operand0;
2102 
2103    /* init */
2104    operand0.value = 0;
2105 
2106    /* No register index for rasterizer index (there's only one) */
2107    operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
2108    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
2109    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
2110    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
2111    operand0.swizzleX = VGPU10_COMPONENT_X;
2112    operand0.swizzleY = VGPU10_COMPONENT_Y;
2113    operand0.swizzleZ = VGPU10_COMPONENT_Z;
2114    operand0.swizzleW = VGPU10_COMPONENT_W;
2115 
2116    emit_dword(emit, operand0.value);
2117 }
2118 
2119 
2120 /**
2121  * Emit tokens for the "stream" register used by the
2122  * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
2123  */
2124 static void
emit_stream_register(struct svga_shader_emitter_v10 * emit,unsigned index)2125 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
2126 {
2127    VGPU10OperandToken0 operand0;
2128 
2129    /* init */
2130    operand0.value = 0;
2131 
2132    /* No register index for rasterizer index (there's only one) */
2133    operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
2134    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
2135    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
2136 
2137    emit_dword(emit, operand0.value);
2138    emit_dword(emit, index);
2139 }
2140 
2141 
2142 /**
2143  * Emit the token for a VGPU10 opcode, with precise parameter.
2144  * \param saturate   clamp result to [0,1]?
2145  */
2146 static void
emit_opcode_precise(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,boolean precise)2147 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
2148                     unsigned vgpu10_opcode, boolean saturate, boolean precise)
2149 {
2150    VGPU10OpcodeToken0 token0;
2151 
2152    token0.value = 0;  /* init all fields to zero */
2153    token0.opcodeType = vgpu10_opcode;
2154    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2155    token0.saturate = saturate;
2156 
2157    /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
2158     * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
2159     */
2160    token0.preciseValues = precise && emit->version >= 50;
2161 
2162    emit_dword(emit, token0.value);
2163 
2164    emit->uses_precise_qualifier |= token0.preciseValues;
2165 }
2166 
2167 
2168 /**
2169  * Emit the token for a VGPU10 opcode.
2170  * \param saturate   clamp result to [0,1]?
2171  */
2172 static void
emit_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate)2173 emit_opcode(struct svga_shader_emitter_v10 *emit,
2174             unsigned vgpu10_opcode, boolean saturate)
2175 {
2176    emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2177 }
2178 
2179 
2180 /**
2181  * Emit the token for a VGPU10 resinfo instruction.
2182  * \param modifier   return type modifier, _uint or _rcpFloat.
2183  *                   TODO: We may want to remove this parameter if it will
2184  *                   only ever be used as _uint.
2185  */
2186 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)2187 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2188                     VGPU10_RESINFO_RETURN_TYPE modifier)
2189 {
2190    VGPU10OpcodeToken0 token0;
2191 
2192    token0.value = 0;  /* init all fields to zero */
2193    token0.opcodeType = VGPU10_OPCODE_RESINFO;
2194    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2195    token0.resinfoReturnType = modifier;
2196 
2197    emit_dword(emit, token0.value);
2198 }
2199 
2200 
2201 /**
2202  * Emit opcode tokens for a texture sample instruction.  Texture instructions
2203  * can be rather complicated (texel offsets, etc) so we have this specialized
2204  * function.
2205  */
2206 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,const int offsets[3])2207 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2208                    unsigned vgpu10_opcode, boolean saturate,
2209                    const int offsets[3])
2210 {
2211    VGPU10OpcodeToken0 token0;
2212    VGPU10OpcodeToken1 token1;
2213 
2214    token0.value = 0;  /* init all fields to zero */
2215    token0.opcodeType = vgpu10_opcode;
2216    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2217    token0.saturate = saturate;
2218 
2219    if (offsets[0] || offsets[1] || offsets[2]) {
2220       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2221       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2222       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2223       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2224       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2225       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2226 
2227       token0.extended = 1;
2228       token1.value = 0;
2229       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2230       token1.offsetU = offsets[0];
2231       token1.offsetV = offsets[1];
2232       token1.offsetW = offsets[2];
2233    }
2234 
2235    emit_dword(emit, token0.value);
2236    if (token0.extended) {
2237       emit_dword(emit, token1.value);
2238    }
2239 }
2240 
2241 
2242 /**
2243  * Emit a DISCARD opcode token.
2244  * If nonzero is set, we'll discard the fragment if the X component is not 0.
2245  * Otherwise, we'll discard the fragment if the X component is 0.
2246  */
2247 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,boolean nonzero)2248 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2249 {
2250    VGPU10OpcodeToken0 opcode0;
2251 
2252    opcode0.value = 0;
2253    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2254    if (nonzero)
2255       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2256 
2257    emit_dword(emit, opcode0.value);
2258 }
2259 
2260 
2261 /**
2262  * We need to call this before we begin emitting a VGPU10 instruction.
2263  */
2264 static void
begin_emit_instruction(struct svga_shader_emitter_v10 * emit)2265 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2266 {
2267    assert(emit->inst_start_token == 0);
2268    /* Save location of the instruction's VGPU10OpcodeToken0 token.
2269     * Note, we can't save a pointer because it would become invalid if
2270     * we have to realloc the output buffer.
2271     */
2272    emit->inst_start_token = emit_get_num_tokens(emit);
2273 }
2274 
2275 
2276 /**
2277  * We need to call this after we emit the last token of a VGPU10 instruction.
2278  * This function patches in the opcode token's instructionLength field.
2279  */
2280 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)2281 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2282 {
2283    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2284    unsigned inst_length;
2285 
2286    assert(emit->inst_start_token > 0);
2287 
2288    if (emit->discard_instruction) {
2289       /* Back up the emit->ptr to where this instruction started so
2290        * that we discard the current instruction.
2291        */
2292       emit->ptr = (char *) (tokens + emit->inst_start_token);
2293    }
2294    else {
2295       /* Compute instruction length and patch that into the start of
2296        * the instruction.
2297        */
2298       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2299 
2300       assert(inst_length > 0);
2301 
2302       tokens[emit->inst_start_token].instructionLength = inst_length;
2303    }
2304 
2305    emit->inst_start_token = 0; /* reset to zero for error checking */
2306    emit->discard_instruction = FALSE;
2307 }
2308 
2309 
2310 /**
2311  * Return index for a free temporary register.
2312  */
2313 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)2314 get_temp_index(struct svga_shader_emitter_v10 *emit)
2315 {
2316    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2317    return emit->num_shader_temps + emit->internal_temp_count++;
2318 }
2319 
2320 
2321 /**
2322  * Release the temporaries which were generated by get_temp_index().
2323  */
2324 static void
free_temp_indexes(struct svga_shader_emitter_v10 * emit)2325 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2326 {
2327    emit->internal_temp_count = 0;
2328 }
2329 
2330 
2331 /**
2332  * Create a tgsi_full_src_register.
2333  */
2334 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)2335 make_src_reg(enum tgsi_file_type file, unsigned index)
2336 {
2337    struct tgsi_full_src_register reg;
2338 
2339    memset(&reg, 0, sizeof(reg));
2340    reg.Register.File = file;
2341    reg.Register.Index = index;
2342    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2343    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2344    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2345    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2346    return reg;
2347 }
2348 
2349 
2350 /**
2351  * Create a tgsi_full_src_register with a swizzle such that all four
2352  * vector components have the same scalar value.
2353  */
2354 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2355 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2356 {
2357    struct tgsi_full_src_register reg;
2358 
2359    assert(component >= TGSI_SWIZZLE_X);
2360    assert(component <= TGSI_SWIZZLE_W);
2361 
2362    memset(&reg, 0, sizeof(reg));
2363    reg.Register.File = file;
2364    reg.Register.Index = index;
2365    reg.Register.SwizzleX =
2366    reg.Register.SwizzleY =
2367    reg.Register.SwizzleZ =
2368    reg.Register.SwizzleW = component;
2369    return reg;
2370 }
2371 
2372 
2373 /**
2374  * Create a tgsi_full_src_register for a temporary.
2375  */
2376 static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)2377 make_src_temp_reg(unsigned index)
2378 {
2379    return make_src_reg(TGSI_FILE_TEMPORARY, index);
2380 }
2381 
2382 
2383 /**
2384  * Create a tgsi_full_src_register for a constant.
2385  */
2386 static struct tgsi_full_src_register
make_src_const_reg(unsigned index)2387 make_src_const_reg(unsigned index)
2388 {
2389    return make_src_reg(TGSI_FILE_CONSTANT, index);
2390 }
2391 
2392 
2393 /**
2394  * Create a tgsi_full_src_register for an immediate constant.
2395  */
2396 static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)2397 make_src_immediate_reg(unsigned index)
2398 {
2399    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2400 }
2401 
2402 
2403 /**
2404  * Create a tgsi_full_dst_register.
2405  */
2406 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)2407 make_dst_reg(enum tgsi_file_type file, unsigned index)
2408 {
2409    struct tgsi_full_dst_register reg;
2410 
2411    memset(&reg, 0, sizeof(reg));
2412    reg.Register.File = file;
2413    reg.Register.Index = index;
2414    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2415    return reg;
2416 }
2417 
2418 
2419 /**
2420  * Create a tgsi_full_dst_register for a temporary.
2421  */
2422 static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)2423 make_dst_temp_reg(unsigned index)
2424 {
2425    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2426 }
2427 
2428 
2429 /**
2430  * Create a tgsi_full_dst_register for an output.
2431  */
2432 static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)2433 make_dst_output_reg(unsigned index)
2434 {
2435    return make_dst_reg(TGSI_FILE_OUTPUT, index);
2436 }
2437 
2438 
2439 /**
2440  * Create negated tgsi_full_src_register.
2441  */
2442 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2443 negate_src(const struct tgsi_full_src_register *reg)
2444 {
2445    struct tgsi_full_src_register neg = *reg;
2446    neg.Register.Negate = !reg->Register.Negate;
2447    return neg;
2448 }
2449 
2450 /**
2451  * Create absolute value of a tgsi_full_src_register.
2452  */
2453 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2454 absolute_src(const struct tgsi_full_src_register *reg)
2455 {
2456    struct tgsi_full_src_register absolute = *reg;
2457    absolute.Register.Absolute = 1;
2458    return absolute;
2459 }
2460 
2461 
2462 /** Return the named swizzle term from the src register */
2463 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2464 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2465 {
2466    switch (term) {
2467    case TGSI_SWIZZLE_X:
2468       return reg->Register.SwizzleX;
2469    case TGSI_SWIZZLE_Y:
2470       return reg->Register.SwizzleY;
2471    case TGSI_SWIZZLE_Z:
2472       return reg->Register.SwizzleZ;
2473    case TGSI_SWIZZLE_W:
2474       return reg->Register.SwizzleW;
2475    default:
2476       assert(!"Bad swizzle");
2477       return TGSI_SWIZZLE_X;
2478    }
2479 }
2480 
2481 
2482 /**
2483  * Create swizzled tgsi_full_src_register.
2484  */
2485 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2486 swizzle_src(const struct tgsi_full_src_register *reg,
2487             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2488             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2489 {
2490    struct tgsi_full_src_register swizzled = *reg;
2491    /* Note: we swizzle the current swizzle */
2492    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2493    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2494    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2495    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2496    return swizzled;
2497 }
2498 
2499 
2500 /**
2501  * Create swizzled tgsi_full_src_register where all the swizzle
2502  * terms are the same.
2503  */
2504 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2505 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2506 {
2507    struct tgsi_full_src_register swizzled = *reg;
2508    /* Note: we swizzle the current swizzle */
2509    swizzled.Register.SwizzleX =
2510    swizzled.Register.SwizzleY =
2511    swizzled.Register.SwizzleZ =
2512    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2513    return swizzled;
2514 }
2515 
2516 
2517 /**
2518  * Create new tgsi_full_dst_register with writemask.
2519  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2520  */
2521 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2522 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2523 {
2524    struct tgsi_full_dst_register masked = *reg;
2525    masked.Register.WriteMask = mask;
2526    return masked;
2527 }
2528 
2529 
2530 /**
2531  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2532  */
2533 static boolean
same_swizzle_terms(const struct tgsi_full_src_register * reg)2534 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2535 {
2536    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2537            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2538            reg->Register.SwizzleZ == reg->Register.SwizzleW);
2539 }
2540 
2541 
2542 /**
2543  * Search the vector for the value 'x' and return its position.
2544  */
2545 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2546 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2547                  union tgsi_immediate_data x)
2548 {
2549    unsigned i;
2550    for (i = 0; i < 4; i++) {
2551       if (vec[i].Int == x.Int)
2552          return i;
2553    }
2554    return -1;
2555 }
2556 
2557 
2558 /**
2559  * Helper used by make_immediate_reg(), make_immediate_reg_4().
2560  */
2561 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2562 find_immediate(struct svga_shader_emitter_v10 *emit,
2563                union tgsi_immediate_data x, unsigned startIndex)
2564 {
2565    const unsigned endIndex = emit->num_immediates;
2566    unsigned i;
2567 
2568    assert(emit->immediates_emitted);
2569 
2570    /* Search immediates for x, y, z, w */
2571    for (i = startIndex; i < endIndex; i++) {
2572       if (x.Int == emit->immediates[i][0].Int ||
2573           x.Int == emit->immediates[i][1].Int ||
2574           x.Int == emit->immediates[i][2].Int ||
2575           x.Int == emit->immediates[i][3].Int) {
2576          return i;
2577       }
2578    }
2579    /* Should never try to use an immediate value that wasn't pre-declared */
2580    assert(!"find_immediate() failed!");
2581    return -1;
2582 }
2583 
2584 
2585 /**
2586  * As above, but search for a double[2] pair.
2587  */
2588 static int
find_immediate_dbl(struct svga_shader_emitter_v10 * emit,double x,double y)2589 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2590                    double x, double y)
2591 {
2592    const unsigned endIndex = emit->num_immediates;
2593    unsigned i;
2594 
2595    assert(emit->immediates_emitted);
2596 
2597    /* Search immediates for x, y, z, w */
2598    for (i = 0; i < endIndex; i++) {
2599       if (x == emit->immediates_dbl[i][0] &&
2600           y == emit->immediates_dbl[i][1]) {
2601          return i;
2602       }
2603    }
2604    /* Should never try to use an immediate value that wasn't pre-declared */
2605    assert(!"find_immediate_dbl() failed!");
2606    return -1;
2607 }
2608 
2609 
2610 
2611 /**
2612  * Return a tgsi_full_src_register for an immediate/literal
2613  * union tgsi_immediate_data[4] value.
2614  * Note: the values must have been previously declared/allocated in
2615  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2616  * vec4 immediate.
2617  */
2618 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2619 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2620                      const union tgsi_immediate_data imm[4])
2621 {
2622    struct tgsi_full_src_register reg;
2623    unsigned i;
2624 
2625    for (i = 0; i < emit->num_common_immediates; i++) {
2626       /* search for first component value */
2627       int immpos = find_immediate(emit, imm[0], i);
2628       int x, y, z, w;
2629 
2630       assert(immpos >= 0);
2631 
2632       /* find remaining components within the immediate vector */
2633       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2634       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2635       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2636       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2637 
2638       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
2639          /* found them all */
2640          memset(&reg, 0, sizeof(reg));
2641          reg.Register.File = TGSI_FILE_IMMEDIATE;
2642          reg.Register.Index = immpos;
2643          reg.Register.SwizzleX = x;
2644          reg.Register.SwizzleY = y;
2645          reg.Register.SwizzleZ = z;
2646          reg.Register.SwizzleW = w;
2647          return reg;
2648       }
2649       /* else, keep searching */
2650    }
2651 
2652    assert(!"Failed to find immediate register!");
2653 
2654    /* Just return IMM[0].xxxx */
2655    memset(&reg, 0, sizeof(reg));
2656    reg.Register.File = TGSI_FILE_IMMEDIATE;
2657    return reg;
2658 }
2659 
2660 
2661 /**
2662  * Return a tgsi_full_src_register for an immediate/literal
2663  * union tgsi_immediate_data value of the form {value, value, value, value}.
2664  * \sa make_immediate_reg_4() regarding allowed values.
2665  */
2666 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2667 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2668                    union tgsi_immediate_data value)
2669 {
2670    struct tgsi_full_src_register reg;
2671    int immpos = find_immediate(emit, value, 0);
2672 
2673    assert(immpos >= 0);
2674 
2675    memset(&reg, 0, sizeof(reg));
2676    reg.Register.File = TGSI_FILE_IMMEDIATE;
2677    reg.Register.Index = immpos;
2678    reg.Register.SwizzleX =
2679    reg.Register.SwizzleY =
2680    reg.Register.SwizzleZ =
2681    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2682 
2683    return reg;
2684 }
2685 
2686 
2687 /**
2688  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2689  * \sa make_immediate_reg_4() regarding allowed values.
2690  */
2691 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2692 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2693                           float x, float y, float z, float w)
2694 {
2695    union tgsi_immediate_data imm[4];
2696    imm[0].Float = x;
2697    imm[1].Float = y;
2698    imm[2].Float = z;
2699    imm[3].Float = w;
2700    return make_immediate_reg_4(emit, imm);
2701 }
2702 
2703 
2704 /**
2705  * Return a tgsi_full_src_register for an immediate/literal float value
2706  * of the form {value, value, value, value}.
2707  * \sa make_immediate_reg_4() regarding allowed values.
2708  */
2709 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)2710 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2711 {
2712    union tgsi_immediate_data imm;
2713    imm.Float = value;
2714    return make_immediate_reg(emit, imm);
2715 }
2716 
2717 
2718 /**
2719  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2720  */
2721 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2722 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2723                         int x, int y, int z, int w)
2724 {
2725    union tgsi_immediate_data imm[4];
2726    imm[0].Int = x;
2727    imm[1].Int = y;
2728    imm[2].Int = z;
2729    imm[3].Int = w;
2730    return make_immediate_reg_4(emit, imm);
2731 }
2732 
2733 
2734 /**
2735  * Return a tgsi_full_src_register for an immediate/literal int value
2736  * of the form {value, value, value, value}.
2737  * \sa make_immediate_reg_4() regarding allowed values.
2738  */
2739 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)2740 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2741 {
2742    union tgsi_immediate_data imm;
2743    imm.Int = value;
2744    return make_immediate_reg(emit, imm);
2745 }
2746 
2747 
2748 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2749 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2750 {
2751    struct tgsi_full_src_register reg;
2752    int immpos = find_immediate_dbl(emit, value, value);
2753 
2754    assert(immpos >= 0);
2755 
2756    memset(&reg, 0, sizeof(reg));
2757    reg.Register.File = TGSI_FILE_IMMEDIATE;
2758    reg.Register.Index = immpos;
2759    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2760    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2761    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2762    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2763 
2764    return reg;
2765 }
2766 
2767 
2768 /**
2769  * Allocate space for a union tgsi_immediate_data[4] immediate.
2770  * \return  the index/position of the immediate.
2771  */
2772 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2773 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2774                   const union tgsi_immediate_data imm[4])
2775 {
2776    unsigned n = emit->num_immediates++;
2777    assert(!emit->immediates_emitted);
2778    assert(n < ARRAY_SIZE(emit->immediates));
2779    emit->immediates[n][0] = imm[0];
2780    emit->immediates[n][1] = imm[1];
2781    emit->immediates[n][2] = imm[2];
2782    emit->immediates[n][3] = imm[3];
2783    return n;
2784 }
2785 
2786 
2787 /**
2788  * Allocate space for a float[4] immediate.
2789  * \return  the index/position of the immediate.
2790  */
2791 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2792 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2793                        float x, float y, float z, float w)
2794 {
2795    union tgsi_immediate_data imm[4];
2796    imm[0].Float = x;
2797    imm[1].Float = y;
2798    imm[2].Float = z;
2799    imm[3].Float = w;
2800    return alloc_immediate_4(emit, imm);
2801 }
2802 
2803 
2804 /**
2805  * Allocate space for an int[4] immediate.
2806  * \return  the index/position of the immediate.
2807  */
2808 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2809 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2810                        int x, int y, int z, int w)
2811 {
2812    union tgsi_immediate_data imm[4];
2813    imm[0].Int = x;
2814    imm[1].Int = y;
2815    imm[2].Int = z;
2816    imm[3].Int = w;
2817    return alloc_immediate_4(emit, imm);
2818 }
2819 
2820 
2821 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2822 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2823                         double x, double y)
2824 {
2825    unsigned n = emit->num_immediates++;
2826    assert(!emit->immediates_emitted);
2827    assert(n < ARRAY_SIZE(emit->immediates));
2828    emit->immediates_dbl[n][0] = x;
2829    emit->immediates_dbl[n][1] = y;
2830    return n;
2831 
2832 }
2833 
2834 
2835 /**
2836  * Allocate a shader input to store a system value.
2837  */
2838 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2839 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2840 {
2841    const unsigned n = emit->linkage.input_map_max + 1 + index;
2842    assert(index < ARRAY_SIZE(emit->system_value_indexes));
2843    emit->system_value_indexes[index] = n;
2844    return n;
2845 }
2846 
2847 
2848 /**
2849  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2850  */
2851 static boolean
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2852 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2853                       const struct tgsi_full_immediate *imm)
2854 {
2855    /* We don't actually emit any code here.  We just save the
2856     * immediate values and emit them later.
2857     */
2858    alloc_immediate_4(emit, imm->u);
2859    return TRUE;
2860 }
2861 
2862 
2863 /**
2864  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2865  * containing all the immediate values previously allocated
2866  * with alloc_immediate_4().
2867  */
2868 static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)2869 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2870 {
2871    VGPU10OpcodeToken0 token;
2872 
2873    assert(!emit->immediates_emitted);
2874 
2875    token.value = 0;
2876    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2877    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2878 
2879    /* Note: no begin/end_emit_instruction() calls */
2880    emit_dword(emit, token.value);
2881    emit_dword(emit, 2 + 4 * emit->num_immediates);
2882    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2883 
2884    emit->immediates_emitted = TRUE;
2885 
2886    return TRUE;
2887 }
2888 
2889 
2890 /**
2891  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2892  * interpolation mode.
2893  * \return a VGPU10_INTERPOLATION_x value
2894  */
2895 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)2896 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2897                         enum tgsi_interpolate_mode interp,
2898                         enum tgsi_interpolate_loc interpolate_loc)
2899 {
2900    if (interp == TGSI_INTERPOLATE_COLOR) {
2901       interp = emit->key.fs.flatshade ?
2902          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2903    }
2904 
2905    switch (interp) {
2906    case TGSI_INTERPOLATE_CONSTANT:
2907       return VGPU10_INTERPOLATION_CONSTANT;
2908    case TGSI_INTERPOLATE_LINEAR:
2909       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2910          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2911       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2912                  emit->version >= 41) {
2913          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2914       } else {
2915          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2916       }
2917       break;
2918    case TGSI_INTERPOLATE_PERSPECTIVE:
2919       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2920          return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2921       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2922                  emit->version >= 41) {
2923          return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2924       } else {
2925          return VGPU10_INTERPOLATION_LINEAR;
2926       }
2927       break;
2928    default:
2929       assert(!"Unexpected interpolation mode");
2930       return VGPU10_INTERPOLATION_CONSTANT;
2931    }
2932 }
2933 
2934 
2935 /**
2936  * Translate a TGSI property to VGPU10.
2937  * Don't emit any instructions yet, only need to gather the primitive property
2938  * information.  The output primitive topology might be changed later. The
2939  * final property instructions will be emitted as part of the pre-helper code.
2940  */
2941 static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_property * prop)2942 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2943                      const struct tgsi_full_property *prop)
2944 {
2945    static const VGPU10_PRIMITIVE primType[] = {
2946       VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
2947       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
2948       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
2949       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
2950       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
2951       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
2952       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
2953       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
2954       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
2955       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
2956       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
2957       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2958       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2959       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2960    };
2961 
2962    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2963       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
2964       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
2965       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
2966       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
2967       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
2968       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2969       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2970       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
2971       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
2972       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
2973       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
2974       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2975       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2976       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2977    };
2978 
2979    static const unsigned inputArraySize[] = {
2980       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
2981       1,       /* VGPU10_PRIMITIVE_POINT */
2982       2,       /* VGPU10_PRIMITIVE_LINE */
2983       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
2984       0,
2985       0,
2986       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
2987       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2988    };
2989 
2990    switch (prop->Property.PropertyName) {
2991    case TGSI_PROPERTY_GS_INPUT_PRIM:
2992       assert(prop->u[0].Data < ARRAY_SIZE(primType));
2993       emit->gs.prim_type = primType[prop->u[0].Data];
2994       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2995       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2996       break;
2997 
2998    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2999       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
3000       emit->gs.prim_topology = primTopology[prop->u[0].Data];
3001       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
3002       break;
3003 
3004    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
3005       emit->gs.max_out_vertices = prop->u[0].Data;
3006       break;
3007 
3008    case TGSI_PROPERTY_GS_INVOCATIONS:
3009       emit->gs.invocations = prop->u[0].Data;
3010       break;
3011 
3012    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
3013    case TGSI_PROPERTY_NEXT_SHADER:
3014    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
3015       /* no-op */
3016       break;
3017 
3018    case TGSI_PROPERTY_TCS_VERTICES_OUT:
3019       /* This info is already captured in the shader key */
3020       break;
3021 
3022    case TGSI_PROPERTY_TES_PRIM_MODE:
3023       emit->tes.prim_mode = prop->u[0].Data;
3024       break;
3025 
3026    case TGSI_PROPERTY_TES_SPACING:
3027       emit->tes.spacing = prop->u[0].Data;
3028       break;
3029 
3030    case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
3031       emit->tes.vertices_order_cw = prop->u[0].Data;
3032       break;
3033 
3034    case TGSI_PROPERTY_TES_POINT_MODE:
3035       emit->tes.point_mode = prop->u[0].Data;
3036       break;
3037 
3038    case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
3039       emit->cs.block_width = prop->u[0].Data;
3040       break;
3041 
3042    case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
3043       emit->cs.block_height = prop->u[0].Data;
3044       break;
3045 
3046    case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
3047       emit->cs.block_depth = prop->u[0].Data;
3048       break;
3049 
3050    case TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL:
3051       emit->fs.forceEarlyDepthStencil = TRUE;
3052       break;
3053 
3054    default:
3055       debug_printf("Unexpected TGSI property %s\n",
3056                    tgsi_property_names[prop->Property.PropertyName]);
3057    }
3058 
3059    return TRUE;
3060 }
3061 
3062 
3063 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)3064 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
3065                           VGPU10OpcodeToken0 opcode0, unsigned nData,
3066                           unsigned data)
3067 {
3068    begin_emit_instruction(emit);
3069    emit_dword(emit, opcode0.value);
3070    if (nData)
3071       emit_dword(emit, data);
3072    end_emit_instruction(emit);
3073 }
3074 
3075 
3076 /**
3077  * Emit property instructions
3078  */
3079 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)3080 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
3081 {
3082    VGPU10OpcodeToken0 opcode0;
3083 
3084    assert(emit->unit == PIPE_SHADER_GEOMETRY);
3085 
3086    /* emit input primitive type declaration */
3087    opcode0.value = 0;
3088    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
3089    opcode0.primitive = emit->gs.prim_type;
3090    emit_property_instruction(emit, opcode0, 0, 0);
3091 
3092    /* emit max output vertices */
3093    opcode0.value = 0;
3094    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
3095    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
3096 
3097    if (emit->version >= 50 && emit->gs.invocations > 0) {
3098       opcode0.value = 0;
3099       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
3100       emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
3101    }
3102 }
3103 
3104 
3105 /**
3106  * A helper function to declare tessellator domain in a hull shader or
3107  * in the domain shader.
3108  */
3109 static void
emit_tessellator_domain(struct svga_shader_emitter_v10 * emit,enum pipe_prim_type prim_mode)3110 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
3111                         enum pipe_prim_type prim_mode)
3112 {
3113    VGPU10OpcodeToken0 opcode0;
3114 
3115    opcode0.value = 0;
3116    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
3117    switch (prim_mode) {
3118    case PIPE_PRIM_QUADS:
3119    case PIPE_PRIM_LINES:
3120       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
3121       break;
3122    case PIPE_PRIM_TRIANGLES:
3123       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
3124       break;
3125    default:
3126       debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
3127       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
3128    }
3129    begin_emit_instruction(emit);
3130    emit_dword(emit, opcode0.value);
3131    end_emit_instruction(emit);
3132 }
3133 
3134 
3135 /**
3136  * Emit domain shader declarations.
3137  */
3138 static void
emit_domain_shader_declarations(struct svga_shader_emitter_v10 * emit)3139 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
3140 {
3141    VGPU10OpcodeToken0 opcode0;
3142 
3143    assert(emit->unit == PIPE_SHADER_TESS_EVAL);
3144 
3145    /* Emit the input control point count */
3146    assert(emit->key.tes.vertices_per_patch >= 0 &&
3147           emit->key.tes.vertices_per_patch <= 32);
3148 
3149    opcode0.value = 0;
3150    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3151    opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
3152    begin_emit_instruction(emit);
3153    emit_dword(emit, opcode0.value);
3154    end_emit_instruction(emit);
3155 
3156    emit_tessellator_domain(emit, emit->tes.prim_mode);
3157 
3158    /* Specify a max for swizzles of the domain point according to the
3159     * tessellator domain type.
3160     */
3161    emit->tes.swizzle_max = emit->tes.prim_mode == PIPE_PRIM_TRIANGLES ?
3162                               TGSI_SWIZZLE_Z : TGSI_SWIZZLE_Y;
3163 }
3164 
3165 
3166 /**
3167  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
3168  * to implement some instructions.  We pre-allocate those values here
3169  * in the immediate constant buffer.
3170  */
3171 static void
alloc_common_immediates(struct svga_shader_emitter_v10 * emit)3172 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
3173 {
3174    unsigned n = 0;
3175 
3176    emit->common_immediate_pos[n++] =
3177       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
3178 
3179    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
3180       emit->common_immediate_pos[n++] =
3181          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
3182    }
3183 
3184    emit->common_immediate_pos[n++] =
3185       alloc_immediate_int4(emit, 0, 1, 2, -1);
3186 
3187    emit->common_immediate_pos[n++] =
3188       alloc_immediate_int4(emit, 3, 4, 5, 6);
3189 
3190    if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
3191        emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
3192       emit->common_immediate_pos[n++] =
3193          alloc_immediate_int4(emit, 31, 0, 0, 0);
3194    }
3195 
3196    if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
3197        emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
3198        emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3199       emit->common_immediate_pos[n++] =
3200          alloc_immediate_int4(emit, 32, 0, 0, 0);
3201    }
3202 
3203    if (emit->key.vs.attrib_puint_to_snorm) {
3204       emit->common_immediate_pos[n++] =
3205          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3206    }
3207 
3208    if (emit->key.vs.attrib_puint_to_uscaled) {
3209       emit->common_immediate_pos[n++] =
3210          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3211    }
3212 
3213    if (emit->key.vs.attrib_puint_to_sscaled) {
3214       emit->common_immediate_pos[n++] =
3215          alloc_immediate_int4(emit, 22, 12, 2, 0);
3216 
3217       emit->common_immediate_pos[n++] =
3218          alloc_immediate_int4(emit, 22, 30, 0, 0);
3219    }
3220 
3221    if (emit->vposition.num_prescale > 1) {
3222       unsigned i;
3223       for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3224          emit->common_immediate_pos[n++] =
3225             alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3226       }
3227    }
3228 
3229    emit->immediates_dbl = (double (*)[2]) emit->immediates;
3230 
3231    if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3232       emit->common_immediate_pos[n++] =
3233          alloc_immediate_double2(emit, -1.0, -1.0);
3234    }
3235 
3236    if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
3237        emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
3238       emit->common_immediate_pos[n++] =
3239          alloc_immediate_double2(emit, 0.0, 0.0);
3240       emit->common_immediate_pos[n++] =
3241          alloc_immediate_double2(emit, 1.0, 1.0);
3242    }
3243 
3244    if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3245       emit->common_immediate_pos[n++] =
3246          alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3247    }
3248 
3249    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3250 
3251    unsigned i;
3252 
3253    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3254       if (emit->key.tex[i].texel_bias) {
3255          /* Replace 0.0f if more immediate float value is needed */
3256          emit->common_immediate_pos[n++] =
3257             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3258          break;
3259       }
3260    }
3261 
3262    /** TODO: allocate immediates for all possible element byte offset?
3263     */
3264    if (emit->raw_bufs) {
3265       unsigned i;
3266       for (i = 7; i < 12; i+=4) {
3267          emit->common_immediate_pos[n++] =
3268             alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3269       }
3270    }
3271 
3272    if (emit->info.indirect_files &
3273        (1 << TGSI_FILE_IMAGE | 1 << TGSI_FILE_BUFFER)) {
3274       unsigned i;
3275       for (i = 7; i < 8; i+=4) {
3276          emit->common_immediate_pos[n++] =
3277             alloc_immediate_int4(emit, i, (i+1), (i+2), (i+3));
3278       }
3279    }
3280 
3281    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3282    emit->num_common_immediates = n;
3283 }
3284 
3285 
3286 /**
3287  * Emit hull shader declarations.
3288 */
3289 static void
emit_hull_shader_declarations(struct svga_shader_emitter_v10 * emit)3290 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3291 {
3292    VGPU10OpcodeToken0 opcode0;
3293 
3294    /* Emit the input control point count */
3295    assert(emit->key.tcs.vertices_per_patch > 0 &&
3296           emit->key.tcs.vertices_per_patch <= 32);
3297 
3298    opcode0.value = 0;
3299    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3300    opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3301    begin_emit_instruction(emit);
3302    emit_dword(emit, opcode0.value);
3303    end_emit_instruction(emit);
3304 
3305    /* Emit the output control point count */
3306    assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3307 
3308    opcode0.value = 0;
3309    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3310    opcode0.controlPointCount = emit->key.tcs.vertices_out;
3311    begin_emit_instruction(emit);
3312    emit_dword(emit, opcode0.value);
3313    end_emit_instruction(emit);
3314 
3315    /* Emit tessellator domain */
3316    emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3317 
3318    /* Emit tessellator output primitive */
3319    opcode0.value = 0;
3320    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3321    if (emit->key.tcs.point_mode) {
3322       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3323    }
3324    else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3325       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3326    }
3327    else {
3328       assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3329              emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3330 
3331       if (emit->key.tcs.vertices_order_cw)
3332          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3333       else
3334          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3335    }
3336    begin_emit_instruction(emit);
3337    emit_dword(emit, opcode0.value);
3338    end_emit_instruction(emit);
3339 
3340    /* Emit tessellator partitioning */
3341    opcode0.value = 0;
3342    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3343    switch (emit->key.tcs.spacing) {
3344    case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3345       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3346       break;
3347    case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3348       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3349       break;
3350    case PIPE_TESS_SPACING_EQUAL:
3351       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3352       break;
3353    default:
3354       debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3355       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3356    }
3357    begin_emit_instruction(emit);
3358    emit_dword(emit, opcode0.value);
3359    end_emit_instruction(emit);
3360 
3361    alloc_common_immediates(emit);
3362 
3363    /* Declare constant registers */
3364    emit_constant_declaration(emit);
3365 
3366    /* Declare samplers and resources */
3367    emit_sampler_declarations(emit);
3368    emit_resource_declarations(emit);
3369 
3370    /* Declare images */
3371    emit_image_declarations(emit);
3372 
3373    /* Declare shader buffers */
3374    emit_shader_buf_declarations(emit);
3375 
3376    /* Declare atomic buffers */
3377    emit_atomic_buf_declarations(emit);
3378 
3379    int nVertices = emit->key.tcs.vertices_per_patch;
3380    emit->tcs.imm_index =
3381       alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3382 
3383    /* Now, emit the constant block containing all the immediates
3384     * declared by shader, as well as the extra ones seen above.
3385     */
3386    emit_vgpu10_immediates_block(emit);
3387 
3388 }
3389 
3390 
3391 /**
3392  * A helper function to determine if control point phase is needed.
3393  * Returns TRUE if there is control point output.
3394  */
3395 static boolean
needs_control_point_phase(struct svga_shader_emitter_v10 * emit)3396 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3397 {
3398    unsigned i;
3399 
3400    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3401 
3402    /* If output control point count does not match the input count,
3403     * we need a control point phase to explicitly set the output control
3404     * points.
3405     */
3406    if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3407        emit->key.tcs.vertices_out)
3408       return TRUE;
3409 
3410    for (i = 0; i < emit->info.num_outputs; i++) {
3411       switch (emit->info.output_semantic_name[i]) {
3412       case TGSI_SEMANTIC_PATCH:
3413       case TGSI_SEMANTIC_TESSOUTER:
3414       case TGSI_SEMANTIC_TESSINNER:
3415          break;
3416       default:
3417          return TRUE;
3418       }
3419    }
3420    return FALSE;
3421 }
3422 
3423 
3424 /**
3425  * A helper function to add shader signature for passthrough control point
3426  * phase. This signature is also generated for passthrough control point
3427  * phase from HLSL compiler and is needed by Metal Renderer.
3428  */
3429 static void
emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 * emit)3430 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3431 {
3432    struct svga_shader_signature *sgn = &emit->signature;
3433    SVGA3dDXShaderSignatureEntry *sgnEntry;
3434    unsigned i;
3435 
3436    for (i = 0; i < emit->info.num_inputs; i++) {
3437       unsigned index = emit->linkage.input_map[i];
3438       enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3439 
3440       sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3441 
3442       set_shader_signature_entry(sgnEntry, index,
3443                                  tgsi_semantic_to_sgn_name[sem_name],
3444                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3445                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3446                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3447 
3448       sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3449 
3450       set_shader_signature_entry(sgnEntry, i,
3451                                  tgsi_semantic_to_sgn_name[sem_name],
3452                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3453                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3454                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3455    }
3456 }
3457 
3458 
3459 /**
3460  * A helper function to emit an instruction to start the control point phase
3461  * in the hull shader.
3462  */
3463 static void
emit_control_point_phase_instruction(struct svga_shader_emitter_v10 * emit)3464 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3465 {
3466    VGPU10OpcodeToken0 opcode0;
3467 
3468    opcode0.value = 0;
3469    opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3470    begin_emit_instruction(emit);
3471    emit_dword(emit, opcode0.value);
3472    end_emit_instruction(emit);
3473 }
3474 
3475 
3476 /**
3477  * Start the hull shader control point phase
3478  */
3479 static boolean
emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 * emit)3480 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3481 {
3482    /* If there is no control point output, skip the control point phase. */
3483    if (!needs_control_point_phase(emit)) {
3484       if (!emit->key.tcs.vertices_out) {
3485          /**
3486           * If the tcs does not explicitly generate any control point output
3487           * and the tes does not use any input control point, then
3488           * emit an empty control point phase with zero output control
3489           * point count.
3490           */
3491          emit_control_point_phase_instruction(emit);
3492 
3493          /**
3494           * Since this is an empty control point phase, we will need to
3495           * add input signatures when we parse the tcs again in the
3496           * patch constant phase.
3497           */
3498          emit->tcs.fork_phase_add_signature = TRUE;
3499       }
3500       else {
3501          /**
3502           * Before skipping the control point phase, add the signature for
3503           * the passthrough control point.
3504           */
3505          emit_passthrough_control_point_signature(emit);
3506       }
3507       return FALSE;
3508    }
3509 
3510    /* Start the control point phase in the hull shader */
3511    emit_control_point_phase_instruction(emit);
3512 
3513    /* Declare the output control point ID */
3514    if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3515       /* Add invocation id declaration if it does not exist */
3516       emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3517    }
3518 
3519    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3520                           VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3521                           VGPU10_OPERAND_INDEX_0D,
3522                           0, 1,
3523                           VGPU10_NAME_UNDEFINED,
3524                           VGPU10_OPERAND_0_COMPONENT, 0,
3525                           0,
3526                           VGPU10_INTERPOLATION_CONSTANT, TRUE,
3527                           SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3528 
3529    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3530       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3531                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3532                              VGPU10_OPERAND_INDEX_0D,
3533                              0, 1,
3534                              VGPU10_NAME_UNDEFINED,
3535                              VGPU10_OPERAND_0_COMPONENT,
3536                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3537                              0,
3538                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3539                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3540    }
3541 
3542    return TRUE;
3543 }
3544 
3545 
3546 /**
3547  * Start the hull shader patch constant phase and
3548  * do the second pass of the tcs translation and emit
3549  * the relevant declarations and instructions for this phase.
3550  */
3551 static boolean
emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 * emit,struct tgsi_parse_context * parse)3552 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3553                                       struct tgsi_parse_context *parse)
3554 {
3555    unsigned inst_number = 0;
3556    boolean ret = TRUE;
3557    VGPU10OpcodeToken0 opcode0;
3558 
3559    emit->skip_instruction = FALSE;
3560 
3561    /* Start the patch constant phase */
3562    opcode0.value = 0;
3563    opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3564    begin_emit_instruction(emit);
3565    emit_dword(emit, opcode0.value);
3566    end_emit_instruction(emit);
3567 
3568    /* Set the current phase to patch constant phase */
3569    emit->tcs.control_point_phase = FALSE;
3570 
3571    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3572       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3573                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3574                              VGPU10_OPERAND_INDEX_0D,
3575                              0, 1,
3576                              VGPU10_NAME_UNDEFINED,
3577                              VGPU10_OPERAND_0_COMPONENT,
3578                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3579                              0,
3580                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3581                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3582    }
3583 
3584    /* Emit declarations for this phase */
3585    emit->index_range.required =
3586       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3587    emit_tcs_input_declarations(emit);
3588 
3589    if (emit->index_range.start_index != INVALID_INDEX) {
3590       emit_index_range_declaration(emit);
3591    }
3592 
3593    emit->index_range.required =
3594       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3595    emit_tcs_output_declarations(emit);
3596 
3597    if (emit->index_range.start_index != INVALID_INDEX) {
3598       emit_index_range_declaration(emit);
3599    }
3600    emit->index_range.required = FALSE;
3601 
3602    emit_temporaries_declaration(emit);
3603 
3604    /* Reset the token position to the first instruction token
3605     * in preparation for the second pass of the shader
3606     */
3607    parse->Position = emit->tcs.instruction_token_pos;
3608 
3609    while (!tgsi_parse_end_of_tokens(parse)) {
3610       tgsi_parse_token(parse);
3611 
3612       assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3613       ret = emit_vgpu10_instruction(emit, inst_number++,
3614                                     &parse->FullToken.FullInstruction);
3615 
3616       /* Usually this applies to TCS only. If shader is reading output of
3617        * patch constant in fork phase, we should reemit all instructions
3618        * which are writting into output of patch constant in fork phase
3619        * to store results into temporaries.
3620        */
3621       assert(!(emit->reemit_instruction && emit->reemit_rawbuf_instruction));
3622       if (emit->reemit_instruction) {
3623          assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3624          ret = emit_vgpu10_instruction(emit, inst_number,
3625                                        &parse->FullToken.FullInstruction);
3626       } else if (emit->reemit_rawbuf_instruction) {
3627          ret = emit_rawbuf_instruction(emit, inst_number,
3628                                        &parse->FullToken.FullInstruction);
3629       }
3630 
3631       if (!ret)
3632          return FALSE;
3633    }
3634 
3635    return TRUE;
3636 }
3637 
3638 
3639 /**
3640  * Emit the thread group declaration for compute shader.
3641  */
3642 static void
emit_compute_shader_declarations(struct svga_shader_emitter_v10 * emit)3643 emit_compute_shader_declarations(struct svga_shader_emitter_v10 *emit)
3644 {
3645    VGPU10OpcodeToken0 opcode0;
3646 
3647    opcode0.value = 0;
3648    opcode0.opcodeType = VGPU10_OPCODE_DCL_THREAD_GROUP;
3649    begin_emit_instruction(emit);
3650    emit_dword(emit, opcode0.value);
3651    emit_dword(emit, emit->cs.block_width);
3652    emit_dword(emit, emit->cs.block_height);
3653    emit_dword(emit, emit->cs.block_depth);
3654    end_emit_instruction(emit);
3655 }
3656 
3657 
3658 /**
3659  * Emit index range declaration.
3660  */
3661 static boolean
emit_index_range_declaration(struct svga_shader_emitter_v10 * emit)3662 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3663 {
3664    if (emit->version < 50)
3665       return TRUE;
3666 
3667    assert(emit->index_range.start_index != INVALID_INDEX);
3668    assert(emit->index_range.count != 0);
3669    assert(emit->index_range.required);
3670    assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3671    assert(emit->index_range.dim != 0);
3672    assert(emit->index_range.size != 0);
3673 
3674    VGPU10OpcodeToken0 opcode0;
3675    VGPU10OperandToken0 operand0;
3676 
3677    opcode0.value = 0;
3678    opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3679 
3680    operand0.value = 0;
3681    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3682    operand0.indexDimension = emit->index_range.dim;
3683    operand0.operandType = emit->index_range.operandType;
3684    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3685    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3686 
3687    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3688       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3689 
3690    begin_emit_instruction(emit);
3691    emit_dword(emit, opcode0.value);
3692    emit_dword(emit, operand0.value);
3693 
3694    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3695       emit_dword(emit, emit->index_range.size);
3696       emit_dword(emit, emit->index_range.start_index);
3697       emit_dword(emit, emit->index_range.count);
3698    }
3699    else {
3700       emit_dword(emit, emit->index_range.start_index);
3701       emit_dword(emit, emit->index_range.count);
3702    }
3703 
3704    end_emit_instruction(emit);
3705 
3706    /* Reset fields in emit->index_range struct except
3707     * emit->index_range.required which will be reset afterwards
3708     */
3709    emit->index_range.count = 0;
3710    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3711    emit->index_range.start_index = INVALID_INDEX;
3712    emit->index_range.size = 0;
3713    emit->index_range.dim = 0;
3714 
3715    return TRUE;
3716 }
3717 
3718 
3719 /**
3720  * Emit a vgpu10 declaration "instruction".
3721  * \param index  the register index
3722  * \param size   array size of the operand. In most cases, it is 1,
3723  *               but for inputs to geometry shader, the array size varies
3724  *               depending on the primitive type.
3725  */
3726 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)3727 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3728                       VGPU10OpcodeToken0 opcode0,
3729                       VGPU10OperandToken0 operand0,
3730                       VGPU10NameToken name_token,
3731                       unsigned index, unsigned size)
3732 {
3733    assert(opcode0.opcodeType);
3734    assert(operand0.mask ||
3735           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3736           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3737           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3738           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3739           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3740           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3741           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3742           (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3743 
3744    begin_emit_instruction(emit);
3745    emit_dword(emit, opcode0.value);
3746 
3747    emit_dword(emit, operand0.value);
3748 
3749    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3750       /* Next token is the index of the register to declare */
3751       emit_dword(emit, index);
3752    }
3753    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3754       /* Next token is the size of the register */
3755       emit_dword(emit, size);
3756 
3757       /* Followed by the index of the register */
3758       emit_dword(emit, index);
3759    }
3760 
3761    if (name_token.value) {
3762       emit_dword(emit, name_token.value);
3763    }
3764 
3765    end_emit_instruction(emit);
3766 }
3767 
3768 
3769 /**
3770  * Emit the declaration for a shader input.
3771  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3772  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3773  * \param dim         index dimension
3774  * \param index       the input register index
3775  * \param size        array size of the operand. In most cases, it is 1,
3776  *                    but for inputs to geometry shader, the array size varies
3777  *                    depending on the primitive type. For tessellation control
3778  *                    shader, the array size is the vertex count per patch.
3779  * \param name        one of VGPU10_NAME_x
3780  * \parma numComp     number of components
3781  * \param selMode     component selection mode
3782  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3783  * \param interpMode  interpolation mode
3784  */
3785 static void
emit_input_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcodeType,VGPU10_OPERAND_TYPE operandType,VGPU10_OPERAND_INDEX_DIMENSION dim,unsigned index,unsigned size,VGPU10_SYSTEM_NAME name,VGPU10_OPERAND_NUM_COMPONENTS numComp,VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,unsigned usageMask,VGPU10_INTERPOLATION_MODE interpMode,boolean addSignature,SVGA3dDXSignatureSemanticName sgnName)3786 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3787                        VGPU10_OPCODE_TYPE opcodeType,
3788                        VGPU10_OPERAND_TYPE operandType,
3789                        VGPU10_OPERAND_INDEX_DIMENSION dim,
3790                        unsigned index, unsigned size,
3791                        VGPU10_SYSTEM_NAME name,
3792                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
3793                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3794                        unsigned usageMask,
3795                        VGPU10_INTERPOLATION_MODE interpMode,
3796                        boolean addSignature,
3797                        SVGA3dDXSignatureSemanticName sgnName)
3798 {
3799    VGPU10OpcodeToken0 opcode0;
3800    VGPU10OperandToken0 operand0;
3801    VGPU10NameToken name_token;
3802 
3803    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3804    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3805           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3806           opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3807           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3808           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3809           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3810    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3811           operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3812           operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3813           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3814           operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3815           operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3816           operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3817           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3818           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3819           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3820           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3821 
3822    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3823    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3824    assert(dim <= VGPU10_OPERAND_INDEX_3D);
3825    assert(name == VGPU10_NAME_UNDEFINED ||
3826           name == VGPU10_NAME_POSITION ||
3827           name == VGPU10_NAME_INSTANCE_ID ||
3828           name == VGPU10_NAME_VERTEX_ID ||
3829           name == VGPU10_NAME_PRIMITIVE_ID ||
3830           name == VGPU10_NAME_IS_FRONT_FACE ||
3831           name == VGPU10_NAME_SAMPLE_INDEX ||
3832           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3833           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3834 
3835    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3836           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3837           interpMode == VGPU10_INTERPOLATION_LINEAR ||
3838           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3839           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3840           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3841           interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3842           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3843 
3844    check_register_index(emit, opcodeType, index);
3845 
3846    opcode0.value = operand0.value = name_token.value = 0;
3847 
3848    opcode0.opcodeType = opcodeType;
3849    opcode0.interpolationMode = interpMode;
3850 
3851    operand0.operandType = operandType;
3852    operand0.numComponents = numComp;
3853    operand0.selectionMode = selMode;
3854    operand0.mask = usageMask;
3855    operand0.indexDimension = dim;
3856    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3857    if (dim == VGPU10_OPERAND_INDEX_2D)
3858       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3859 
3860    name_token.name = name;
3861 
3862    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3863 
3864    if (addSignature) {
3865       struct svga_shader_signature *sgn = &emit->signature;
3866       if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3867          /* Set patch constant signature */
3868          SVGA3dDXShaderSignatureEntry *sgnEntry =
3869             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3870          set_shader_signature_entry(sgnEntry, index,
3871                                     sgnName, usageMask,
3872                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3873                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3874 
3875       } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3876                  operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3877          /* Set input signature */
3878          SVGA3dDXShaderSignatureEntry *sgnEntry =
3879             &sgn->inputs[sgn->header.numInputSignatures++];
3880          set_shader_signature_entry(sgnEntry, index,
3881                                     sgnName, usageMask,
3882                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3883                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3884       }
3885    }
3886 
3887    if (emit->index_range.required) {
3888       /* Here, index_range declaration is only applicable for opcodeType
3889        * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3890        * for operandType VGPU10_OPERAND_TYPE_INPUT,
3891        * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3892        * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3893        */
3894       if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3895            opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3896           (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3897            operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3898            operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3899          if (emit->index_range.start_index != INVALID_INDEX) {
3900             emit_index_range_declaration(emit);
3901          }
3902          return;
3903       }
3904 
3905       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3906          /* Need record new index_range */
3907          emit->index_range.count = 1;
3908          emit->index_range.operandType = operandType;
3909          emit->index_range.start_index = index;
3910          emit->index_range.size = size;
3911          emit->index_range.dim = dim;
3912       }
3913       else if (index !=
3914                (emit->index_range.start_index + emit->index_range.count) ||
3915                emit->index_range.operandType != operandType) {
3916          /* Input index is not contiguous with index range or operandType is
3917           * different from index range's operandType. We need to emit current
3918           * index_range first and then start recording next index range.
3919           */
3920          emit_index_range_declaration(emit);
3921 
3922          emit->index_range.count = 1;
3923          emit->index_range.operandType = operandType;
3924          emit->index_range.start_index = index;
3925          emit->index_range.size = size;
3926          emit->index_range.dim = dim;
3927       }
3928       else if (emit->index_range.operandType == operandType) {
3929          /* Since input index is contiguous with index range and operandType
3930           * is same as index range's operandType, increment index range count.
3931           */
3932          emit->index_range.count++;
3933       }
3934    }
3935 }
3936 
3937 
/**
 * Emit the declaration for a shader output.
 * \param type          one of VGPU10_OPCODE_DCL_OUTPUTx
 * \param index         the output register index
 * \param name          one of VGPU10_NAME_x
 * \param writemask     bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param addSignature  if true, also record an output signature entry
 * \param sgnName       semantic name stored in that signature entry
 */
3945 static void
emit_output_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE type,unsigned index,VGPU10_SYSTEM_NAME name,unsigned writemask,boolean addSignature,SVGA3dDXSignatureSemanticName sgnName)3946 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3947                         VGPU10_OPCODE_TYPE type, unsigned index,
3948                         VGPU10_SYSTEM_NAME name,
3949                         unsigned writemask,
3950                         boolean addSignature,
3951                         SVGA3dDXSignatureSemanticName sgnName)
3952 {
3953    VGPU10OpcodeToken0 opcode0;
3954    VGPU10OperandToken0 operand0;
3955    VGPU10NameToken name_token;
3956 
3957    assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3958    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3959           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3960           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3961    assert(name == VGPU10_NAME_UNDEFINED ||
3962           name == VGPU10_NAME_POSITION ||
3963           name == VGPU10_NAME_PRIMITIVE_ID ||
3964           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3965           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3966           name == VGPU10_NAME_CLIP_DISTANCE);
3967 
3968    check_register_index(emit, type, index);
3969 
3970    opcode0.value = operand0.value = name_token.value = 0;
3971 
3972    opcode0.opcodeType = type;
3973    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3974    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3975    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3976    operand0.mask = writemask;
3977    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3978    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3979 
3980    name_token.name = name;
3981 
3982    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3983 
3984    /* Capture output signature */
3985    if (addSignature) {
3986       struct svga_shader_signature *sgn = &emit->signature;
3987       SVGA3dDXShaderSignatureEntry *sgnEntry =
3988          &sgn->outputs[sgn->header.numOutputSignatures++];
3989       set_shader_signature_entry(sgnEntry, index,
3990                                  sgnName, writemask,
3991                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3992                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3993    }
3994 
3995    if (emit->index_range.required) {
3996       /* Here, index_range declaration is only applicable for opcodeType
3997        * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3998        * VGPU10_OPERAND_TYPE_OUTPUT.
3999        */
4000       if (type != VGPU10_OPCODE_DCL_OUTPUT) {
4001          if (emit->index_range.start_index != INVALID_INDEX) {
4002             emit_index_range_declaration(emit);
4003          }
4004          return;
4005       }
4006 
4007       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
4008          /* Need record new index_range */
4009          emit->index_range.count = 1;
4010          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4011          emit->index_range.start_index = index;
4012          emit->index_range.size = 1;
4013          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4014       }
4015       else if (index !=
4016                (emit->index_range.start_index + emit->index_range.count)) {
4017          /* Output index is not contiguous with index range. We need to
4018           * emit current index_range first and then start recording next
4019           * index range.
4020           */
4021          emit_index_range_declaration(emit);
4022 
4023          emit->index_range.count = 1;
4024          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
4025          emit->index_range.start_index = index;
4026          emit->index_range.size = 1;
4027          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
4028       }
4029       else {
4030          /* Since output index is contiguous with index range, increment
4031           * index range count.
4032           */
4033          emit->index_range.count++;
4034       }
4035    }
4036 }
4037 
4038 
4039 /**
4040  * Emit the declaration for the fragment depth output.
4041  */
4042 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)4043 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
4044 {
4045    VGPU10OpcodeToken0 opcode0;
4046    VGPU10OperandToken0 operand0;
4047    VGPU10NameToken name_token;
4048 
4049    assert(emit->unit == PIPE_SHADER_FRAGMENT);
4050 
4051    opcode0.value = operand0.value = name_token.value = 0;
4052 
4053    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4054    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
4055    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
4056    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4057    operand0.mask = 0;
4058 
4059    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4060 }
4061 
4062 
4063 /**
4064  * Emit the declaration for the fragment sample mask/coverage output.
4065  */
4066 static void
emit_samplemask_output_declaration(struct svga_shader_emitter_v10 * emit)4067 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
4068 {
4069    VGPU10OpcodeToken0 opcode0;
4070    VGPU10OperandToken0 operand0;
4071    VGPU10NameToken name_token;
4072 
4073    assert(emit->unit == PIPE_SHADER_FRAGMENT);
4074    assert(emit->version >= 41);
4075 
4076    opcode0.value = operand0.value = name_token.value = 0;
4077 
4078    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
4079    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
4080    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
4081    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
4082    operand0.mask = 0;
4083 
4084    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
4085 }
4086 
4087 
4088 /**
4089  * Emit output declarations for fragment shader.
4090  */
4091 static void
emit_fs_output_declarations(struct svga_shader_emitter_v10 * emit)4092 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
4093 {
4094    unsigned int i;
4095 
4096    for (i = 0; i < emit->info.num_outputs; i++) {
4097       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
4098       const enum tgsi_semantic semantic_name =
4099          emit->info.output_semantic_name[i];
4100       const unsigned semantic_index = emit->info.output_semantic_index[i];
4101       unsigned index = i;
4102 
4103       if (semantic_name == TGSI_SEMANTIC_COLOR) {
4104          assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
4105 
4106          emit->fs.color_out_index[semantic_index] = index;
4107 
4108          emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
4109                                               index + 1);
4110 
4111          /* The semantic index is the shader's color output/buffer index */
4112          emit_output_declaration(emit,
4113                                  VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
4114                                  VGPU10_NAME_UNDEFINED,
4115                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4116                                  TRUE,
4117                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4118 
4119          if (semantic_index == 0) {
4120             if (emit->key.fs.write_color0_to_n_cbufs > 1) {
4121                /* Emit declarations for the additional color outputs
4122                 * for broadcasting.
4123                 */
4124                unsigned j;
4125                for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
4126                   /* Allocate a new output index */
4127                   unsigned idx = emit->info.num_outputs + j - 1;
4128                   emit->fs.color_out_index[j] = idx;
4129                   emit_output_declaration(emit,
4130                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
4131                                         VGPU10_NAME_UNDEFINED,
4132                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4133                                         TRUE,
4134                                         map_tgsi_semantic_to_sgn_name(semantic_name));
4135                   emit->info.output_semantic_index[idx] = j;
4136                }
4137 
4138                emit->fs.num_color_outputs =
4139                      emit->key.fs.write_color0_to_n_cbufs;
4140             }
4141          }
4142       }
4143       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4144          /* Fragment depth output */
4145          emit_fragdepth_output_declaration(emit);
4146       }
4147       else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
4148          /* Sample mask output */
4149          emit_samplemask_output_declaration(emit);
4150       }
4151       else {
4152          assert(!"Bad output semantic name");
4153       }
4154    }
4155 }
4156 
4157 
4158 /**
4159  * Emit common output declaration for vertex processing.
4160  */
4161 static void
emit_vertex_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned writemask,boolean addSignature)4162 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
4163                                unsigned index, unsigned writemask,
4164                                boolean addSignature)
4165 {
4166    const enum tgsi_semantic semantic_name =
4167          emit->info.output_semantic_name[index];
4168    const unsigned semantic_index = emit->info.output_semantic_index[index];
4169    unsigned name, type;
4170    unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4171 
4172    assert(emit->unit != PIPE_SHADER_FRAGMENT &&
4173           emit->unit != PIPE_SHADER_COMPUTE);
4174 
4175    switch (semantic_name) {
4176    case TGSI_SEMANTIC_POSITION:
4177       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4178          /* position will be declared in control point only */
4179          assert(emit->tcs.control_point_phase);
4180          type = VGPU10_OPCODE_DCL_OUTPUT;
4181          name = VGPU10_NAME_UNDEFINED;
4182          emit_output_declaration(emit, type, index, name, final_mask, TRUE,
4183                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4184          return;
4185       }
4186       else {
4187          type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4188          name = VGPU10_NAME_POSITION;
4189       }
4190       /* Save the index of the vertex position output register */
4191       emit->vposition.out_index = index;
4192       break;
4193    case TGSI_SEMANTIC_CLIPDIST:
4194       type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
4195       name = VGPU10_NAME_CLIP_DISTANCE;
4196       /* save the starting index of the clip distance output register */
4197       if (semantic_index == 0)
4198          emit->clip_dist_out_index = index;
4199       final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
4200       if (final_mask == 0x0)
4201          return; /* discard this do-nothing declaration */
4202       break;
4203    case TGSI_SEMANTIC_CLIPVERTEX:
4204       type = VGPU10_OPCODE_DCL_OUTPUT;
4205       name = VGPU10_NAME_UNDEFINED;
4206       emit->clip_vertex_out_index = index;
4207       break;
4208    default:
4209       /* generic output */
4210       type = VGPU10_OPCODE_DCL_OUTPUT;
4211       name = VGPU10_NAME_UNDEFINED;
4212    }
4213 
4214    emit_output_declaration(emit, type, index, name, final_mask, addSignature,
4215                            map_tgsi_semantic_to_sgn_name(semantic_name));
4216 }
4217 
4218 
4219 /**
4220  * Emit declaration for outputs in vertex shader.
4221  */
4222 static void
emit_vs_output_declarations(struct svga_shader_emitter_v10 * emit)4223 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
4224 {
4225    unsigned i;
4226    for (i = 0; i < emit->info.num_outputs; i++) {
4227       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4228    }
4229 }
4230 
4231 
4232 /**
4233  * A helper function to determine the writemask for an output
4234  * for the specified stream.
4235  */
4236 static unsigned
output_writemask_for_stream(unsigned stream,ubyte output_streams,ubyte output_usagemask)4237 output_writemask_for_stream(unsigned stream, ubyte output_streams,
4238                                  ubyte output_usagemask)
4239 {
4240    unsigned i;
4241    unsigned writemask = 0;
4242 
4243    for (i = 0; i < 4; i++) {
4244       if ((output_streams & 0x3) == stream)
4245          writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
4246       output_streams >>= 2;
4247    }
4248    return writemask & output_usagemask;
4249 }
4250 
4251 
4252 /**
4253  * Emit declaration for outputs in geometry shader.
4254  */
4255 static void
emit_gs_output_declarations(struct svga_shader_emitter_v10 * emit)4256 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4257 {
4258    unsigned i;
4259    VGPU10OpcodeToken0 opcode0;
4260    unsigned numStreamsSupported = 1;
4261    int s;
4262 
4263    if (emit->version >= 50) {
4264       numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4265    }
4266 
4267    /**
4268     * Start emitting from the last stream first, so we end with
4269     * stream 0, so any of the auxiliary output declarations will
4270     * go to stream 0.
4271     */
4272    for (s = numStreamsSupported-1; s >= 0; s--) {
4273 
4274       if (emit->info.num_stream_output_components[s] == 0)
4275          continue;
4276 
4277       if (emit->version >= 50) {
4278          /* DCL_STREAM stream */
4279          begin_emit_instruction(emit);
4280          emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4281          emit_stream_register(emit, s);
4282          end_emit_instruction(emit);
4283       }
4284 
4285       /* emit output primitive topology declaration */
4286       opcode0.value = 0;
4287       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4288       opcode0.primitiveTopology = emit->gs.prim_topology;
4289       emit_property_instruction(emit, opcode0, 0, 0);
4290 
4291       for (i = 0; i < emit->info.num_outputs; i++) {
4292          unsigned writemask;
4293 
4294          /* find out the writemask for this stream */
4295          writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4296                                                  emit->output_usage_mask[i]);
4297 
4298          if (writemask) {
4299             enum tgsi_semantic semantic_name =
4300                emit->info.output_semantic_name[i];
4301 
4302             /* TODO: Still need to take care of a special case where a
4303              *       single varying spans across multiple output registers.
4304              */
4305             switch(semantic_name) {
4306             case TGSI_SEMANTIC_PRIMID:
4307                emit_output_declaration(emit,
4308                                        VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4309                                        VGPU10_NAME_PRIMITIVE_ID,
4310                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4311                                        FALSE,
4312                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4313                break;
4314             case TGSI_SEMANTIC_LAYER:
4315                emit_output_declaration(emit,
4316                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4317                                        VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4318                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4319                                        FALSE,
4320                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4321                break;
4322             case TGSI_SEMANTIC_VIEWPORT_INDEX:
4323                emit_output_declaration(emit,
4324                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4325                                        VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4326                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4327                                        FALSE,
4328                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4329                emit->gs.viewport_index_out_index = i;
4330                break;
4331             default:
4332                emit_vertex_output_declaration(emit, i, writemask, FALSE);
4333             }
4334          }
4335       }
4336    }
4337 
4338    /* For geometry shader outputs, it is possible the same register is
4339     * declared multiple times for different streams. So to avoid
4340     * redundant signature entries, geometry shader output signature is done
4341     * outside of the declaration.
4342     */
4343    struct svga_shader_signature *sgn = &emit->signature;
4344    SVGA3dDXShaderSignatureEntry *sgnEntry;
4345 
4346    for (i = 0; i < emit->info.num_outputs; i++) {
4347       if (emit->output_usage_mask[i]) {
4348          enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4349 
4350          sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4351          set_shader_signature_entry(sgnEntry, i,
4352                                     map_tgsi_semantic_to_sgn_name(sem_name),
4353                                     emit->output_usage_mask[i],
4354                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4355                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4356       }
4357    }
4358 }
4359 
4360 
4361 /**
4362  * Emit the declaration for the tess inner/outer output.
4363  * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4364  * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4365  * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4366  */
4367 static void
emit_tesslevel_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned opcodeType,unsigned operandType,VGPU10_SYSTEM_NAME name,SVGA3dDXSignatureSemanticName sgnName)4368 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4369                            unsigned index, unsigned opcodeType,
4370                            unsigned operandType, VGPU10_SYSTEM_NAME name,
4371                            SVGA3dDXSignatureSemanticName sgnName)
4372 {
4373    VGPU10OpcodeToken0 opcode0;
4374    VGPU10OperandToken0 operand0;
4375    VGPU10NameToken name_token;
4376 
4377    assert(emit->version >= 50);
4378    assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4379           (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4380            name == VGPU10_NAME_UNDEFINED));
4381    assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4382 
4383    assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4384           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4385 
4386    opcode0.value = operand0.value = name_token.value = 0;
4387 
4388    opcode0.opcodeType = opcodeType;
4389    operand0.operandType = operandType;
4390    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4391    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4392    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4393    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4394    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4395 
4396    name_token.name = name;
4397    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4398 
4399    /* Capture patch constant signature */
4400    struct svga_shader_signature *sgn = &emit->signature;
4401    SVGA3dDXShaderSignatureEntry *sgnEntry =
4402       &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4403    set_shader_signature_entry(sgnEntry, index,
4404                               sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4405                               SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4406                               SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4407 }
4408 
4409 
4410 /**
4411  * Emit output declarations for tessellation control shader.
4412  */
4413 static void
emit_tcs_output_declarations(struct svga_shader_emitter_v10 * emit)4414 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4415 {
4416    unsigned int i;
4417    unsigned outputIndex = emit->num_outputs;
4418    struct svga_shader_signature *sgn = &emit->signature;
4419 
4420    /**
4421     * Initialize patch_generic_out_count so it won't be counted twice
4422     * since this function is called twice, one for control point phase
4423     * and another time for patch constant phase.
4424     */
4425    emit->tcs.patch_generic_out_count = 0;
4426 
4427    for (i = 0; i < emit->info.num_outputs; i++) {
4428       unsigned index = i;
4429       const enum tgsi_semantic semantic_name =
4430          emit->info.output_semantic_name[i];
4431 
4432       switch (semantic_name) {
4433       case TGSI_SEMANTIC_TESSINNER:
4434          emit->tcs.inner.tgsi_index = i;
4435 
4436          /* skip per-patch output declarations in control point phase */
4437          if (emit->tcs.control_point_phase)
4438             break;
4439 
4440          emit->tcs.inner.out_index = outputIndex;
4441          switch (emit->key.tcs.prim_mode) {
4442          case PIPE_PRIM_QUADS:
4443             emit_tesslevel_declaration(emit, outputIndex++,
4444                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4445                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4446                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4447 
4448             emit_tesslevel_declaration(emit, outputIndex++,
4449                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4450                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4451                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4452             break;
4453          case PIPE_PRIM_TRIANGLES:
4454             emit_tesslevel_declaration(emit, outputIndex++,
4455                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4456                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4457                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4458             break;
4459          case PIPE_PRIM_LINES:
4460             break;
4461          default:
4462             debug_printf("Unsupported primitive type");
4463          }
4464          break;
4465 
4466       case TGSI_SEMANTIC_TESSOUTER:
4467          emit->tcs.outer.tgsi_index = i;
4468 
4469          /* skip per-patch output declarations in control point phase */
4470          if (emit->tcs.control_point_phase)
4471             break;
4472 
4473          emit->tcs.outer.out_index = outputIndex;
4474          switch (emit->key.tcs.prim_mode) {
4475          case PIPE_PRIM_QUADS:
4476             for (int j = 0; j < 4; j++) {
4477                emit_tesslevel_declaration(emit, outputIndex++,
4478                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4479                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4480                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4481             }
4482             break;
4483          case PIPE_PRIM_TRIANGLES:
4484             for (int j = 0; j < 3; j++) {
4485                emit_tesslevel_declaration(emit, outputIndex++,
4486                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4487                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4488                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4489             }
4490             break;
4491          case PIPE_PRIM_LINES:
4492             for (int j = 0; j < 2; j++) {
4493                emit_tesslevel_declaration(emit, outputIndex++,
4494                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4495                   VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4496                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4497             }
4498             break;
4499          default:
4500             debug_printf("Unsupported primitive type");
4501          }
4502          break;
4503 
4504       case TGSI_SEMANTIC_PATCH:
4505          if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4506             emit->tcs.patch_generic_out_index= i;
4507          emit->tcs.patch_generic_out_count++;
4508 
4509          /* skip per-patch output declarations in control point phase */
4510          if (emit->tcs.control_point_phase)
4511             break;
4512 
4513          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4514                                  VGPU10_NAME_UNDEFINED,
4515                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4516                                  FALSE,
4517                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4518 
4519          SVGA3dDXShaderSignatureEntry *sgnEntry =
4520             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4521          set_shader_signature_entry(sgnEntry, index,
4522                                     map_tgsi_semantic_to_sgn_name(semantic_name),
4523                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4524                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4525                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4526 
4527          break;
4528 
4529       default:
4530          /* save the starting index of control point outputs */
4531          if (emit->tcs.control_point_out_index == INVALID_INDEX)
4532             emit->tcs.control_point_out_index = i;
4533          emit->tcs.control_point_out_count++;
4534 
4535          /* skip control point output declarations in patch constant phase */
4536          if (!emit->tcs.control_point_phase)
4537             break;
4538 
4539          emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4540                                         TRUE);
4541 
4542       }
4543    }
4544 
4545    if (emit->tcs.control_point_phase) {
4546       /**
4547        * Add missing control point output in control point phase.
4548        */
4549       if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4550          /* use register index after tessellation factors */
4551          switch (emit->key.tcs.prim_mode) {
4552          case PIPE_PRIM_QUADS:
4553             emit->tcs.control_point_out_index = outputIndex + 6;
4554             break;
4555          case PIPE_PRIM_TRIANGLES:
4556             emit->tcs.control_point_out_index = outputIndex + 4;
4557             break;
4558          default:
4559             emit->tcs.control_point_out_index = outputIndex + 2;
4560             break;
4561          }
4562          emit->tcs.control_point_out_count++;
4563          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4564                                  emit->tcs.control_point_out_index,
4565                                  VGPU10_NAME_POSITION,
4566                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4567                                  TRUE,
4568                                  SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4569 
4570          /* If tcs does not output any control point output,
4571           * we can end the hull shader control point phase here
4572           * after emitting the default control point output.
4573           */
4574          emit->skip_instruction = TRUE;
4575       }
4576    }
4577    else {
4578       if (emit->tcs.outer.out_index == INVALID_INDEX) {
4579          /* since the TCS did not declare out outer tess level output register,
4580           * we declare it here for patch constant phase only.
4581           */
4582          emit->tcs.outer.out_index = outputIndex;
4583          if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4584             for (int i = 0; i < 4; i++) {
4585                emit_tesslevel_declaration(emit, outputIndex++,
4586                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4587                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4588                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4589             }
4590          }
4591          else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4592             for (int i = 0; i < 3; i++) {
4593                emit_tesslevel_declaration(emit, outputIndex++,
4594                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4595                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4596                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4597             }
4598          }
4599       }
4600 
4601       if (emit->tcs.inner.out_index == INVALID_INDEX) {
4602          /* since the TCS did not declare out inner tess level output register,
4603           * we declare it here
4604           */
4605          emit->tcs.inner.out_index = outputIndex;
4606          if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4607             emit_tesslevel_declaration(emit, outputIndex++,
4608                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4609                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4610                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4611             emit_tesslevel_declaration(emit, outputIndex++,
4612                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4613                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4614                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4615          }
4616          else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4617             emit_tesslevel_declaration(emit, outputIndex++,
4618                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4619                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4620                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4621          }
4622       }
4623    }
4624    emit->num_outputs = outputIndex;
4625 }
4626 
4627 
4628 /**
4629  * Emit output declarations for tessellation evaluation shader.
4630  */
4631 static void
emit_tes_output_declarations(struct svga_shader_emitter_v10 * emit)4632 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4633 {
4634    unsigned int i;
4635 
4636    for (i = 0; i < emit->info.num_outputs; i++) {
4637       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4638    }
4639 }
4640 
4641 
4642 /**
4643  * Emit the declaration for a system value input/output.
4644  */
4645 static void
emit_system_value_declaration(struct svga_shader_emitter_v10 * emit,enum tgsi_semantic semantic_name,unsigned index)4646 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4647                               enum tgsi_semantic semantic_name, unsigned index)
4648 {
4649    switch (semantic_name) {
4650    case TGSI_SEMANTIC_INSTANCEID:
4651       index = alloc_system_value_index(emit, index);
4652       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4653                              VGPU10_OPERAND_TYPE_INPUT,
4654                              VGPU10_OPERAND_INDEX_1D,
4655                              index, 1,
4656                              VGPU10_NAME_INSTANCE_ID,
4657                              VGPU10_OPERAND_4_COMPONENT,
4658                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4659                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4660                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4661                              map_tgsi_semantic_to_sgn_name(semantic_name));
4662       break;
4663    case TGSI_SEMANTIC_VERTEXID:
4664       emit->vs.vertex_id_sys_index = index;
4665       index = alloc_system_value_index(emit, index);
4666       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4667                              VGPU10_OPERAND_TYPE_INPUT,
4668                              VGPU10_OPERAND_INDEX_1D,
4669                              index, 1,
4670                              VGPU10_NAME_VERTEX_ID,
4671                              VGPU10_OPERAND_4_COMPONENT,
4672                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4673                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4674                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4675                              map_tgsi_semantic_to_sgn_name(semantic_name));
4676       break;
4677    case TGSI_SEMANTIC_SAMPLEID:
4678       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4679       emit->fs.sample_id_sys_index = index;
4680       index = alloc_system_value_index(emit, index);
4681       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4682                              VGPU10_OPERAND_TYPE_INPUT,
4683                              VGPU10_OPERAND_INDEX_1D,
4684                              index, 1,
4685                              VGPU10_NAME_SAMPLE_INDEX,
4686                              VGPU10_OPERAND_4_COMPONENT,
4687                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4688                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4689                              VGPU10_INTERPOLATION_CONSTANT, TRUE,
4690                              map_tgsi_semantic_to_sgn_name(semantic_name));
4691       break;
4692    case TGSI_SEMANTIC_SAMPLEPOS:
4693       /* This system value contains the position of the current sample
4694        * when using per-sample shading.  We implement this by calling
4695        * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4696        * index as the argument.  See emit_sample_position_instructions().
4697        */
4698       assert(emit->version >= 41);
4699       emit->fs.sample_pos_sys_index = index;
4700       index = alloc_system_value_index(emit, index);
4701       break;
4702    case TGSI_SEMANTIC_INVOCATIONID:
4703       /* Note: invocation id input is mapped to different register depending
4704        * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4705        * In TCS, it will be mapped to vOutputControlPointID#.
4706        * Since in both cases, the mapped name is unique rather than
4707        * just a generic input name ("v#"), so there is no need to remap
4708        * the index value.
4709        */
4710       assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4711              emit->unit == PIPE_SHADER_TESS_CTRL);
4712       assert(emit->version >= 50);
4713 
4714       if (emit->unit == PIPE_SHADER_GEOMETRY) {
4715          emit->gs.invocation_id_sys_index = index;
4716          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4717                                 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4718                                 VGPU10_OPERAND_INDEX_0D,
4719                                 index, 1,
4720                                 VGPU10_NAME_UNDEFINED,
4721                                 VGPU10_OPERAND_0_COMPONENT,
4722                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4723                                 0,
4724                                 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4725                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4726       } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4727          /* The emission of the control point id will be done
4728           * in the control point phase in emit_hull_shader_control_point_phase().
4729           */
4730          emit->tcs.invocation_id_sys_index = index;
4731       }
4732       break;
4733    case TGSI_SEMANTIC_SAMPLEMASK:
4734       /* Note: the PS sample mask input has a unique name ("vCoverage#")
4735        * rather than just a generic input name ("v#") so no need to remap the
4736        * index value.
4737        */
4738       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4739       assert(emit->version >= 50);
4740       emit->fs.sample_mask_in_sys_index = index;
4741       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4742                              VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4743                              VGPU10_OPERAND_INDEX_0D,
4744                              index, 1,
4745                              VGPU10_NAME_UNDEFINED,
4746                              VGPU10_OPERAND_1_COMPONENT,
4747                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4748                              0,
4749                              VGPU10_INTERPOLATION_CONSTANT, TRUE,
4750                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4751       break;
4752    case TGSI_SEMANTIC_TESSCOORD:
4753       assert(emit->version >= 50);
4754 
4755       unsigned usageMask = 0;
4756 
4757       if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4758          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4759       }
4760       else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4761                emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4762          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4763       }
4764 
4765       emit->tes.tesscoord_sys_index = index;
4766       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4767                              VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4768                              VGPU10_OPERAND_INDEX_0D,
4769                              index, 1,
4770                              VGPU10_NAME_UNDEFINED,
4771                              VGPU10_OPERAND_4_COMPONENT,
4772                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4773                              usageMask,
4774                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4775                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4776       break;
4777    case TGSI_SEMANTIC_TESSINNER:
4778       assert(emit->version >= 50);
4779       emit->tes.inner.tgsi_index = index;
4780       break;
4781    case TGSI_SEMANTIC_TESSOUTER:
4782       assert(emit->version >= 50);
4783       emit->tes.outer.tgsi_index = index;
4784       break;
4785    case TGSI_SEMANTIC_VERTICESIN:
4786       assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4787       assert(emit->version >= 50);
4788 
4789       /* save the system value index */
4790       emit->tcs.vertices_per_patch_index = index;
4791       break;
4792    case TGSI_SEMANTIC_PRIMID:
4793       assert(emit->version >= 50);
4794       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4795          emit->tcs.prim_id_index = index;
4796       }
4797       else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4798          emit->tes.prim_id_index = index;
4799          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4800                                 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4801                                 VGPU10_OPERAND_INDEX_0D,
4802                                 index, 1,
4803                                 VGPU10_NAME_UNDEFINED,
4804                                 VGPU10_OPERAND_0_COMPONENT,
4805                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4806                                 0,
4807                                 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4808                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4809       }
4810       break;
4811    case TGSI_SEMANTIC_THREAD_ID:
4812       assert(emit->unit >= PIPE_SHADER_COMPUTE);
4813       assert(emit->version >= 50);
4814       emit->cs.thread_id_index = index;
4815       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4816                              VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP,
4817                              VGPU10_OPERAND_INDEX_0D,
4818                              index, 1,
4819                              VGPU10_NAME_UNDEFINED,
4820                              VGPU10_OPERAND_4_COMPONENT,
4821                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4822                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4823                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4824                              map_tgsi_semantic_to_sgn_name(semantic_name));
4825       break;
4826    case TGSI_SEMANTIC_BLOCK_ID:
4827       assert(emit->unit >= PIPE_SHADER_COMPUTE);
4828       assert(emit->version >= 50);
4829       emit->cs.block_id_index = index;
4830       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4831                              VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID,
4832                              VGPU10_OPERAND_INDEX_0D,
4833                              index, 1,
4834                              VGPU10_NAME_UNDEFINED,
4835                              VGPU10_OPERAND_4_COMPONENT,
4836                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4837                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4838                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4839                              map_tgsi_semantic_to_sgn_name(semantic_name));
4840       break;
4841    case TGSI_SEMANTIC_GRID_SIZE:
4842       assert(emit->unit == PIPE_SHADER_COMPUTE);
4843       assert(emit->version >= 50);
4844       emit->cs.grid_size.tgsi_index = index;
4845       break;
4846    default:
4847       debug_printf("unexpected system value semantic index %u / %s\n",
4848                    semantic_name, tgsi_semantic_names[semantic_name]);
4849    }
4850 }
4851 
4852 /**
4853  * Translate a TGSI declaration to VGPU10.
4854  */
4855 static boolean
emit_vgpu10_declaration(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_declaration * decl)4856 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4857                         const struct tgsi_full_declaration *decl)
4858 {
4859    switch (decl->Declaration.File) {
4860    case TGSI_FILE_INPUT:
4861       /* do nothing - see emit_input_declarations() */
4862       return TRUE;
4863 
4864    case TGSI_FILE_OUTPUT:
4865       assert(decl->Range.First == decl->Range.Last);
4866       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4867       return TRUE;
4868 
4869    case TGSI_FILE_TEMPORARY:
4870       /* Don't declare the temps here.  Just keep track of how many
4871        * and emit the declaration later.
4872        */
4873       if (decl->Declaration.Array) {
4874          /* Indexed temporary array.  Save the start index of the array
4875           * and the size of the array.
4876           */
4877          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4878          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4879 
4880          /* Save this array so we can emit the declaration for it later */
4881          create_temp_array(emit, arrayID, decl->Range.First,
4882                            decl->Range.Last - decl->Range.First + 1,
4883                            decl->Range.First);
4884       }
4885 
4886       /* for all temps, indexed or not, keep track of highest index */
4887       emit->num_shader_temps = MAX2(emit->num_shader_temps,
4888                                     decl->Range.Last + 1);
4889       return TRUE;
4890 
4891    case TGSI_FILE_CONSTANT:
4892       /* Don't declare constants here.  Just keep track and emit later. */
4893       {
4894          unsigned constbuf = 0, num_consts;
4895          if (decl->Declaration.Dimension) {
4896             constbuf = decl->Dim.Index2D;
4897          }
4898          /* We throw an assertion here when, in fact, the shader should never
4899           * have linked due to constbuf index out of bounds, so we shouldn't
4900           * have reached here.
4901           */
4902          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4903 
4904          num_consts = MAX2(emit->num_shader_consts[constbuf],
4905                            decl->Range.Last + 1);
4906 
4907          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4908             debug_printf("Warning: constant buffer is declared to size [%u]"
4909                          " but [%u] is the limit.\n",
4910                          num_consts,
4911                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4912             emit->register_overflow = TRUE;
4913          }
4914          /* The linker doesn't enforce the max UBO size so we clamp here */
4915          emit->num_shader_consts[constbuf] =
4916             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4917       }
4918       return TRUE;
4919 
4920    case TGSI_FILE_IMMEDIATE:
4921       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4922       return FALSE;
4923 
4924    case TGSI_FILE_SYSTEM_VALUE:
4925       emit_system_value_declaration(emit, decl->Semantic.Name,
4926                                     decl->Range.First);
4927       return TRUE;
4928 
4929    case TGSI_FILE_SAMPLER:
4930       /* Don't declare samplers here.  Just keep track and emit later. */
4931       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4932       return TRUE;
4933 
4934 #if 0
4935    case TGSI_FILE_RESOURCE:
4936       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4937       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4938       assert(!"TGSI_FILE_RESOURCE not handled yet");
4939       return FALSE;
4940 #endif
4941 
4942    case TGSI_FILE_ADDRESS:
4943       emit->num_address_regs = MAX2(emit->num_address_regs,
4944                                     decl->Range.Last + 1);
4945       return TRUE;
4946 
4947    case TGSI_FILE_SAMPLER_VIEW:
4948       {
4949          unsigned unit = decl->Range.First;
4950          assert(decl->Range.First == decl->Range.Last);
4951          emit->sampler_target[unit] = decl->SamplerView.Resource;
4952 
4953          /* Note: we can ignore YZW return types for now */
4954          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4955          emit->sampler_view[unit] = TRUE;
4956       }
4957       return TRUE;
4958 
4959    case TGSI_FILE_IMAGE:
4960       {
4961          unsigned unit = decl->Range.First;
4962          assert(decl->Range.First == decl->Range.Last);
4963          assert(unit < PIPE_MAX_SHADER_IMAGES);
4964          emit->image[unit] = decl->Image;
4965          emit->image_mask |= 1 << unit;
4966          emit->num_images++;
4967       }
4968       return TRUE;
4969 
4970    case TGSI_FILE_HW_ATOMIC:
4971       /* Declare the atomic buffer if it is not already declared. */
4972       if (!(emit->atomic_bufs_mask & (1 << decl->Dim.Index2D))) {
4973          emit->num_atomic_bufs++;
4974          emit->atomic_bufs_mask |= (1 << decl->Dim.Index2D);
4975       }
4976 
4977       /* Remember the maximum atomic counter index encountered */
4978       emit->max_atomic_counter_index =
4979          MAX2(emit->max_atomic_counter_index, decl->Range.Last);
4980       return TRUE;
4981 
4982    case TGSI_FILE_MEMORY:
4983       /* Record memory has been used. */
4984       if (emit->unit == PIPE_SHADER_COMPUTE &&
4985           decl->Declaration.MemType == TGSI_MEMORY_TYPE_SHARED) {
4986          emit->cs.shared_memory_declared = TRUE;
4987       }
4988 
4989       return TRUE;
4990 
4991    case TGSI_FILE_BUFFER:
4992       assert(emit->version >= 50);
4993       emit->num_shader_bufs++;
4994       return TRUE;
4995 
4996    default:
4997       assert(!"Unexpected type of declaration");
4998       return FALSE;
4999    }
5000 }
5001 
5002 
5003 /**
5004  * Emit input declarations for fragment shader.
5005  */
5006 static void
emit_fs_input_declarations(struct svga_shader_emitter_v10 * emit)5007 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
5008 {
5009    unsigned i;
5010 
5011    for (i = 0; i < emit->linkage.num_inputs; i++) {
5012       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5013       unsigned usage_mask = emit->info.input_usage_mask[i];
5014       unsigned index = emit->linkage.input_map[i];
5015       unsigned type, interpolationMode, name;
5016       unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
5017 
5018       if (usage_mask == 0)
5019          continue;  /* register is not actually used */
5020 
5021       if (semantic_name == TGSI_SEMANTIC_POSITION) {
5022          /* fragment position input */
5023          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5024          interpolationMode = VGPU10_INTERPOLATION_LINEAR;
5025          name = VGPU10_NAME_POSITION;
5026          if (usage_mask & TGSI_WRITEMASK_W) {
5027             /* we need to replace use of 'w' with '1/w' */
5028             emit->fs.fragcoord_input_index = i;
5029          }
5030       }
5031       else if (semantic_name == TGSI_SEMANTIC_FACE) {
5032          /* fragment front-facing input */
5033          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5034          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5035          name = VGPU10_NAME_IS_FRONT_FACE;
5036          emit->fs.face_input_index = i;
5037       }
5038       else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5039          /* primitive ID */
5040          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5041          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5042          name = VGPU10_NAME_PRIMITIVE_ID;
5043       }
5044       else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
5045          /* sample index / ID */
5046          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5047          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5048          name = VGPU10_NAME_SAMPLE_INDEX;
5049       }
5050       else if (semantic_name == TGSI_SEMANTIC_LAYER) {
5051          /* render target array index */
5052          if (emit->key.fs.layer_to_zero) {
5053             /**
5054              * The shader from the previous stage does not write to layer,
5055              * so reading the layer index in fragment shader should return 0.
5056              */
5057             emit->fs.layer_input_index = i;
5058             continue;
5059          } else {
5060             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5061             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5062             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
5063             mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5064          }
5065       }
5066       else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
5067          /* viewport index */
5068          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
5069          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
5070          name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
5071          mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
5072       }
5073       else {
5074          /* general fragment input */
5075          type = VGPU10_OPCODE_DCL_INPUT_PS;
5076          interpolationMode =
5077                translate_interpolation(emit,
5078                                        emit->info.input_interpolate[i],
5079                                        emit->info.input_interpolate_loc[i]);
5080 
5081          /* keeps track if flat interpolation mode is being used */
5082          emit->uses_flat_interp = emit->uses_flat_interp ||
5083                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
5084 
5085          name = VGPU10_NAME_UNDEFINED;
5086       }
5087 
5088       emit_input_declaration(emit, type,
5089                              VGPU10_OPERAND_TYPE_INPUT,
5090                              VGPU10_OPERAND_INDEX_1D, index, 1,
5091                              name,
5092                              VGPU10_OPERAND_4_COMPONENT,
5093                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5094                              mask,
5095                              interpolationMode, TRUE,
5096                              map_tgsi_semantic_to_sgn_name(semantic_name));
5097    }
5098 }
5099 
5100 
5101 /**
5102  * Emit input declarations for vertex shader.
5103  */
5104 static void
emit_vs_input_declarations(struct svga_shader_emitter_v10 * emit)5105 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
5106 {
5107    unsigned i;
5108 
5109    for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
5110       unsigned usage_mask = emit->info.input_usage_mask[i];
5111       unsigned index = i;
5112 
5113       if (usage_mask == 0)
5114          continue;  /* register is not actually used */
5115 
5116       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5117                              VGPU10_OPERAND_TYPE_INPUT,
5118                              VGPU10_OPERAND_INDEX_1D, index, 1,
5119                              VGPU10_NAME_UNDEFINED,
5120                              VGPU10_OPERAND_4_COMPONENT,
5121                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5122                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5123                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
5124                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5125    }
5126 }
5127 
5128 
5129 /**
5130  * Emit input declarations for geometry shader.
5131  */
5132 static void
emit_gs_input_declarations(struct svga_shader_emitter_v10 * emit)5133 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
5134 {
5135    unsigned i;
5136 
5137    for (i = 0; i < emit->info.num_inputs; i++) {
5138       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5139       unsigned usage_mask = emit->info.input_usage_mask[i];
5140       unsigned index = emit->linkage.input_map[i];
5141       unsigned opcodeType, operandType;
5142       unsigned numComp, selMode;
5143       unsigned name;
5144       unsigned dim;
5145 
5146       if (usage_mask == 0)
5147          continue;  /* register is not actually used */
5148 
5149       opcodeType = VGPU10_OPCODE_DCL_INPUT;
5150       operandType = VGPU10_OPERAND_TYPE_INPUT;
5151       numComp = VGPU10_OPERAND_4_COMPONENT;
5152       selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
5153       name = VGPU10_NAME_UNDEFINED;
5154 
5155       /* all geometry shader inputs are two dimensional except
5156        * gl_PrimitiveID
5157        */
5158       dim = VGPU10_OPERAND_INDEX_2D;
5159 
5160       if (semantic_name == TGSI_SEMANTIC_PRIMID) {
5161          /* Primitive ID */
5162          operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
5163          dim = VGPU10_OPERAND_INDEX_0D;
5164          numComp = VGPU10_OPERAND_0_COMPONENT;
5165          selMode = 0;
5166 
5167          /* also save the register index so we can check for
5168           * primitive id when emit src register. We need to modify the
5169           * operand type, index dimension when emit primitive id src reg.
5170           */
5171           emit->gs.prim_id_index = i;
5172       }
5173       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
5174          /* vertex position input */
5175          opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
5176          name = VGPU10_NAME_POSITION;
5177       }
5178 
5179       emit_input_declaration(emit, opcodeType, operandType,
5180                              dim, index,
5181                              emit->gs.input_size,
5182                              name,
5183                              numComp, selMode,
5184                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5185                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
5186                              map_tgsi_semantic_to_sgn_name(semantic_name));
5187    }
5188 }
5189 
5190 
5191 /**
5192  * Emit input declarations for tessellation control shader.
5193  */
5194 static void
emit_tcs_input_declarations(struct svga_shader_emitter_v10 * emit)5195 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
5196 {
5197    unsigned i;
5198    unsigned size = emit->key.tcs.vertices_per_patch;
5199    unsigned indicesMask = 0;
5200    boolean addSignature = TRUE;
5201 
5202    if (!emit->tcs.control_point_phase)
5203       addSignature = emit->tcs.fork_phase_add_signature;
5204 
5205    for (i = 0; i < emit->info.num_inputs; i++) {
5206       unsigned usage_mask = emit->info.input_usage_mask[i];
5207       unsigned index = emit->linkage.input_map[i];
5208       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
5209       VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
5210       VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
5211       SVGA3dDXSignatureSemanticName sgn_name =
5212          map_tgsi_semantic_to_sgn_name(semantic_name);
5213 
5214       /* indices that are declared */
5215       indicesMask |= 1 << index;
5216 
5217       if (semantic_name == TGSI_SEMANTIC_POSITION ||
5218           index == emit->linkage.position_index) {
5219          /* save the input control point index for later use */
5220          emit->tcs.control_point_input_index = i;
5221       }
5222       else if (usage_mask == 0) {
5223          continue;  /* register is not actually used */
5224       }
5225       else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
5226          /* The shadow copy is being used here. So set the signature name
5227           * to UNDEFINED.
5228           */
5229          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5230       }
5231 
5232       /* input control points in the patch constant phase are emitted in the
5233        * vicp register rather than the v register.
5234        */
5235       if (!emit->tcs.control_point_phase) {
5236          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5237       }
5238 
5239       /* Tessellation control shader inputs are two dimensional.
5240        * The array size is determined by the patch vertex count.
5241        */
5242       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5243                              operandType,
5244                              VGPU10_OPERAND_INDEX_2D,
5245                              index, size, name,
5246                              VGPU10_OPERAND_4_COMPONENT,
5247                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5248                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5249                              VGPU10_INTERPOLATION_UNDEFINED,
5250                              addSignature, sgn_name);
5251    }
5252 
5253    if (emit->tcs.control_point_phase) {
5254 
5255       /* Also add an address register for the indirection to the
5256        * input control points
5257        */
5258       emit->tcs.control_point_addr_index = emit->num_address_regs++;
5259    }
5260 }
5261 
5262 
5263 static void
emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 * emit)5264 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
5265 {
5266 
5267    /* In tcs, tess factors are emitted as extra outputs.
5268     * The starting register index for the tess factors is captured
5269     * in the compile key.
5270     */
5271    unsigned inputIndex = emit->key.tes.tessfactor_index;
5272 
5273    if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
5274       if (emit->key.tes.need_tessouter) {
5275          emit->tes.outer.in_index = inputIndex;
5276          for (int i = 0; i < 4; i++) {
5277             emit_tesslevel_declaration(emit, inputIndex++,
5278                VGPU10_OPCODE_DCL_INPUT_SIV,
5279                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5280                VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
5281                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
5282          }
5283       }
5284 
5285       if (emit->key.tes.need_tessinner) {
5286          emit->tes.inner.in_index = inputIndex;
5287          emit_tesslevel_declaration(emit, inputIndex++,
5288             VGPU10_OPCODE_DCL_INPUT_SIV,
5289             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5290             VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
5291             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
5292 
5293          emit_tesslevel_declaration(emit, inputIndex++,
5294             VGPU10_OPCODE_DCL_INPUT_SIV,
5295             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5296             VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
5297             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
5298       }
5299    }
5300    else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
5301       if (emit->key.tes.need_tessouter) {
5302          emit->tes.outer.in_index = inputIndex;
5303          for (int i = 0; i < 3; i++) {
5304             emit_tesslevel_declaration(emit, inputIndex++,
5305                VGPU10_OPCODE_DCL_INPUT_SIV,
5306                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5307                VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5308                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5309          }
5310       }
5311 
5312       if (emit->key.tes.need_tessinner) {
5313          emit->tes.inner.in_index = inputIndex;
5314          emit_tesslevel_declaration(emit, inputIndex++,
5315             VGPU10_OPCODE_DCL_INPUT_SIV,
5316             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5317             VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5318             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5319       }
5320    }
5321    else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5322       if (emit->key.tes.need_tessouter) {
5323          emit->tes.outer.in_index = inputIndex;
5324          emit_tesslevel_declaration(emit, inputIndex++,
5325             VGPU10_OPCODE_DCL_INPUT_SIV,
5326             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5327             VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5328             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5329 
5330          emit_tesslevel_declaration(emit, inputIndex++,
5331             VGPU10_OPCODE_DCL_INPUT_SIV,
5332             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5333             VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5334             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5335       }
5336    }
5337 }
5338 
5339 
5340 /**
5341  * Emit input declarations for tessellation evaluation shader.
5342  */
5343 static void
emit_tes_input_declarations(struct svga_shader_emitter_v10 * emit)5344 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5345 {
5346    unsigned i;
5347 
5348    for (i = 0; i < emit->info.num_inputs; i++) {
5349       unsigned usage_mask = emit->info.input_usage_mask[i];
5350       unsigned index = emit->linkage.input_map[i];
5351       unsigned size;
5352       const enum tgsi_semantic semantic_name =
5353          emit->info.input_semantic_name[i];
5354       SVGA3dDXSignatureSemanticName sgn_name;
5355       VGPU10_OPERAND_TYPE operandType;
5356       VGPU10_OPERAND_INDEX_DIMENSION dim;
5357 
5358       if (usage_mask == 0)
5359          usage_mask = 1;  /* at least set usage mask to one */
5360 
5361       if (semantic_name == TGSI_SEMANTIC_PATCH) {
5362          operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5363          dim = VGPU10_OPERAND_INDEX_1D;
5364          size = 1;
5365          sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5366       }
5367       else {
5368          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5369          dim = VGPU10_OPERAND_INDEX_2D;
5370          size = emit->key.tes.vertices_per_patch;
5371          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5372       }
5373 
5374       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5375                              dim, index, size, VGPU10_NAME_UNDEFINED,
5376                              VGPU10_OPERAND_4_COMPONENT,
5377                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5378                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5379                              VGPU10_INTERPOLATION_UNDEFINED,
5380                              TRUE, sgn_name);
5381    }
5382 
5383    emit_tessfactor_input_declarations(emit);
5384 
5385    /* DX spec requires DS input controlpoint/patch-constant signatures to match
5386     * the HS output controlpoint/patch-constant signatures exactly.
5387     * Add missing input declarations even if they are not used in the shader.
5388     */
5389    if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5390       struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5391       for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5392 
5393           /* If a tcs output does not have a corresponding input register in
5394            * tes, add one.
5395            */
5396           if (emit->linkage.prevShader.output_map[i] >
5397               emit->linkage.input_map_max) {
5398              const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5399 
5400              if (sem_name == TGSI_SEMANTIC_PATCH) {
5401                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5402                                        VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5403                                        VGPU10_OPERAND_INDEX_1D,
5404                                        i, 1, VGPU10_NAME_UNDEFINED,
5405                                        VGPU10_OPERAND_4_COMPONENT,
5406                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5407                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5408                                        VGPU10_INTERPOLATION_UNDEFINED,
5409                                        TRUE,
5410                                        map_tgsi_semantic_to_sgn_name(sem_name));
5411 
5412              } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5413                         sem_name != TGSI_SEMANTIC_TESSOUTER) {
5414                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5415                                        VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5416                                        VGPU10_OPERAND_INDEX_2D,
5417                                        i, emit->key.tes.vertices_per_patch,
5418                                        VGPU10_NAME_UNDEFINED,
5419                                        VGPU10_OPERAND_4_COMPONENT,
5420                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5421                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5422                                        VGPU10_INTERPOLATION_UNDEFINED,
5423                                        TRUE,
5424                                        map_tgsi_semantic_to_sgn_name(sem_name));
5425              }
5426              /* tessellation factors are taken care of in
5427               * emit_tessfactor_input_declarations().
5428               */
5429          }
5430       }
5431    }
5432 }
5433 
5434 
5435 /**
5436  * Emit all input declarations.
5437  */
5438 static boolean
emit_input_declarations(struct svga_shader_emitter_v10 * emit)5439 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5440 {
5441    emit->index_range.required =
5442       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5443 
5444    switch (emit->unit) {
5445    case PIPE_SHADER_FRAGMENT:
5446       emit_fs_input_declarations(emit);
5447       break;
5448    case PIPE_SHADER_GEOMETRY:
5449       emit_gs_input_declarations(emit);
5450       break;
5451    case PIPE_SHADER_VERTEX:
5452       emit_vs_input_declarations(emit);
5453       break;
5454    case PIPE_SHADER_TESS_CTRL:
5455       emit_tcs_input_declarations(emit);
5456       break;
5457    case PIPE_SHADER_TESS_EVAL:
5458       emit_tes_input_declarations(emit);
5459       break;
5460    case PIPE_SHADER_COMPUTE:
5461       //XXX emit_cs_input_declarations(emit);
5462       break;
5463    default:
5464       assert(0);
5465    }
5466 
5467    if (emit->index_range.start_index != INVALID_INDEX) {
5468       emit_index_range_declaration(emit);
5469    }
5470    emit->index_range.required = FALSE;
5471    return TRUE;
5472 }
5473 
5474 
5475 /**
5476  * Emit all output declarations.
5477  */
5478 static boolean
emit_output_declarations(struct svga_shader_emitter_v10 * emit)5479 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5480 {
5481    emit->index_range.required =
5482       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5483 
5484    switch (emit->unit) {
5485    case PIPE_SHADER_FRAGMENT:
5486       emit_fs_output_declarations(emit);
5487       break;
5488    case PIPE_SHADER_GEOMETRY:
5489       emit_gs_output_declarations(emit);
5490       break;
5491    case PIPE_SHADER_VERTEX:
5492       emit_vs_output_declarations(emit);
5493       break;
5494    case PIPE_SHADER_TESS_CTRL:
5495       emit_tcs_output_declarations(emit);
5496       break;
5497    case PIPE_SHADER_TESS_EVAL:
5498       emit_tes_output_declarations(emit);
5499       break;
5500    case PIPE_SHADER_COMPUTE:
5501       //XXX emit_cs_output_declarations(emit);
5502       break;
5503    default:
5504       assert(0);
5505    }
5506 
5507    if (emit->vposition.so_index != INVALID_INDEX &&
5508        emit->vposition.out_index != INVALID_INDEX) {
5509 
5510       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5511 
5512       /* Emit the declaration for the non-adjusted vertex position
5513        * for stream output purpose
5514        */
5515       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5516                               emit->vposition.so_index,
5517                               VGPU10_NAME_UNDEFINED,
5518                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5519                               TRUE,
5520                               SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5521    }
5522 
5523    if (emit->clip_dist_so_index != INVALID_INDEX &&
5524        emit->clip_dist_out_index != INVALID_INDEX) {
5525 
5526       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5527 
5528       /* Emit the declaration for the clip distance shadow copy which
5529        * will be used for stream output purpose and for clip distance
5530        * varying variable. Note all clip distances
5531        * will be written regardless of the enabled clipping planes.
5532        */
5533       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5534                               emit->clip_dist_so_index,
5535                               VGPU10_NAME_UNDEFINED,
5536                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5537                               TRUE,
5538                               SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5539 
5540       if (emit->info.num_written_clipdistance > 4) {
5541          /* for the second clip distance register, each handles 4 planes */
5542          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5543                                  emit->clip_dist_so_index + 1,
5544                                  VGPU10_NAME_UNDEFINED,
5545                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5546                                  TRUE,
5547                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5548       }
5549    }
5550 
5551    if (emit->index_range.start_index != INVALID_INDEX) {
5552       emit_index_range_declaration(emit);
5553    }
5554    emit->index_range.required = FALSE;
5555    return TRUE;
5556 }
5557 
5558 
5559 /**
5560  * A helper function to create a temporary indexable array
5561  * and initialize the corresponding entries in the temp_map array.
5562  */
5563 static void
create_temp_array(struct svga_shader_emitter_v10 * emit,unsigned arrayID,unsigned first,unsigned count,unsigned startIndex)5564 create_temp_array(struct svga_shader_emitter_v10 *emit,
5565                   unsigned arrayID, unsigned first, unsigned count,
5566                   unsigned startIndex)
5567 {
5568    unsigned i, tempIndex = startIndex;
5569 
5570    emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5571    assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5572    emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5573 
5574    emit->temp_arrays[arrayID].start = first;
5575    emit->temp_arrays[arrayID].size = count;
5576 
5577    /* Fill in the temp_map entries for this temp array */
5578    for (i = 0; i < count; i++, tempIndex++) {
5579       emit->temp_map[tempIndex].arrayId = arrayID;
5580       emit->temp_map[tempIndex].index = i;
5581    }
5582 }
5583 
5584 
5585 /**
5586  * Emit the declaration for the temporary registers.
5587  */
5588 static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 * emit)5589 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5590 {
5591    unsigned total_temps, reg, i;
5592 
5593    total_temps = emit->num_shader_temps;
5594 
5595    /* If there is indirect access to non-indexable temps in the shader,
5596     * convert those temps to indexable temps. This works around a bug
5597     * in the GLSL->TGSI translator exposed in piglit test
5598     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5599     * Internal temps added by the driver remain as non-indexable temps.
5600     */
5601    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5602        emit->num_temp_arrays == 0) {
5603       create_temp_array(emit, 1, 0, total_temps, 0);
5604    }
5605 
5606    /* Allocate extra temps for specially-implemented instructions,
5607     * such as LIT.
5608     */
5609    total_temps += MAX_INTERNAL_TEMPS;
5610 
5611    /* Allocate extra temps for clip distance or clip vertex.
5612     */
5613    if (emit->clip_mode == CLIP_DISTANCE) {
5614       /* We need to write the clip distance to a temporary register
5615        * first. Then it will be copied to the shadow copy for
5616        * the clip distance varying variable and stream output purpose.
5617        * It will also be copied to the actual CLIPDIST register
5618        * according to the enabled clip planes
5619        */
5620       emit->clip_dist_tmp_index = total_temps++;
5621       if (emit->info.num_written_clipdistance > 4)
5622          total_temps++; /* second clip register */
5623    }
5624    else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5625       /* If the current shader is in the last vertex processing stage,
5626        * We need to convert the TGSI CLIPVERTEX output to one or more
5627        * clip distances.  Allocate a temp reg for the clipvertex here.
5628        */
5629       assert(emit->info.writes_clipvertex > 0);
5630       emit->clip_vertex_tmp_index = total_temps;
5631       total_temps++;
5632    }
5633 
5634    if (emit->info.uses_vertexid) {
5635       assert(emit->unit == PIPE_SHADER_VERTEX);
5636       emit->vs.vertex_id_tmp_index = total_temps++;
5637    }
5638 
5639    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5640       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5641           emit->key.clip_plane_enable ||
5642           emit->vposition.so_index != INVALID_INDEX) {
5643          emit->vposition.tmp_index = total_temps;
5644          total_temps += 1;
5645       }
5646 
5647       if (emit->vposition.need_prescale) {
5648          emit->vposition.prescale_scale_index = total_temps++;
5649          emit->vposition.prescale_trans_index = total_temps++;
5650       }
5651 
5652       if (emit->unit == PIPE_SHADER_VERTEX) {
5653          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5654                                  emit->key.vs.adjust_attrib_itof |
5655                                  emit->key.vs.adjust_attrib_utof |
5656                                  emit->key.vs.attrib_is_bgra |
5657                                  emit->key.vs.attrib_puint_to_snorm |
5658                                  emit->key.vs.attrib_puint_to_uscaled |
5659                                  emit->key.vs.attrib_puint_to_sscaled);
5660          while (attrib_mask) {
5661             unsigned index = u_bit_scan(&attrib_mask);
5662             emit->vs.adjusted_input[index] = total_temps++;
5663          }
5664       }
5665       else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5666          if (emit->key.gs.writes_viewport_index)
5667             emit->gs.viewport_index_tmp_index = total_temps++;
5668       }
5669    }
5670    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5671       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5672           emit->key.fs.write_color0_to_n_cbufs > 1) {
5673          /* Allocate a temp to hold the output color */
5674          emit->fs.color_tmp_index = total_temps;
5675          total_temps += 1;
5676       }
5677 
5678       if (emit->fs.face_input_index != INVALID_INDEX) {
5679          /* Allocate a temp for the +/-1 face register */
5680          emit->fs.face_tmp_index = total_temps;
5681          total_temps += 1;
5682       }
5683 
5684       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5685          /* Allocate a temp for modified fragment position register */
5686          emit->fs.fragcoord_tmp_index = total_temps;
5687          total_temps += 1;
5688       }
5689 
5690       if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5691          /* Allocate a temp for the sample position */
5692          emit->fs.sample_pos_tmp_index = total_temps++;
5693       }
5694    }
5695    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5696       if (emit->vposition.need_prescale) {
5697          emit->vposition.tmp_index = total_temps++;
5698          emit->vposition.prescale_scale_index = total_temps++;
5699          emit->vposition.prescale_trans_index = total_temps++;
5700       }
5701 
5702       if (emit->tes.inner.tgsi_index) {
5703          emit->tes.inner.temp_index = total_temps;
5704          total_temps += 1;
5705       }
5706 
5707       if (emit->tes.outer.tgsi_index) {
5708          emit->tes.outer.temp_index = total_temps;
5709          total_temps += 1;
5710       }
5711    }
5712    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5713       if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5714          if (!emit->tcs.control_point_phase) {
5715             emit->tcs.inner.temp_index = total_temps;
5716             total_temps += 1;
5717          }
5718       }
5719       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5720          if (!emit->tcs.control_point_phase) {
5721             emit->tcs.outer.temp_index = total_temps;
5722             total_temps += 1;
5723          }
5724       }
5725 
5726       if (emit->tcs.control_point_phase &&
5727           emit->info.reads_pervertex_outputs) {
5728          emit->tcs.control_point_tmp_index = total_temps;
5729          total_temps += emit->tcs.control_point_out_count;
5730       }
5731       else if (!emit->tcs.control_point_phase &&
5732                emit->info.reads_perpatch_outputs) {
5733 
5734          /* If there is indirect access to the patch constant outputs
5735           * in the control point phase, then an indexable temporary array
5736           * will be created for these patch constant outputs.
5737           * Note, indirect access can only be applicable to
5738           * patch constant outputs in the control point phase.
5739           */
5740          if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5741             unsigned arrayID =
5742                emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5743             create_temp_array(emit, arrayID, 0,
5744                               emit->tcs.patch_generic_out_count, total_temps);
5745          }
5746          emit->tcs.patch_generic_tmp_index = total_temps;
5747          total_temps += emit->tcs.patch_generic_out_count;
5748       }
5749 
5750       emit->tcs.invocation_id_tmp_index = total_temps++;
5751    }
5752 
5753    if (emit->raw_bufs) {
5754       /**
5755        * Add 3 more temporaries if we need to translate constant buffer
5756        * to srv raw buffer. Since we need to load the value to a temporary
5757        * before it can be used as a source. There could be three source
5758        * register in an instruction.
5759        */
5760       emit->raw_buf_tmp_index = total_temps;
5761       total_temps+=3;
5762    }
5763 
5764    for (i = 0; i < emit->num_address_regs; i++) {
5765       emit->address_reg_index[i] = total_temps++;
5766    }
5767 
5768    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5769     * temp indexes.  Basically, we compact all the non-array temp register
5770     * indexes into a consecutive series.
5771     *
5772     * Before, we may have some TGSI declarations like:
5773     *   DCL TEMP[0..1], LOCAL
5774     *   DCL TEMP[2..4], ARRAY(1), LOCAL
5775     *   DCL TEMP[5..7], ARRAY(2), LOCAL
5776     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5777     *
5778     * After, we'll have a map like this:
5779     *   temp_map[0] = { array 0, index 0 }
5780     *   temp_map[1] = { array 0, index 1 }
5781     *   temp_map[2] = { array 1, index 0 }
5782     *   temp_map[3] = { array 1, index 1 }
5783     *   temp_map[4] = { array 1, index 2 }
5784     *   temp_map[5] = { array 2, index 0 }
5785     *   temp_map[6] = { array 2, index 1 }
5786     *   temp_map[7] = { array 2, index 2 }
5787     *   temp_map[8] = { array 0, index 2 }
5788     *   temp_map[9] = { array 0, index 3 }
5789     *
5790     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5791     * temps numbered 0..3
5792     *
5793     * Any time we emit a temporary register index, we'll have to use the
5794     * temp_map[] table to convert the TGSI index to the VGPU10 index.
5795     *
5796     * Finally, we recompute the total_temps value here.
5797     */
5798    reg = 0;
5799    for (i = 0; i < total_temps; i++) {
5800       if (emit->temp_map[i].arrayId == 0) {
5801          emit->temp_map[i].index = reg++;
5802       }
5803    }
5804 
5805    if (0) {
5806       debug_printf("total_temps %u\n", total_temps);
5807       for (i = 0; i < total_temps; i++) {
5808          debug_printf("temp %u ->  array %u  index %u\n",
5809                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5810       }
5811    }
5812 
5813    total_temps = reg;
5814 
5815    /* Emit declaration of ordinary temp registers */
5816    if (total_temps > 0) {
5817       VGPU10OpcodeToken0 opcode0;
5818 
5819       opcode0.value = 0;
5820       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5821 
5822       begin_emit_instruction(emit);
5823       emit_dword(emit, opcode0.value);
5824       emit_dword(emit, total_temps);
5825       end_emit_instruction(emit);
5826    }
5827 
5828    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
5829     * it's unused.
5830     */
5831    for (i = 1; i < emit->num_temp_arrays; i++) {
5832       unsigned num_temps = emit->temp_arrays[i].size;
5833 
5834       if (num_temps > 0) {
5835          VGPU10OpcodeToken0 opcode0;
5836 
5837          opcode0.value = 0;
5838          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5839 
5840          begin_emit_instruction(emit);
5841          emit_dword(emit, opcode0.value);
5842          emit_dword(emit, i); /* which array */
5843          emit_dword(emit, num_temps);
5844          emit_dword(emit, 4); /* num components */
5845          end_emit_instruction(emit);
5846 
5847          total_temps += num_temps;
5848       }
5849    }
5850 
5851    /* Check that the grand total of all regular and indexed temps is
5852     * under the limit.
5853     */
5854    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5855 
5856    return TRUE;
5857 }
5858 
5859 
5860 static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 * emit)5861 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5862 {
5863    VGPU10OpcodeToken0 opcode0;
5864    VGPU10OperandToken0 operand0;
5865    unsigned total_consts, i;
5866 
5867    opcode0.value = 0;
5868    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5869    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5870    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5871 
5872    operand0.value = 0;
5873    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5874    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5875    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5876    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5877    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5878    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5879    operand0.swizzleX = 0;
5880    operand0.swizzleY = 1;
5881    operand0.swizzleZ = 2;
5882    operand0.swizzleW = 3;
5883 
5884    /**
5885     * Emit declaration for constant buffer [0].  We also allocate
5886     * room for the extra constants here.
5887     */
5888    total_consts = emit->num_shader_consts[0];
5889 
5890    /* Now, allocate constant slots for the "extra" constants.
5891     * Note: it's critical that these extra constant locations
5892     * exactly match what's emitted by the "extra" constants code
5893     * in svga_state_constants.c
5894     */
5895 
5896    /* Vertex position scale/translation */
5897    if (emit->vposition.need_prescale) {
5898       emit->vposition.prescale_cbuf_index = total_consts;
5899       total_consts += (2 * emit->vposition.num_prescale);
5900    }
5901 
5902    if (emit->unit == PIPE_SHADER_VERTEX) {
5903       if (emit->key.vs.undo_viewport) {
5904          emit->vs.viewport_index = total_consts++;
5905       }
5906       if (emit->key.vs.need_vertex_id_bias) {
5907          emit->vs.vertex_id_bias_index = total_consts++;
5908       }
5909    }
5910 
5911    /* user-defined clip planes */
5912    if (emit->key.clip_plane_enable) {
5913       unsigned n = util_bitcount(emit->key.clip_plane_enable);
5914       assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5915              emit->unit != PIPE_SHADER_COMPUTE);
5916       for (i = 0; i < n; i++) {
5917          emit->clip_plane_const[i] = total_consts++;
5918       }
5919    }
5920 
5921    for (i = 0; i < emit->num_samplers; i++) {
5922 
5923       if (emit->key.tex[i].sampler_view) {
5924          /* Texcoord scale factors for RECT textures */
5925          if (emit->key.tex[i].unnormalized) {
5926             emit->texcoord_scale_index[i] = total_consts++;
5927          }
5928 
5929          /* Texture buffer sizes */
5930          if (emit->key.tex[i].target == PIPE_BUFFER) {
5931             emit->texture_buffer_size_index[i] = total_consts++;
5932          }
5933       }
5934    }
5935    if (emit->key.image_size_used) {
5936       emit->image_size_index = total_consts;
5937       total_consts += emit->num_images;
5938    }
5939 
5940    if (total_consts > 0) {
5941       if (total_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
5942          debug_printf("Warning: Too many constants [%u] declared in constant"
5943                       " buffer 0. %u is the limit.\n",
5944                       total_consts,
5945                       VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
5946          total_consts = VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT;
5947          emit->register_overflow = TRUE;
5948       }
5949       begin_emit_instruction(emit);
5950       emit_dword(emit, opcode0.value);
5951       emit_dword(emit, operand0.value);
5952       emit_dword(emit, 0);  /* which const buffer slot */
5953       emit_dword(emit, total_consts);
5954       end_emit_instruction(emit);
5955    }
5956 
5957    /* Declare remaining constant buffers (UBOs) */
5958 
5959    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5960       if (emit->num_shader_consts[i] > 0) {
5961          if (emit->raw_bufs & (1 << i)) {
5962             /* UBO declared as srv raw buffer */
5963 
5964             VGPU10OpcodeToken0 opcode1;
5965             VGPU10OperandToken0 operand1;
5966 
5967             opcode1.value = 0;
5968             opcode1.opcodeType = VGPU10_OPCODE_DCL_RESOURCE_RAW;
5969             opcode1.resourceDimension = VGPU10_RESOURCE_DIMENSION_UNKNOWN;
5970 
5971             operand1.value = 0;
5972             operand1.numComponents = VGPU10_OPERAND_0_COMPONENT;
5973             operand1.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5974             operand1.indexDimension = VGPU10_OPERAND_INDEX_1D;
5975             operand1.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5976 
5977             begin_emit_instruction(emit);
5978             emit_dword(emit, opcode1.value);
5979             emit_dword(emit, operand1.value);
5980             emit_dword(emit, i + emit->raw_buf_srv_start_index);
5981             end_emit_instruction(emit);
5982          }
5983          else {
5984 
5985             /* UBO declared as const buffer */
5986             begin_emit_instruction(emit);
5987             emit_dword(emit, opcode0.value);
5988             emit_dword(emit, operand0.value);
5989             emit_dword(emit, i);  /* which const buffer slot */
5990             emit_dword(emit, emit->num_shader_consts[i]);
5991             end_emit_instruction(emit);
5992          }
5993       }
5994    }
5995 
5996    return TRUE;
5997 }
5998 
5999 
6000 /**
6001  * Emit declarations for samplers.
6002  */
6003 static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)6004 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
6005 {
6006    unsigned i;
6007 
6008    for (i = 0; i < emit->key.num_samplers; i++) {
6009 
6010       VGPU10OpcodeToken0 opcode0;
6011       VGPU10OperandToken0 operand0;
6012 
6013       opcode0.value = 0;
6014       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
6015       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
6016 
6017       operand0.value = 0;
6018       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6019       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
6020       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6021       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6022 
6023       begin_emit_instruction(emit);
6024       emit_dword(emit, opcode0.value);
6025       emit_dword(emit, operand0.value);
6026       emit_dword(emit, i);
6027       end_emit_instruction(emit);
6028    }
6029 
6030    return TRUE;
6031 }
6032 
6033 
6034 /**
6035  * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6036  */
6037 static unsigned
pipe_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,boolean is_array,boolean is_uav)6038 pipe_texture_to_resource_dimension(enum tgsi_texture_type target,
6039                                    unsigned num_samples,
6040                                    boolean is_array,
6041                                    boolean is_uav)
6042 {
6043    switch (target) {
6044    case PIPE_BUFFER:
6045       return VGPU10_RESOURCE_DIMENSION_BUFFER;
6046    case PIPE_TEXTURE_1D:
6047       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6048    case PIPE_TEXTURE_2D:
6049       return num_samples > 2 ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS :
6050          VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6051    case PIPE_TEXTURE_RECT:
6052       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6053    case PIPE_TEXTURE_3D:
6054       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6055    case PIPE_TEXTURE_CUBE:
6056       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6057    case PIPE_TEXTURE_1D_ARRAY:
6058       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6059          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6060    case PIPE_TEXTURE_2D_ARRAY:
6061       if (num_samples > 2 && is_array)
6062          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY;
6063       else if (is_array)
6064          return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY;
6065       else
6066          return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6067    case PIPE_TEXTURE_CUBE_ARRAY:
6068       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6069              (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6070                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6071    default:
6072       assert(!"Unexpected resource type");
6073       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6074    }
6075 }
6076 
6077 
6078 /**
6079  * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
6080  */
6081 static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,boolean is_array,boolean is_uav)6082 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
6083                                    unsigned num_samples,
6084                                    boolean is_array,
6085                                    boolean is_uav)
6086 {
6087    if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
6088       target = TGSI_TEXTURE_2D;
6089    }
6090    else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
6091       target = TGSI_TEXTURE_2D_ARRAY;
6092    }
6093 
6094    switch (target) {
6095    case TGSI_TEXTURE_BUFFER:
6096       return VGPU10_RESOURCE_DIMENSION_BUFFER;
6097    case TGSI_TEXTURE_1D:
6098       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6099    case TGSI_TEXTURE_2D:
6100    case TGSI_TEXTURE_RECT:
6101       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6102    case TGSI_TEXTURE_3D:
6103       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6104    case TGSI_TEXTURE_CUBE:
6105    case TGSI_TEXTURE_SHADOWCUBE:
6106       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6107                       VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6108    case TGSI_TEXTURE_SHADOW1D:
6109       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6110    case TGSI_TEXTURE_SHADOW2D:
6111    case TGSI_TEXTURE_SHADOWRECT:
6112       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6113    case TGSI_TEXTURE_1D_ARRAY:
6114    case TGSI_TEXTURE_SHADOW1D_ARRAY:
6115       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
6116          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
6117    case TGSI_TEXTURE_2D_ARRAY:
6118    case TGSI_TEXTURE_SHADOW2D_ARRAY:
6119       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
6120          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6121    case TGSI_TEXTURE_2D_MSAA:
6122       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6123    case TGSI_TEXTURE_2D_ARRAY_MSAA:
6124       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
6125          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
6126    case TGSI_TEXTURE_CUBE_ARRAY:
6127       return is_uav ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY :
6128              (is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY :
6129                          VGPU10_RESOURCE_DIMENSION_TEXTURECUBE);
6130    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
6131       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
6132          : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
6133    default:
6134       assert(!"Unexpected resource type");
6135       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
6136    }
6137 }
6138 
6139 
6140 /**
6141  * Given a tgsi_return_type, return true iff it is an integer type.
6142  */
6143 static boolean
is_integer_type(enum tgsi_return_type type)6144 is_integer_type(enum tgsi_return_type type)
6145 {
6146    switch (type) {
6147       case TGSI_RETURN_TYPE_SINT:
6148       case TGSI_RETURN_TYPE_UINT:
6149          return TRUE;
6150       case TGSI_RETURN_TYPE_FLOAT:
6151       case TGSI_RETURN_TYPE_UNORM:
6152       case TGSI_RETURN_TYPE_SNORM:
6153          return FALSE;
6154       case TGSI_RETURN_TYPE_COUNT:
6155       default:
6156          assert(!"is_integer_type: Unknown tgsi_return_type");
6157          return FALSE;
6158    }
6159 }
6160 
6161 
6162 /**
6163  * Emit declarations for resources.
6164  * XXX When we're sure that all TGSI shaders will be generated with
6165  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
6166  * rework this code.
6167  */
6168 static boolean
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)6169 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
6170 {
6171    unsigned i;
6172 
6173    /* Emit resource decl for each sampler */
6174    for (i = 0; i < emit->num_samplers; i++) {
6175       if (!(emit->info.samplers_declared & (1 << i)))
6176          continue;
6177 
6178       VGPU10OpcodeToken0 opcode0;
6179       VGPU10OperandToken0 operand0;
6180       VGPU10ResourceReturnTypeToken return_type;
6181       VGPU10_RESOURCE_RETURN_TYPE rt;
6182 
6183       opcode0.value = 0;
6184       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
6185       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6186          opcode0.resourceDimension =
6187             tgsi_texture_to_resource_dimension(emit->sampler_target[i],
6188                                                emit->key.tex[i].num_samples,
6189                                                emit->key.tex[i].is_array,
6190                                                FALSE);
6191       }
6192       else {
6193          opcode0.resourceDimension =
6194             pipe_texture_to_resource_dimension(emit->key.tex[i].target,
6195                                                emit->key.tex[i].num_samples,
6196                                                emit->key.tex[i].is_array,
6197                                                FALSE);
6198       }
6199       opcode0.sampleCount = emit->key.tex[i].num_samples;
6200       operand0.value = 0;
6201       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6202       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
6203       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6204       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6205 
6206 #if 1
6207       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
6208       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
6209       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
6210       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
6211       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
6212       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
6213       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
6214       if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) {
6215          rt = emit->sampler_return_type[i] + 1;
6216       }
6217       else {
6218          rt = emit->key.tex[i].sampler_return_type;
6219       }
6220 #else
6221       switch (emit->sampler_return_type[i]) {
6222          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
6223          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
6224          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
6225          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
6226          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
6227          case TGSI_RETURN_TYPE_COUNT:
6228          default:
6229             rt = VGPU10_RETURN_TYPE_FLOAT;
6230             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
6231       }
6232 #endif
6233 
6234       return_type.value = 0;
6235       return_type.component0 = rt;
6236       return_type.component1 = rt;
6237       return_type.component2 = rt;
6238       return_type.component3 = rt;
6239 
6240       begin_emit_instruction(emit);
6241       emit_dword(emit, opcode0.value);
6242       emit_dword(emit, operand0.value);
6243       emit_dword(emit, i);
6244       emit_dword(emit, return_type.value);
6245       end_emit_instruction(emit);
6246    }
6247 
6248    return TRUE;
6249 }
6250 
6251 
6252 /**
6253  * Emit instruction to declare uav for the shader image
6254  */
6255 static void
emit_image_declarations(struct svga_shader_emitter_v10 * emit)6256 emit_image_declarations(struct svga_shader_emitter_v10 *emit)
6257 {
6258    unsigned i = 0;
6259    unsigned unit = 0;
6260    unsigned uav_mask = 0;
6261 
6262    /* Emit uav decl for each image */
6263    for (i = 0; i < emit->num_images; i++, unit++) {
6264 
6265       /* Find the unit index of the next declared image.
6266        */
6267       while (!(emit->image_mask & (1 << unit))) {
6268          unit++;
6269       }
6270 
6271       VGPU10OpcodeToken0 opcode0;
6272       VGPU10OperandToken0 operand0;
6273       VGPU10ResourceReturnTypeToken return_type;
6274 
6275       /* If the corresponding uav for the image is already declared,
6276        * skip this image declaration.
6277        */
6278       if (uav_mask & (1 << emit->key.images[unit].uav_index))
6279          continue;
6280 
6281       opcode0.value = 0;
6282       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_TYPED;
6283       opcode0.uavResourceDimension =
6284          tgsi_texture_to_resource_dimension(emit->image[unit].Resource,
6285                                             0, emit->key.images[unit].is_array,
6286                                             TRUE);
6287 
6288       if (emit->key.images[unit].is_single_layer &&
6289           emit->key.images[unit].resource_target == PIPE_TEXTURE_3D) {
6290          opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
6291       }
6292 
6293       /* Declare the uav as global coherent if the shader includes memory
6294        * barrier instructions.
6295        */
6296       opcode0.globallyCoherent =
6297          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6298 
6299       operand0.value = 0;
6300       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6301       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6302       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6303       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6304 
6305       return_type.value = 0;
6306       return_type.component0 =
6307          return_type.component1 =
6308          return_type.component2 =
6309          return_type.component3 = emit->key.images[unit].return_type + 1;
6310 
6311       assert(emit->key.images[unit].uav_index != SVGA3D_INVALID_ID);
6312       begin_emit_instruction(emit);
6313       emit_dword(emit, opcode0.value);
6314       emit_dword(emit, operand0.value);
6315       emit_dword(emit, emit->key.images[unit].uav_index);
6316       emit_dword(emit, return_type.value);
6317       end_emit_instruction(emit);
6318 
6319       /* Mark the uav is already declared */
6320       uav_mask |= 1 << emit->key.images[unit].uav_index;
6321    }
6322 
6323    emit->uav_declared |= uav_mask;
6324 }
6325 
6326 
6327 /**
6328  * Emit instruction to declare uav for the shader buffer
6329  */
6330 static void
emit_shader_buf_declarations(struct svga_shader_emitter_v10 * emit)6331 emit_shader_buf_declarations(struct svga_shader_emitter_v10 *emit)
6332 {
6333    unsigned i;
6334    unsigned uav_mask = 0;
6335 
6336    /* Emit uav decl for each shader buffer */
6337    for (i = 0; i < emit->num_shader_bufs; i++) {
6338       VGPU10OpcodeToken0 opcode0;
6339       VGPU10OperandToken0 operand0;
6340 
6341       /* If the corresponding uav for the shader buf is already declared,
6342        * skip this shader buffer declaration.
6343        */
6344       if (uav_mask & (1 << emit->key.shader_buf_uav_index[i]))
6345          continue;
6346 
6347       opcode0.value = 0;
6348       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6349 
6350       /* Declare the uav as global coherent if the shader includes memory
6351        * barrier instructions.
6352        */
6353       opcode0.globallyCoherent =
6354          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6355 
6356       operand0.value = 0;
6357       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6358       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6359       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6360       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6361 
6362       assert(emit->key.shader_buf_uav_index[i] != SVGA3D_INVALID_ID);
6363       begin_emit_instruction(emit);
6364       emit_dword(emit, opcode0.value);
6365       emit_dword(emit, operand0.value);
6366       emit_dword(emit, emit->key.shader_buf_uav_index[i]);
6367       end_emit_instruction(emit);
6368 
6369       /* Mark the uav is already declared */
6370       uav_mask |= 1 << emit->key.shader_buf_uav_index[i];
6371    }
6372 
6373    emit->uav_declared |= uav_mask;
6374 }
6375 
6376 
6377 /**
6378  * Emit instruction to declare thread group shared memory(tgsm) for shared memory
6379  */
6380 static void
emit_memory_declarations(struct svga_shader_emitter_v10 * emit)6381 emit_memory_declarations(struct svga_shader_emitter_v10 *emit)
6382 {
6383    if (emit->cs.shared_memory_declared) {
6384       VGPU10OpcodeToken0 opcode0;
6385       VGPU10OperandToken0 operand0;
6386 
6387       opcode0.value = 0;
6388       opcode0.opcodeType = VGPU10_OPCODE_DCL_TGSM_RAW;
6389 
6390       /* Declare the uav as global coherent if the shader includes memory
6391        * barrier instructions.
6392        */
6393       opcode0.globallyCoherent =
6394          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6395 
6396       operand0.value = 0;
6397       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6398       operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
6399       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6400       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6401 
6402       begin_emit_instruction(emit);
6403       emit_dword(emit, opcode0.value);
6404       emit_dword(emit, operand0.value);
6405 
6406       /* Current state tracker only declares one shared memory for GLSL.
6407        * Use index 0 for this shared memory.
6408        */
6409       emit_dword(emit, 0);
6410       emit_dword(emit, emit->key.cs.mem_size); /* byte Count */
6411       end_emit_instruction(emit);
6412    }
6413 }
6414 
6415 
6416 /**
6417  * Emit instruction to declare uav for atomic buffers
6418  */
6419 static void
emit_atomic_buf_declarations(struct svga_shader_emitter_v10 * emit)6420 emit_atomic_buf_declarations(struct svga_shader_emitter_v10 *emit)
6421 {
6422    unsigned atomic_bufs_mask = emit->atomic_bufs_mask;
6423    unsigned uav_mask = 0;
6424 
6425    /* Emit uav decl for each atomic buffer */
6426    while (atomic_bufs_mask) {
6427       unsigned buf_index = u_bit_scan(&atomic_bufs_mask);
6428       unsigned uav_index = emit->key.atomic_buf_uav_index[buf_index];
6429 
6430       /* If the corresponding uav for the shader buf is already declared,
6431        * skip this shader buffer declaration.
6432        */
6433       if (uav_mask & (1 << uav_index))
6434          continue;
6435 
6436       VGPU10OpcodeToken0 opcode0;
6437       VGPU10OperandToken0 operand0;
6438 
6439       assert(uav_index != SVGA3D_INVALID_ID);
6440 
6441       opcode0.value = 0;
6442       opcode0.opcodeType = VGPU10_OPCODE_DCL_UAV_RAW;
6443       opcode0.uavResourceDimension = VGPU10_RESOURCE_DIMENSION_BUFFER;
6444 
6445       /* Declare the uav as global coherent if the shader includes memory
6446        * barrier instructions.
6447        */
6448       opcode0.globallyCoherent =
6449          (emit->info.opcode_count[TGSI_OPCODE_MEMBAR] > 0) ? 1 : 0;
6450       opcode0.uavHasCounter = 1;
6451 
6452       operand0.value = 0;
6453       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
6454       operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
6455       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
6456       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
6457 
6458       begin_emit_instruction(emit);
6459       emit_dword(emit, opcode0.value);
6460       emit_dword(emit, operand0.value);
6461       emit_dword(emit, uav_index);
6462       end_emit_instruction(emit);
6463 
6464       /* Mark the uav is already declared */
6465       uav_mask |= 1 << uav_index;
6466    }
6467 
6468    emit->uav_declared |= uav_mask;
6469 
6470    /* Allocate immediates to be used for index to the atomic buffers */
6471    unsigned j = 0;
6472    for (unsigned i = 0; i <= emit->num_atomic_bufs / 4; i++, j+=4) {
6473       alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6474    }
6475 
6476    /* Allocate immediates for the atomic counter index */
6477    for (; j <= emit->max_atomic_counter_index; j+=4) {
6478       alloc_immediate_int4(emit, j+0, j+1, j+2, j+3);
6479    }
6480 }
6481 
6482 
6483 /**
6484  * Emit instruction with n=1, 2 or 3 source registers.
6485  */
6486 static void
emit_instruction_opn(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3,boolean saturate,bool precise)6487 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
6488                      unsigned opcode,
6489                      const struct tgsi_full_dst_register *dst,
6490                      const struct tgsi_full_src_register *src1,
6491                      const struct tgsi_full_src_register *src2,
6492                      const struct tgsi_full_src_register *src3,
6493                      boolean saturate, bool precise)
6494 {
6495    begin_emit_instruction(emit);
6496    emit_opcode_precise(emit, opcode, saturate, precise);
6497    emit_dst_register(emit, dst);
6498    emit_src_register(emit, src1);
6499    if (src2) {
6500       emit_src_register(emit, src2);
6501    }
6502    if (src3) {
6503       emit_src_register(emit, src3);
6504    }
6505    end_emit_instruction(emit);
6506 }
6507 
6508 static void
emit_instruction_op1(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6509 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
6510                      unsigned opcode,
6511                      const struct tgsi_full_dst_register *dst,
6512                      const struct tgsi_full_src_register *src)
6513 {
6514    emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
6515 }
6516 
6517 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2)6518 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
6519                      VGPU10_OPCODE_TYPE opcode,
6520                      const struct tgsi_full_dst_register *dst,
6521                      const struct tgsi_full_src_register *src1,
6522                      const struct tgsi_full_src_register *src2)
6523 {
6524    emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
6525 }
6526 
6527 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3)6528 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
6529                      VGPU10_OPCODE_TYPE opcode,
6530                      const struct tgsi_full_dst_register *dst,
6531                      const struct tgsi_full_src_register *src1,
6532                      const struct tgsi_full_src_register *src2,
6533                      const struct tgsi_full_src_register *src3)
6534 {
6535    emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
6536 }
6537 
6538 static void
emit_instruction_op0(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode)6539 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
6540                      VGPU10_OPCODE_TYPE opcode)
6541 {
6542    begin_emit_instruction(emit);
6543    emit_opcode(emit, opcode, FALSE);
6544    end_emit_instruction(emit);
6545 }
6546 
6547 /**
6548  * Tessellation inner/outer levels needs to be store into its
6549  * appropriate registers depending on prim_mode.
6550  */
6551 static void
store_tesslevels(struct svga_shader_emitter_v10 * emit)6552 store_tesslevels(struct svga_shader_emitter_v10 *emit)
6553 {
6554    int i;
6555 
6556    /* tessellation levels are required input/out in hull shader.
6557     * emitting the inner/outer tessellation levels, either from
6558     * values provided in tcs or fallback default values which is 1.0
6559     */
6560    if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
6561       struct tgsi_full_src_register temp_src;
6562 
6563       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6564          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6565       else
6566          temp_src = make_immediate_reg_float(emit, 1.0f);
6567 
6568       for (i = 0; i < 2; i++) {
6569          struct tgsi_full_src_register src =
6570             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6571          struct tgsi_full_dst_register dst =
6572             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
6573          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6574          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6575       }
6576 
6577       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6578          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6579       else
6580          temp_src = make_immediate_reg_float(emit, 1.0f);
6581 
6582       for (i = 0; i < 4; i++) {
6583          struct tgsi_full_src_register src =
6584             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6585          struct tgsi_full_dst_register dst =
6586             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6587          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6588          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6589       }
6590    }
6591    else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
6592       struct tgsi_full_src_register temp_src;
6593 
6594       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
6595          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
6596       else
6597          temp_src = make_immediate_reg_float(emit, 1.0f);
6598 
6599       struct tgsi_full_src_register src =
6600          scalar_src(&temp_src, TGSI_SWIZZLE_X);
6601       struct tgsi_full_dst_register dst =
6602          make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
6603       dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6604       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6605 
6606       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
6607          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
6608       else
6609          temp_src = make_immediate_reg_float(emit, 1.0f);
6610 
6611       for (i = 0; i < 3; i++) {
6612          struct tgsi_full_src_register src =
6613             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6614          struct tgsi_full_dst_register dst =
6615             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
6616          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6617          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6618       }
6619    }
6620    else if (emit->key.tcs.prim_mode ==  PIPE_PRIM_LINES) {
6621       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
6622          struct tgsi_full_src_register temp_src =
6623             make_src_temp_reg(emit->tcs.outer.temp_index);
6624          for (i = 0; i < 2; i++) {
6625             struct tgsi_full_src_register src =
6626                scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
6627             struct tgsi_full_dst_register dst =
6628                make_dst_reg(TGSI_FILE_OUTPUT,
6629                             emit->tcs.outer.out_index + i);
6630             dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
6631             emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
6632          }
6633       }
6634    }
6635    else {
6636       debug_printf("Unsupported primitive type");
6637    }
6638 }
6639 
6640 
6641 /**
6642  * Emit the actual clip distance instructions to be used for clipping
6643  * by copying the clip distance from the temporary registers to the
6644  * CLIPDIST registers written with the enabled planes mask.
6645  * Also copy the clip distance from the temporary to the clip distance
6646  * shadow copy register which will be referenced by the input shader
6647  */
6648 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)6649 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
6650 {
6651    struct tgsi_full_src_register tmp_clip_dist_src;
6652    struct tgsi_full_dst_register clip_dist_dst;
6653 
6654    unsigned i;
6655    unsigned clip_plane_enable = emit->key.clip_plane_enable;
6656    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6657    int num_written_clipdist = emit->info.num_written_clipdistance;
6658 
6659    assert(emit->clip_dist_out_index != INVALID_INDEX);
6660    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6661 
6662    /**
6663     * Temporary reset the temporary clip dist register index so
6664     * that the copy to the real clip dist register will not
6665     * attempt to copy to the temporary register again
6666     */
6667    emit->clip_dist_tmp_index = INVALID_INDEX;
6668 
6669    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6670 
6671       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6672 
6673       /**
6674        * copy to the shadow copy for use by varying variable and
6675        * stream output. All clip distances
6676        * will be written regardless of the enabled clipping planes.
6677        */
6678       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6679                                    emit->clip_dist_so_index + i);
6680 
6681       /* MOV clip_dist_so, tmp_clip_dist */
6682       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6683                            &tmp_clip_dist_src);
6684 
6685       /**
6686        * copy those clip distances to enabled clipping planes
6687        * to CLIPDIST registers for clipping
6688        */
6689       if (clip_plane_enable & 0xf) {
6690          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6691                                       emit->clip_dist_out_index + i);
6692          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6693 
6694          /* MOV CLIPDIST, tmp_clip_dist */
6695          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6696                               &tmp_clip_dist_src);
6697       }
6698       /* four clip planes per clip register */
6699       clip_plane_enable >>= 4;
6700    }
6701    /**
6702     * set the temporary clip dist register index back to the
6703     * temporary index for the next vertex
6704     */
6705    emit->clip_dist_tmp_index = clip_dist_tmp_index;
6706 }
6707 
6708 /* Declare clip distance output registers for user-defined clip planes
6709  * or the TGSI_CLIPVERTEX output.
6710  */
6711 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)6712 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6713 {
6714    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6715    unsigned index = emit->num_outputs;
6716    unsigned plane_mask;
6717 
6718    assert(emit->unit != PIPE_SHADER_FRAGMENT);
6719    assert(num_clip_planes <= 8);
6720 
6721    if (emit->clip_mode != CLIP_LEGACY &&
6722        emit->clip_mode != CLIP_VERTEX) {
6723       return;
6724    }
6725 
6726    if (num_clip_planes == 0)
6727       return;
6728 
6729    /* Convert clip vertex to clip distances only in the last vertex stage */
6730    if (!emit->key.last_vertex_stage)
6731       return;
6732 
6733    /* Declare one or two clip output registers.  The number of components
6734     * in the mask reflects the number of clip planes.  For example, if 5
6735     * clip planes are needed, we'll declare outputs similar to:
6736     * dcl_output_siv o2.xyzw, clip_distance
6737     * dcl_output_siv o3.x, clip_distance
6738     */
6739    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6740 
6741    plane_mask = (1 << num_clip_planes) - 1;
6742    if (plane_mask & 0xf) {
6743       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6744       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6745                               VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6746                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6747       emit->num_outputs++;
6748    }
6749    if (plane_mask & 0xf0) {
6750       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6751       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6752                               VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6753                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6754       emit->num_outputs++;
6755    }
6756 }
6757 
6758 
6759 /**
6760  * Emit the instructions for writing to the clip distance registers
6761  * to handle legacy/automatic clip planes.
6762  * For each clip plane, the distance is the dot product of the vertex
6763  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6764  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6765  * output registers already declared.
6766  */
6767 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)6768 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6769                              unsigned vpos_tmp_index)
6770 {
6771    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6772 
6773    assert(emit->clip_mode == CLIP_LEGACY);
6774    assert(num_clip_planes <= 8);
6775 
6776    assert(emit->unit == PIPE_SHADER_VERTEX ||
6777           emit->unit == PIPE_SHADER_GEOMETRY ||
6778           emit->unit == PIPE_SHADER_TESS_EVAL);
6779 
6780    for (i = 0; i < num_clip_planes; i++) {
6781       struct tgsi_full_dst_register dst;
6782       struct tgsi_full_src_register plane_src, vpos_src;
6783       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6784       unsigned comp = i % 4;
6785       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6786 
6787       /* create dst, src regs */
6788       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6789       dst = writemask_dst(&dst, writemask);
6790 
6791       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6792       vpos_src = make_src_temp_reg(vpos_tmp_index);
6793 
6794       /* DP4 clip_dist, plane, vpos */
6795       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6796                            &plane_src, &vpos_src);
6797    }
6798 }
6799 
6800 
6801 /**
6802  * Emit the instructions for computing the clip distance results from
6803  * the clip vertex temporary.
6804  * For each clip plane, the distance is the dot product of the clip vertex
6805  * position (found in a temp reg) and the clip plane coefficients.
6806  */
6807 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)6808 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6809 {
6810    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6811    unsigned i;
6812    struct tgsi_full_dst_register dst;
6813    struct tgsi_full_src_register clipvert_src;
6814    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6815 
6816    assert(emit->unit == PIPE_SHADER_VERTEX ||
6817           emit->unit == PIPE_SHADER_GEOMETRY ||
6818           emit->unit == PIPE_SHADER_TESS_EVAL);
6819 
6820    assert(emit->clip_mode == CLIP_VERTEX);
6821 
6822    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6823 
6824    for (i = 0; i < num_clip; i++) {
6825       struct tgsi_full_src_register plane_src;
6826       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6827       unsigned comp = i % 4;
6828       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6829 
6830       /* create dst, src regs */
6831       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6832       dst = writemask_dst(&dst, writemask);
6833 
6834       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6835 
6836       /* DP4 clip_dist, plane, vpos */
6837       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6838                            &plane_src, &clipvert_src);
6839    }
6840 
6841    /* copy temporary clip vertex register to the clip vertex register */
6842 
6843    assert(emit->clip_vertex_out_index != INVALID_INDEX);
6844 
6845    /**
6846     * temporary reset the temporary clip vertex register index so
6847     * that copy to the clip vertex register will not attempt
6848     * to copy to the temporary register again
6849     */
6850    emit->clip_vertex_tmp_index = INVALID_INDEX;
6851 
6852    /* MOV clip_vertex, clip_vertex_tmp */
6853    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6854    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6855                         &dst, &clipvert_src);
6856 
6857    /**
6858     * set the temporary clip vertex register index back to the
6859     * temporary index for the next vertex
6860     */
6861    emit->clip_vertex_tmp_index = clip_vertex_tmp;
6862 }
6863 
6864 /**
6865  * Emit code to convert RGBA to BGRA
6866  */
6867 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6868 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6869                      const struct tgsi_full_dst_register *dst,
6870                      const struct tgsi_full_src_register *src)
6871 {
6872    struct tgsi_full_src_register bgra_src =
6873       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6874 
6875    begin_emit_instruction(emit);
6876    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6877    emit_dst_register(emit, dst);
6878    emit_src_register(emit, &bgra_src);
6879    end_emit_instruction(emit);
6880 }
6881 
6882 
6883 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6884 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6885 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6886                     const struct tgsi_full_dst_register *dst,
6887                     const struct tgsi_full_src_register *src)
6888 {
6889    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6890    struct tgsi_full_src_register two =
6891       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6892    struct tgsi_full_src_register neg_two =
6893       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6894 
6895    unsigned val_tmp = get_temp_index(emit);
6896    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6897    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6898 
6899    unsigned bias_tmp = get_temp_index(emit);
6900    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6901    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6902 
6903    /* val = src * 2.0 */
6904    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6905 
6906    /* bias = src > 0.5 */
6907    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6908 
6909    /* bias = bias & -2.0 */
6910    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6911                         &bias_src, &neg_two);
6912 
6913    /* dst = val + bias */
6914    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6915                         &val_src, &bias_src);
6916 
6917    free_temp_indexes(emit);
6918 }
6919 
6920 
6921 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6922 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6923 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6924                       const struct tgsi_full_dst_register *dst,
6925                       const struct tgsi_full_src_register *src)
6926 {
6927    struct tgsi_full_src_register scale =
6928       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6929 
6930    /* dst = src * scale */
6931    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6932 }
6933 
6934 
6935 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6936 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6937 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6938                       const struct tgsi_full_dst_register *dst,
6939                       const struct tgsi_full_src_register *src)
6940 {
6941    struct tgsi_full_src_register lshift =
6942       make_immediate_reg_int4(emit, 22, 12, 2, 0);
6943    struct tgsi_full_src_register rshift =
6944       make_immediate_reg_int4(emit, 22, 22, 22, 30);
6945 
6946    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6947 
6948    unsigned tmp = get_temp_index(emit);
6949    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6950    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6951 
6952    /*
6953     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
6954     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
6955     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
6956     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
6957     * dst = i_to_f(r,g,b,a);     # convert to float
6958     */
6959    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6960                         &src_xxxx, &lshift);
6961    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6962                         &tmp_src, &rshift);
6963    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6964 
6965    free_temp_indexes(emit);
6966 }
6967 
6968 
6969 /**
6970  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6971  */
6972 static boolean
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6973 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6974               const struct tgsi_full_instruction *inst)
6975 {
6976    unsigned index = inst->Dst[0].Register.Index;
6977    struct tgsi_full_dst_register dst;
6978    VGPU10_OPCODE_TYPE opcode;
6979 
6980    assert(index < MAX_VGPU10_ADDR_REGS);
6981    dst = make_dst_temp_reg(emit->address_reg_index[index]);
6982    dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6983 
6984    /* ARL dst, s0
6985     * Translates into:
6986     * FTOI address_tmp, s0
6987     *
6988     * UARL dst, s0
6989     * Translates into:
6990     * MOV address_tmp, s0
6991     */
6992    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6993       opcode = VGPU10_OPCODE_FTOI;
6994    else
6995       opcode = VGPU10_OPCODE_MOV;
6996 
6997    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6998 
6999    return TRUE;
7000 }
7001 
7002 
7003 /**
7004  * Emit code for TGSI_OPCODE_CAL instruction.
7005  */
7006 static boolean
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7007 emit_cal(struct svga_shader_emitter_v10 *emit,
7008          const struct tgsi_full_instruction *inst)
7009 {
7010    unsigned label = inst->Label.Label;
7011    VGPU10OperandToken0 operand;
7012    operand.value = 0;
7013    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
7014 
7015    begin_emit_instruction(emit);
7016    emit_dword(emit, operand.value);
7017    emit_dword(emit, label);
7018    end_emit_instruction(emit);
7019 
7020    return TRUE;
7021 }
7022 
7023 
7024 /**
7025  * Emit code for TGSI_OPCODE_IABS instruction.
7026  */
7027 static boolean
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7028 emit_iabs(struct svga_shader_emitter_v10 *emit,
7029           const struct tgsi_full_instruction *inst)
7030 {
7031    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
7032     * dst.y = (src0.y < 0) ? -src0.y : src0.y
7033     * dst.z = (src0.z < 0) ? -src0.z : src0.z
7034     * dst.w = (src0.w < 0) ? -src0.w : src0.w
7035     *
7036     * Translates into
7037     *   IMAX dst, src, neg(src)
7038     */
7039    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
7040    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
7041                         &inst->Src[0], &neg_src);
7042 
7043    return TRUE;
7044 }
7045 
7046 
7047 /**
7048  * Emit code for TGSI_OPCODE_CMP instruction.
7049  */
7050 static boolean
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7051 emit_cmp(struct svga_shader_emitter_v10 *emit,
7052          const struct tgsi_full_instruction *inst)
7053 {
7054    /* dst.x = (src0.x < 0) ? src1.x : src2.x
7055     * dst.y = (src0.y < 0) ? src1.y : src2.y
7056     * dst.z = (src0.z < 0) ? src1.z : src2.z
7057     * dst.w = (src0.w < 0) ? src1.w : src2.w
7058     *
7059     * Translates into
7060     *   LT tmp, src0, 0.0
7061     *   MOVC dst, tmp, src1, src2
7062     */
7063    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7064    unsigned tmp = get_temp_index(emit);
7065    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7066    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7067 
7068    emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
7069                         &inst->Src[0], &zero, NULL, FALSE,
7070                         inst->Instruction.Precise);
7071    emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
7072                         &tmp_src, &inst->Src[1], &inst->Src[2],
7073                         inst->Instruction.Saturate, FALSE);
7074 
7075    free_temp_indexes(emit);
7076 
7077    return TRUE;
7078 }
7079 
7080 
7081 /**
7082  * Emit code for TGSI_OPCODE_DST instruction.
7083  */
7084 static boolean
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7085 emit_dst(struct svga_shader_emitter_v10 *emit,
7086          const struct tgsi_full_instruction *inst)
7087 {
7088    /*
7089     * dst.x = 1
7090     * dst.y = src0.y * src1.y
7091     * dst.z = src0.z
7092     * dst.w = src1.w
7093     */
7094 
7095    struct tgsi_full_src_register s0_yyyy =
7096       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7097    struct tgsi_full_src_register s0_zzzz =
7098       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
7099    struct tgsi_full_src_register s1_yyyy =
7100       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
7101    struct tgsi_full_src_register s1_wwww =
7102       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
7103 
7104    /*
7105     * If dst and either src0 and src1 are the same we need
7106     * to create a temporary for it and insert a extra move.
7107     */
7108    unsigned tmp_move = get_temp_index(emit);
7109    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7110    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7111 
7112    /* MOV dst.x, 1.0 */
7113    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7114       struct tgsi_full_dst_register dst_x =
7115          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7116       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7117 
7118       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7119    }
7120 
7121    /* MUL dst.y, s0.y, s1.y */
7122    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7123       struct tgsi_full_dst_register dst_y =
7124          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7125 
7126       emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
7127                            &s1_yyyy, NULL, inst->Instruction.Saturate,
7128                            inst->Instruction.Precise);
7129    }
7130 
7131    /* MOV dst.z, s0.z */
7132    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7133       struct tgsi_full_dst_register dst_z =
7134          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7135 
7136       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7137                            &dst_z, &s0_zzzz, NULL, NULL,
7138                            inst->Instruction.Saturate,
7139                            inst->Instruction.Precise);
7140   }
7141 
7142    /* MOV dst.w, s1.w */
7143    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7144       struct tgsi_full_dst_register dst_w =
7145          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7146 
7147       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7148                            &dst_w, &s1_wwww, NULL, NULL,
7149                            inst->Instruction.Saturate,
7150                            inst->Instruction.Precise);
7151    }
7152 
7153    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7154    free_temp_indexes(emit);
7155 
7156    return TRUE;
7157 }
7158 
7159 
7160 /**
7161  * A helper function to return the stream index as specified in
7162  * the immediate register
7163  */
7164 static inline unsigned
find_stream_index(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7165 find_stream_index(struct svga_shader_emitter_v10 *emit,
7166                   const struct tgsi_full_src_register *src)
7167 {
7168    return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
7169 }
7170 
7171 
7172 /**
7173  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
7174  */
7175 static boolean
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7176 emit_endprim(struct svga_shader_emitter_v10 *emit,
7177              const struct tgsi_full_instruction *inst)
7178 {
7179    assert(emit->unit == PIPE_SHADER_GEOMETRY);
7180 
7181    begin_emit_instruction(emit);
7182    if (emit->version >= 50) {
7183       unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
7184 
7185       if (emit->info.num_stream_output_components[streamIndex] == 0) {
7186          /**
7187           * If there is no output for this stream, discard this instruction.
7188           */
7189          emit->discard_instruction = TRUE;
7190       }
7191       else {
7192          emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
7193          assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
7194          emit_stream_register(emit, streamIndex);
7195       }
7196    }
7197    else {
7198       emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
7199    }
7200    end_emit_instruction(emit);
7201    return TRUE;
7202 }
7203 
7204 
7205 /**
7206  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
7207  */
7208 static boolean
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7209 emit_ex2(struct svga_shader_emitter_v10 *emit,
7210          const struct tgsi_full_instruction *inst)
7211 {
7212    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
7213     * while VGPU10 computes four values.
7214     *
7215     * dst = EX2(src):
7216     *   dst.xyzw = 2.0 ^ src.x
7217     */
7218 
7219    struct tgsi_full_src_register src_xxxx =
7220       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7221                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7222 
7223    /* EXP tmp, s0.xxxx */
7224    emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
7225                         NULL, NULL,
7226                         inst->Instruction.Saturate,
7227                         inst->Instruction.Precise);
7228 
7229    return TRUE;
7230 }
7231 
7232 
7233 /**
7234  * Emit code for TGSI_OPCODE_EXP instruction.
7235  */
7236 static boolean
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7237 emit_exp(struct svga_shader_emitter_v10 *emit,
7238          const struct tgsi_full_instruction *inst)
7239 {
7240    /*
7241     * dst.x = 2 ^ floor(s0.x)
7242     * dst.y = s0.x - floor(s0.x)
7243     * dst.z = 2 ^ s0.x
7244     * dst.w = 1.0
7245     */
7246 
7247    struct tgsi_full_src_register src_xxxx =
7248       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7249    unsigned tmp = get_temp_index(emit);
7250    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7251    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7252 
7253    /*
7254     * If dst and src are the same we need to create
7255     * a temporary for it and insert a extra move.
7256     */
7257    unsigned tmp_move = get_temp_index(emit);
7258    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7259    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7260 
7261    /* only use X component of temp reg */
7262    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7263    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7264 
7265    /* ROUND_NI tmp.x, s0.x */
7266    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
7267                         &src_xxxx); /* round to -infinity */
7268 
7269    /* EXP dst.x, tmp.x */
7270    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7271       struct tgsi_full_dst_register dst_x =
7272          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7273 
7274       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
7275                            NULL, NULL,
7276                            inst->Instruction.Saturate,
7277                            inst->Instruction.Precise);
7278    }
7279 
7280    /* ADD dst.y, s0.x, -tmp */
7281    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7282       struct tgsi_full_dst_register dst_y =
7283          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7284       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
7285 
7286       emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
7287                            &neg_tmp_src, NULL,
7288                            inst->Instruction.Saturate,
7289                            inst->Instruction.Precise);
7290    }
7291 
7292    /* EXP dst.z, s0.x */
7293    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7294       struct tgsi_full_dst_register dst_z =
7295          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7296 
7297       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
7298                            NULL, NULL,
7299                            inst->Instruction.Saturate,
7300                            inst->Instruction.Precise);
7301    }
7302 
7303    /* MOV dst.w, 1.0 */
7304    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7305       struct tgsi_full_dst_register dst_w =
7306          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7307       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7308 
7309       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7310    }
7311 
7312    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7313 
7314    free_temp_indexes(emit);
7315 
7316    return TRUE;
7317 }
7318 
7319 
7320 /**
7321  * Emit code for TGSI_OPCODE_IF instruction.
7322  */
7323 static boolean
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)7324 emit_if(struct svga_shader_emitter_v10 *emit,
7325         const struct tgsi_full_src_register *src)
7326 {
7327    VGPU10OpcodeToken0 opcode0;
7328 
7329    /* The src register should be a scalar */
7330    assert(src->Register.SwizzleX == src->Register.SwizzleY &&
7331           src->Register.SwizzleX == src->Register.SwizzleZ &&
7332           src->Register.SwizzleX == src->Register.SwizzleW);
7333 
7334    /* The only special thing here is that we need to set the
7335     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
7336     * src.x is non-zero.
7337     */
7338    opcode0.value = 0;
7339    opcode0.opcodeType = VGPU10_OPCODE_IF;
7340    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
7341 
7342    begin_emit_instruction(emit);
7343    emit_dword(emit, opcode0.value);
7344    emit_src_register(emit, src);
7345    end_emit_instruction(emit);
7346 
7347    return TRUE;
7348 }
7349 
7350 
7351 /**
7352  * Emit code for conditional discard instruction (discard fragment if any of
7353  * the register components are negative).
7354  */
7355 static boolean
emit_cond_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7356 emit_cond_discard(struct svga_shader_emitter_v10 *emit,
7357                   const struct tgsi_full_instruction *inst)
7358 {
7359    unsigned tmp = get_temp_index(emit);
7360    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7361    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7362 
7363    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7364 
7365    struct tgsi_full_dst_register tmp_dst_x =
7366       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7367    struct tgsi_full_src_register tmp_src_xxxx =
7368       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7369 
7370    /* tmp = src[0] < 0.0 */
7371    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
7372 
7373    if (!same_swizzle_terms(&inst->Src[0])) {
7374       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
7375        * logically OR the swizzle terms.  Most uses of this conditional
7376        * discard instruction only test one channel so it's good to
7377        * avoid these extra steps.
7378        */
7379       struct tgsi_full_src_register tmp_src_yyyy =
7380          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
7381       struct tgsi_full_src_register tmp_src_zzzz =
7382          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
7383       struct tgsi_full_src_register tmp_src_wwww =
7384          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
7385 
7386       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7387                            &tmp_src_yyyy);
7388       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7389                            &tmp_src_zzzz);
7390       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
7391                            &tmp_src_wwww);
7392    }
7393 
7394    begin_emit_instruction(emit);
7395    emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
7396    emit_src_register(emit, &tmp_src_xxxx);
7397    end_emit_instruction(emit);
7398 
7399    free_temp_indexes(emit);
7400 
7401    return TRUE;
7402 }
7403 
7404 
7405 /**
7406  * Emit code for the unconditional discard instruction.
7407  */
7408 static boolean
emit_discard(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7409 emit_discard(struct svga_shader_emitter_v10 *emit,
7410              const struct tgsi_full_instruction *inst)
7411 {
7412    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7413 
7414    /* DISCARD if 0.0 is zero */
7415    begin_emit_instruction(emit);
7416    emit_discard_opcode(emit, FALSE);
7417    emit_src_register(emit, &zero);
7418    end_emit_instruction(emit);
7419 
7420    return TRUE;
7421 }
7422 
7423 
7424 /**
7425  * Emit code for TGSI_OPCODE_LG2 instruction.
7426  */
7427 static boolean
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7428 emit_lg2(struct svga_shader_emitter_v10 *emit,
7429          const struct tgsi_full_instruction *inst)
7430 {
7431    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
7432     * while VGPU10 computes four values.
7433     *
7434     * dst = LG2(src):
7435     *   dst.xyzw = log2(src.x)
7436     */
7437 
7438    struct tgsi_full_src_register src_xxxx =
7439       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7440                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7441 
7442    /* LOG tmp, s0.xxxx */
7443    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7444                         &inst->Dst[0], &src_xxxx, NULL, NULL,
7445                         inst->Instruction.Saturate,
7446                         inst->Instruction.Precise);
7447 
7448    return TRUE;
7449 }
7450 
7451 
7452 /**
7453  * Emit code for TGSI_OPCODE_LIT instruction.
7454  */
7455 static boolean
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7456 emit_lit(struct svga_shader_emitter_v10 *emit,
7457          const struct tgsi_full_instruction *inst)
7458 {
7459    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7460 
7461    /*
7462     * If dst and src are the same we need to create
7463     * a temporary for it and insert a extra move.
7464     */
7465    unsigned tmp_move = get_temp_index(emit);
7466    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
7467    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
7468 
7469    /*
7470     * dst.x = 1
7471     * dst.y = max(src.x, 0)
7472     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
7473     * dst.w = 1
7474     */
7475 
7476    /* MOV dst.x, 1.0 */
7477    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7478       struct tgsi_full_dst_register dst_x =
7479          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
7480       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
7481    }
7482 
7483    /* MOV dst.w, 1.0 */
7484    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7485       struct tgsi_full_dst_register dst_w =
7486          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
7487       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7488    }
7489 
7490    /* MAX dst.y, src.x, 0.0 */
7491    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7492       struct tgsi_full_dst_register dst_y =
7493          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
7494       struct tgsi_full_src_register zero =
7495          make_immediate_reg_float(emit, 0.0f);
7496       struct tgsi_full_src_register src_xxxx =
7497          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7498                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7499 
7500       emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
7501                            &zero, NULL, inst->Instruction.Saturate, FALSE);
7502    }
7503 
7504    /*
7505     * tmp1 = clamp(src.w, -128, 128);
7506     *   MAX tmp1, src.w, -128
7507     *   MIN tmp1, tmp1, 128
7508     *
7509     * tmp2 = max(tmp2, 0);
7510     *   MAX tmp2, src.y, 0
7511     *
7512     * tmp1 = pow(tmp2, tmp1);
7513     *   LOG tmp2, tmp2
7514     *   MUL tmp1, tmp2, tmp1
7515     *   EXP tmp1, tmp1
7516     *
7517     * tmp1 = (src.w == 0) ? 1 : tmp1;
7518     *   EQ tmp2, 0, src.w
7519     *   MOVC tmp1, tmp2, 1.0, tmp1
7520     *
7521     * dst.z = (0 < src.x) ? tmp1 : 0;
7522     *   LT tmp2, 0, src.x
7523     *   MOVC dst.z, tmp2, tmp1, 0.0
7524     */
7525    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7526       struct tgsi_full_dst_register dst_z =
7527          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
7528 
7529       unsigned tmp1 = get_temp_index(emit);
7530       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7531       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7532       unsigned tmp2 = get_temp_index(emit);
7533       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7534       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7535 
7536       struct tgsi_full_src_register src_xxxx =
7537          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7538       struct tgsi_full_src_register src_yyyy =
7539          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
7540       struct tgsi_full_src_register src_wwww =
7541          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
7542 
7543       struct tgsi_full_src_register zero =
7544          make_immediate_reg_float(emit, 0.0f);
7545       struct tgsi_full_src_register lowerbound =
7546          make_immediate_reg_float(emit, -128.0f);
7547       struct tgsi_full_src_register upperbound =
7548          make_immediate_reg_float(emit, 128.0f);
7549 
7550       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
7551                            &lowerbound);
7552       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
7553                            &upperbound);
7554       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
7555                            &zero);
7556 
7557       /* POW tmp1, tmp2, tmp1 */
7558       /* LOG tmp2, tmp2 */
7559       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
7560 
7561       /* MUL tmp1, tmp2, tmp1 */
7562       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
7563                            &tmp1_src);
7564 
7565       /* EXP tmp1, tmp1 */
7566       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
7567 
7568       /* EQ tmp2, 0, src.w */
7569       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
7570       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
7571       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
7572                            &tmp2_src, &one, &tmp1_src);
7573 
7574       /* LT tmp2, 0, src.x */
7575       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
7576       /* MOVC dst.z, tmp2, tmp1, 0.0 */
7577       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
7578                            &tmp2_src, &tmp1_src, &zero);
7579    }
7580 
7581    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
7582    free_temp_indexes(emit);
7583 
7584    return TRUE;
7585 }
7586 
7587 
7588 /**
7589  * Emit Level Of Detail Query (LODQ) instruction.
7590  */
7591 static boolean
emit_lodq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7592 emit_lodq(struct svga_shader_emitter_v10 *emit,
7593           const struct tgsi_full_instruction *inst)
7594 {
7595    const uint unit = inst->Src[1].Register.Index;
7596 
7597    assert(emit->version >= 41);
7598 
7599    /* LOD dst, coord, resource, sampler */
7600    begin_emit_instruction(emit);
7601    emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
7602    emit_dst_register(emit, &inst->Dst[0]);
7603    emit_src_register(emit, &inst->Src[0]); /* coord */
7604    emit_resource_register(emit, unit);
7605    emit_sampler_register(emit, unit);
7606    end_emit_instruction(emit);
7607 
7608    return TRUE;
7609 }
7610 
7611 
7612 /**
7613  * Emit code for TGSI_OPCODE_LOG instruction.
7614  */
7615 static boolean
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7616 emit_log(struct svga_shader_emitter_v10 *emit,
7617          const struct tgsi_full_instruction *inst)
7618 {
7619    /*
7620     * dst.x = floor(lg2(abs(s0.x)))
7621     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
7622     * dst.z = lg2(abs(s0.x))
7623     * dst.w = 1.0
7624     */
7625 
7626    struct tgsi_full_src_register src_xxxx =
7627       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
7628    unsigned tmp = get_temp_index(emit);
7629    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7630    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7631    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
7632 
7633    /* only use X component of temp reg */
7634    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7635    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7636 
7637    /* LOG tmp.x, abs(s0.x) */
7638    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
7639       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
7640    }
7641 
7642    /* MOV dst.z, tmp.x */
7643    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
7644       struct tgsi_full_dst_register dst_z =
7645          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
7646 
7647       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7648                            &dst_z, &tmp_src, NULL, NULL,
7649                            inst->Instruction.Saturate, FALSE);
7650    }
7651 
7652    /* FLR tmp.x, tmp.x */
7653    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7654       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7655    }
7656 
7657    /* MOV dst.x, tmp.x */
7658    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7659       struct tgsi_full_dst_register dst_x =
7660          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7661 
7662       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7663                            &dst_x, &tmp_src, NULL, NULL,
7664                            inst->Instruction.Saturate, FALSE);
7665    }
7666 
7667    /* EXP tmp.x, tmp.x */
7668    /* DIV dst.y, abs(s0.x), tmp.x */
7669    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7670       struct tgsi_full_dst_register dst_y =
7671          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7672 
7673       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7674       emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7675                            &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7676    }
7677 
7678    /* MOV dst.w, 1.0 */
7679    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7680       struct tgsi_full_dst_register dst_w =
7681          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7682       struct tgsi_full_src_register one =
7683          make_immediate_reg_float(emit, 1.0f);
7684 
7685       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7686    }
7687 
7688    free_temp_indexes(emit);
7689 
7690    return TRUE;
7691 }
7692 
7693 
7694 /**
7695  * Emit code for TGSI_OPCODE_LRP instruction.
7696  */
7697 static boolean
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7698 emit_lrp(struct svga_shader_emitter_v10 *emit,
7699          const struct tgsi_full_instruction *inst)
7700 {
7701    /* dst = LRP(s0, s1, s2):
7702     *   dst = s0 * (s1 - s2) + s2
7703     * Translates into:
7704     *   SUB tmp, s1, s2;        tmp = s1 - s2
7705     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7706     */
7707    unsigned tmp = get_temp_index(emit);
7708    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7709    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7710    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7711 
7712    /* ADD tmp, s1, -s2 */
7713    emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7714                         &inst->Src[1], &neg_src2, NULL, FALSE,
7715                         inst->Instruction.Precise);
7716 
7717    /* MAD dst, s1, tmp, s3 */
7718    emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7719                         &inst->Src[0], &src_tmp, &inst->Src[2],
7720                         inst->Instruction.Saturate,
7721                         inst->Instruction.Precise);
7722 
7723    free_temp_indexes(emit);
7724 
7725    return TRUE;
7726 }
7727 
7728 
7729 /**
7730  * Emit code for TGSI_OPCODE_POW instruction.
7731  */
7732 static boolean
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7733 emit_pow(struct svga_shader_emitter_v10 *emit,
7734          const struct tgsi_full_instruction *inst)
7735 {
7736    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7737     * src1.x while VGPU10 computes four values.
7738     *
7739     * dst = POW(src0, src1):
7740     *   dst.xyzw = src0.x ^ src1.x
7741     */
7742    unsigned tmp = get_temp_index(emit);
7743    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7744    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7745    struct tgsi_full_src_register src0_xxxx =
7746       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7747                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7748    struct tgsi_full_src_register src1_xxxx =
7749       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7750                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7751 
7752    /* LOG tmp, s0.xxxx */
7753    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7754                         &tmp_dst, &src0_xxxx, NULL, NULL,
7755                         FALSE, inst->Instruction.Precise);
7756 
7757    /* MUL tmp, tmp, s1.xxxx */
7758    emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7759                         &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7760                         FALSE, inst->Instruction.Precise);
7761 
7762    /* EXP tmp, s0.xxxx */
7763    emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7764                         &inst->Dst[0], &tmp_src, NULL, NULL,
7765                         inst->Instruction.Saturate,
7766                         inst->Instruction.Precise);
7767 
7768    /* free tmp */
7769    free_temp_indexes(emit);
7770 
7771    return TRUE;
7772 }
7773 
7774 
7775 /**
7776  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7777  */
7778 static boolean
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7779 emit_rcp(struct svga_shader_emitter_v10 *emit,
7780          const struct tgsi_full_instruction *inst)
7781 {
7782    if (emit->version >= 50) {
7783       /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7784        * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7785        * to manipulate the src register's swizzle.
7786        */
7787       struct tgsi_full_src_register src = inst->Src[0];
7788       src.Register.SwizzleY =
7789       src.Register.SwizzleZ =
7790       src.Register.SwizzleW = src.Register.SwizzleX;
7791 
7792       begin_emit_instruction(emit);
7793       emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7794                           inst->Instruction.Saturate,
7795                           inst->Instruction.Precise);
7796       emit_dst_register(emit, &inst->Dst[0]);
7797       emit_src_register(emit, &src);
7798       end_emit_instruction(emit);
7799    }
7800    else {
7801       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7802 
7803       unsigned tmp = get_temp_index(emit);
7804       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7805       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7806 
7807       struct tgsi_full_dst_register tmp_dst_x =
7808          writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7809       struct tgsi_full_src_register tmp_src_xxxx =
7810          scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7811 
7812       /* DIV tmp.x, 1.0, s0 */
7813       emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7814                            &tmp_dst_x, &one, &inst->Src[0], NULL,
7815                            FALSE, inst->Instruction.Precise);
7816 
7817       /* MOV dst, tmp.xxxx */
7818       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7819                            &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7820                            inst->Instruction.Saturate,
7821                            inst->Instruction.Precise);
7822 
7823       free_temp_indexes(emit);
7824    }
7825 
7826    return TRUE;
7827 }
7828 
7829 
7830 /**
7831  * Emit code for TGSI_OPCODE_RSQ instruction.
7832  */
7833 static boolean
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7834 emit_rsq(struct svga_shader_emitter_v10 *emit,
7835          const struct tgsi_full_instruction *inst)
7836 {
7837    /* dst = RSQ(src):
7838     *   dst.xyzw = 1 / sqrt(src.x)
7839     * Translates into:
7840     *   RSQ tmp, src.x
7841     *   MOV dst, tmp.xxxx
7842     */
7843 
7844    unsigned tmp = get_temp_index(emit);
7845    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7846    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7847 
7848    struct tgsi_full_dst_register tmp_dst_x =
7849       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7850    struct tgsi_full_src_register tmp_src_xxxx =
7851       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7852 
7853    /* RSQ tmp, src.x */
7854    emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7855                         &tmp_dst_x, &inst->Src[0], NULL, NULL,
7856                         FALSE, inst->Instruction.Precise);
7857 
7858    /* MOV dst, tmp.xxxx */
7859    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7860                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7861                         inst->Instruction.Saturate,
7862                         inst->Instruction.Precise);
7863 
7864    /* free tmp */
7865    free_temp_indexes(emit);
7866 
7867    return TRUE;
7868 }
7869 
7870 
7871 /**
7872  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7873  */
7874 static boolean
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7875 emit_seq(struct svga_shader_emitter_v10 *emit,
7876          const struct tgsi_full_instruction *inst)
7877 {
7878    /* dst = SEQ(s0, s1):
7879     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7880     * Translates into:
7881     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7882     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7883     */
7884    unsigned tmp = get_temp_index(emit);
7885    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7886    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7887    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7888    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7889 
7890    /* EQ tmp, s0, s1 */
7891    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7892                         &inst->Src[1]);
7893 
7894    /* MOVC dst, tmp, one, zero */
7895    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7896                         &one, &zero);
7897 
7898    free_temp_indexes(emit);
7899 
7900    return TRUE;
7901 }
7902 
7903 
7904 /**
7905  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7906  */
7907 static boolean
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7908 emit_sge(struct svga_shader_emitter_v10 *emit,
7909          const struct tgsi_full_instruction *inst)
7910 {
7911    /* dst = SGE(s0, s1):
7912     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7913     * Translates into:
7914     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7915     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7916     */
7917    unsigned tmp = get_temp_index(emit);
7918    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7919    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7920    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7921    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7922 
7923    /* GE tmp, s0, s1 */
7924    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7925                         &inst->Src[1]);
7926 
7927    /* MOVC dst, tmp, one, zero */
7928    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7929                         &one, &zero);
7930 
7931    free_temp_indexes(emit);
7932 
7933    return TRUE;
7934 }
7935 
7936 
7937 /**
7938  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7939  */
7940 static boolean
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7941 emit_sgt(struct svga_shader_emitter_v10 *emit,
7942          const struct tgsi_full_instruction *inst)
7943 {
7944    /* dst = SGT(s0, s1):
7945     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
7946     * Translates into:
7947     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7948     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7949     */
7950    unsigned tmp = get_temp_index(emit);
7951    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7952    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7953    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7954    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7955 
7956    /* LT tmp, s1, s0 */
7957    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7958                         &inst->Src[0]);
7959 
7960    /* MOVC dst, tmp, one, zero */
7961    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7962                         &one, &zero);
7963 
7964    free_temp_indexes(emit);
7965 
7966    return TRUE;
7967 }
7968 
7969 
7970 /**
7971  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7972  */
7973 static boolean
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7974 emit_sincos(struct svga_shader_emitter_v10 *emit,
7975          const struct tgsi_full_instruction *inst)
7976 {
7977    unsigned tmp = get_temp_index(emit);
7978    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7979    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7980 
7981    struct tgsi_full_src_register tmp_src_xxxx =
7982       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7983    struct tgsi_full_dst_register tmp_dst_x =
7984       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7985 
7986    begin_emit_instruction(emit);
7987    emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7988 
7989    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7990    {
7991       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
7992       emit_null_dst_register(emit);  /* second destination register */
7993    }
7994    else {
7995       emit_null_dst_register(emit);
7996       emit_dst_register(emit, &tmp_dst_x);
7997    }
7998 
7999    emit_src_register(emit, &inst->Src[0]);
8000    end_emit_instruction(emit);
8001 
8002    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
8003                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
8004                         inst->Instruction.Saturate,
8005                         inst->Instruction.Precise);
8006 
8007    free_temp_indexes(emit);
8008 
8009    return TRUE;
8010 }
8011 
8012 
8013 /**
8014  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
8015  */
8016 static boolean
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8017 emit_sle(struct svga_shader_emitter_v10 *emit,
8018          const struct tgsi_full_instruction *inst)
8019 {
8020    /* dst = SLE(s0, s1):
8021     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
8022     * Translates into:
8023     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
8024     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8025     */
8026    unsigned tmp = get_temp_index(emit);
8027    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8028    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8029    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8030    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8031 
8032    /* GE tmp, s1, s0 */
8033    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
8034                         &inst->Src[0]);
8035 
8036    /* MOVC dst, tmp, one, zero */
8037    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8038                         &one, &zero);
8039 
8040    free_temp_indexes(emit);
8041 
8042    return TRUE;
8043 }
8044 
8045 
8046 /**
8047  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
8048  */
8049 static boolean
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8050 emit_slt(struct svga_shader_emitter_v10 *emit,
8051          const struct tgsi_full_instruction *inst)
8052 {
8053    /* dst = SLT(s0, s1):
8054     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
8055     * Translates into:
8056     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
8057     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8058     */
8059    unsigned tmp = get_temp_index(emit);
8060    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8061    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8062    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8063    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8064 
8065    /* LT tmp, s0, s1 */
8066    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
8067                         &inst->Src[1]);
8068 
8069    /* MOVC dst, tmp, one, zero */
8070    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8071                         &one, &zero);
8072 
8073    free_temp_indexes(emit);
8074 
8075    return TRUE;
8076 }
8077 
8078 
8079 /**
8080  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
8081  */
8082 static boolean
emit_sne(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8083 emit_sne(struct svga_shader_emitter_v10 *emit,
8084          const struct tgsi_full_instruction *inst)
8085 {
8086    /* dst = SNE(s0, s1):
8087     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
8088     * Translates into:
8089     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
8090     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
8091     */
8092    unsigned tmp = get_temp_index(emit);
8093    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8094    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8095    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8096    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8097 
8098    /* NE tmp, s0, s1 */
8099    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
8100                         &inst->Src[1]);
8101 
8102    /* MOVC dst, tmp, one, zero */
8103    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
8104                         &one, &zero);
8105 
8106    free_temp_indexes(emit);
8107 
8108    return TRUE;
8109 }
8110 
8111 
8112 /**
8113  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
8114  */
8115 static boolean
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8116 emit_ssg(struct svga_shader_emitter_v10 *emit,
8117          const struct tgsi_full_instruction *inst)
8118 {
8119    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
8120     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
8121     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
8122     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
8123     * Translates into:
8124     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
8125     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
8126     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
8127     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
8128     */
8129    struct tgsi_full_src_register zero =
8130       make_immediate_reg_float(emit, 0.0f);
8131    struct tgsi_full_src_register one =
8132       make_immediate_reg_float(emit, 1.0f);
8133    struct tgsi_full_src_register neg_one =
8134       make_immediate_reg_float(emit, -1.0f);
8135 
8136    unsigned tmp1 = get_temp_index(emit);
8137    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8138    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8139 
8140    unsigned tmp2 = get_temp_index(emit);
8141    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8142    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8143 
8144    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
8145                         &zero);
8146    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
8147                         &neg_one, &zero);
8148    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
8149                         &inst->Src[0]);
8150    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
8151                         &one, &tmp2_src);
8152 
8153    free_temp_indexes(emit);
8154 
8155    return TRUE;
8156 }
8157 
8158 
8159 /**
8160  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
8161  */
8162 static boolean
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8163 emit_issg(struct svga_shader_emitter_v10 *emit,
8164           const struct tgsi_full_instruction *inst)
8165 {
8166    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
8167     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
8168     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
8169     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
8170     * Translates into:
8171     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
8172     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
8173     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
8174     */
8175    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
8176 
8177    unsigned tmp1 = get_temp_index(emit);
8178    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
8179    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
8180 
8181    unsigned tmp2 = get_temp_index(emit);
8182    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
8183    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
8184 
8185    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
8186 
8187    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
8188                         &inst->Src[0], &zero);
8189    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
8190                         &zero, &inst->Src[0]);
8191    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
8192                         &tmp1_src, &neg_tmp2);
8193 
8194    free_temp_indexes(emit);
8195 
8196    return TRUE;
8197 }
8198 
8199 
8200 /**
8201  * Emit a comparison instruction.  The dest register will get
8202  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
8203  */
8204 static void
emit_comparison(struct svga_shader_emitter_v10 * emit,SVGA3dCmpFunc func,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src0,const struct tgsi_full_src_register * src1)8205 emit_comparison(struct svga_shader_emitter_v10 *emit,
8206                 SVGA3dCmpFunc func,
8207                 const struct tgsi_full_dst_register *dst,
8208                 const struct tgsi_full_src_register *src0,
8209                 const struct tgsi_full_src_register *src1)
8210 {
8211    struct tgsi_full_src_register immediate;
8212    VGPU10OpcodeToken0 opcode0;
8213    boolean swapSrc = FALSE;
8214 
8215    /* Sanity checks for svga vs. gallium enums */
8216    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
8217    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
8218 
8219    opcode0.value = 0;
8220 
8221    switch (func) {
8222    case SVGA3D_CMP_NEVER:
8223       immediate = make_immediate_reg_int(emit, 0);
8224       /* MOV dst, {0} */
8225       begin_emit_instruction(emit);
8226       emit_dword(emit, VGPU10_OPCODE_MOV);
8227       emit_dst_register(emit, dst);
8228       emit_src_register(emit, &immediate);
8229       end_emit_instruction(emit);
8230       return;
8231    case SVGA3D_CMP_ALWAYS:
8232       immediate = make_immediate_reg_int(emit, -1);
8233       /* MOV dst, {-1} */
8234       begin_emit_instruction(emit);
8235       emit_dword(emit, VGPU10_OPCODE_MOV);
8236       emit_dst_register(emit, dst);
8237       emit_src_register(emit, &immediate);
8238       end_emit_instruction(emit);
8239       return;
8240    case SVGA3D_CMP_LESS:
8241       opcode0.opcodeType = VGPU10_OPCODE_LT;
8242       break;
8243    case SVGA3D_CMP_EQUAL:
8244       opcode0.opcodeType = VGPU10_OPCODE_EQ;
8245       break;
8246    case SVGA3D_CMP_LESSEQUAL:
8247       opcode0.opcodeType = VGPU10_OPCODE_GE;
8248       swapSrc = TRUE;
8249       break;
8250    case SVGA3D_CMP_GREATER:
8251       opcode0.opcodeType = VGPU10_OPCODE_LT;
8252       swapSrc = TRUE;
8253       break;
8254    case SVGA3D_CMP_NOTEQUAL:
8255       opcode0.opcodeType = VGPU10_OPCODE_NE;
8256       break;
8257    case SVGA3D_CMP_GREATEREQUAL:
8258       opcode0.opcodeType = VGPU10_OPCODE_GE;
8259       break;
8260    default:
8261       assert(!"Unexpected comparison mode");
8262       opcode0.opcodeType = VGPU10_OPCODE_EQ;
8263    }
8264 
8265    begin_emit_instruction(emit);
8266    emit_dword(emit, opcode0.value);
8267    emit_dst_register(emit, dst);
8268    if (swapSrc) {
8269       emit_src_register(emit, src1);
8270       emit_src_register(emit, src0);
8271    }
8272    else {
8273       emit_src_register(emit, src0);
8274       emit_src_register(emit, src1);
8275    }
8276    end_emit_instruction(emit);
8277 }
8278 
8279 
8280 /**
8281  * Get texel/address offsets for a texture instruction.
8282  */
8283 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])8284 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
8285                   const struct tgsi_full_instruction *inst, int offsets[3])
8286 {
8287    if (inst->Texture.NumOffsets == 1) {
8288       /* According to OpenGL Shader Language spec the offsets are only
8289        * fetched from a previously-declared immediate/literal.
8290        */
8291       const struct tgsi_texture_offset *off = inst->TexOffsets;
8292       const unsigned index = off[0].Index;
8293       const unsigned swizzleX = off[0].SwizzleX;
8294       const unsigned swizzleY = off[0].SwizzleY;
8295       const unsigned swizzleZ = off[0].SwizzleZ;
8296       const union tgsi_immediate_data *imm = emit->immediates[index];
8297 
8298       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
8299 
8300       offsets[0] = imm[swizzleX].Int;
8301       offsets[1] = imm[swizzleY].Int;
8302       offsets[2] = imm[swizzleZ].Int;
8303    }
8304    else {
8305       offsets[0] = offsets[1] = offsets[2] = 0;
8306    }
8307 }
8308 
8309 
8310 /**
8311  * Set up the coordinate register for texture sampling.
8312  * When we're sampling from a RECT texture we have to scale the
8313  * unnormalized coordinate to a normalized coordinate.
8314  * We do that by multiplying the coordinate by an "extra" constant.
8315  * An alternative would be to use the RESINFO instruction to query the
8316  * texture's size.
8317  */
8318 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)8319 setup_texcoord(struct svga_shader_emitter_v10 *emit,
8320                unsigned unit,
8321                const struct tgsi_full_src_register *coord)
8322 {
8323    if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
8324       unsigned scale_index = emit->texcoord_scale_index[unit];
8325       unsigned tmp = get_temp_index(emit);
8326       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8327       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8328       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
8329 
8330       if (emit->key.tex[unit].texel_bias) {
8331          /* to fix texture coordinate rounding issue, 0.0001 offset is
8332           * been added. This fixes piglit test fbo-blit-scaled-linear. */
8333          struct tgsi_full_src_register offset =
8334             make_immediate_reg_float(emit, 0.0001f);
8335 
8336          /* ADD tmp, coord, offset */
8337          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
8338                               coord, &offset);
8339          /* MUL tmp, tmp, scale */
8340          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8341                               &tmp_src, &scale_src);
8342       }
8343       else {
8344          /* MUL tmp, coord, const[] */
8345          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
8346                               coord, &scale_src);
8347       }
8348       return tmp_src;
8349    }
8350    else {
8351       /* use texcoord as-is */
8352       return *coord;
8353    }
8354 }
8355 
8356 
8357 /**
8358  * For SAMPLE_C instructions, emit the extra src register which indicates
8359  * the reference/comparision value.
8360  */
8361 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)8362 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
8363                           enum tgsi_texture_type target,
8364                           const struct tgsi_full_src_register *coord)
8365 {
8366    struct tgsi_full_src_register coord_src_ref;
8367    int component;
8368 
8369    assert(tgsi_is_shadow_target(target));
8370 
8371    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
8372    assert(component >= 0);
8373 
8374    coord_src_ref = scalar_src(coord, component);
8375 
8376    emit_src_register(emit, &coord_src_ref);
8377 }
8378 
8379 
8380 /**
8381  * Info for implementing texture swizzles.
8382  * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
8383  * functions use this to encapsulate the extra steps needed to perform
8384  * a texture swizzle, or shadow/depth comparisons.
8385  * The shadow/depth comparison is only done here if for the cases where
8386  * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
8387  */
8388 struct tex_swizzle_info
8389 {
8390    boolean swizzled;
8391    boolean shadow_compare;
8392    unsigned unit;
8393    enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
8394    struct tgsi_full_src_register tmp_src;
8395    struct tgsi_full_dst_register tmp_dst;
8396    const struct tgsi_full_dst_register *inst_dst;
8397    const struct tgsi_full_src_register *coord_src;
8398 };
8399 
8400 
8401 /**
8402  * Do setup for handling texture swizzles or shadow compares.
8403  * \param unit  the texture unit
8404  * \param inst  the TGSI texture instruction
8405  * \param shadow_compare  do shadow/depth comparison?
8406  * \param swz  returns the swizzle info
8407  */
8408 static void
begin_tex_swizzle(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_instruction * inst,boolean shadow_compare,struct tex_swizzle_info * swz)8409 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8410                   unsigned unit,
8411                   const struct tgsi_full_instruction *inst,
8412                   boolean shadow_compare,
8413                   struct tex_swizzle_info *swz)
8414 {
8415    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
8416                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
8417                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
8418                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
8419 
8420    swz->shadow_compare = shadow_compare;
8421    swz->texture_target = inst->Texture.Texture;
8422 
8423    if (swz->swizzled || shadow_compare) {
8424       /* Allocate temp register for the result of the SAMPLE instruction
8425        * and the source of the MOV/compare/swizzle instructions.
8426        */
8427       unsigned tmp = get_temp_index(emit);
8428       swz->tmp_src = make_src_temp_reg(tmp);
8429       swz->tmp_dst = make_dst_temp_reg(tmp);
8430 
8431       swz->unit = unit;
8432    }
8433    swz->inst_dst = &inst->Dst[0];
8434    swz->coord_src = &inst->Src[0];
8435 
8436    emit->shadow_compare_units |= shadow_compare << unit;
8437 }
8438 
8439 
8440 /**
8441  * Returns the register to put the SAMPLE instruction results into.
8442  * This will either be the original instruction dst reg (if no swizzle
8443  * and no shadow comparison) or a temporary reg if there is a swizzle.
8444  */
8445 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)8446 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
8447 {
8448    return (swz->swizzled || swz->shadow_compare)
8449       ? &swz->tmp_dst : swz->inst_dst;
8450 }
8451 
8452 
8453 /**
8454  * This emits the MOV instruction that actually implements a texture swizzle
8455  * and/or shadow comparison.
8456  */
8457 static void
end_tex_swizzle(struct svga_shader_emitter_v10 * emit,const struct tex_swizzle_info * swz)8458 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
8459                 const struct tex_swizzle_info *swz)
8460 {
8461    if (swz->shadow_compare) {
8462       /* Emit extra instructions to compare the fetched texel value against
8463        * a texture coordinate component.  The result of the comparison
8464        * is 0.0 or 1.0.
8465        */
8466       struct tgsi_full_src_register coord_src;
8467       struct tgsi_full_src_register texel_src =
8468          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
8469       struct tgsi_full_src_register one =
8470          make_immediate_reg_float(emit, 1.0f);
8471       /* convert gallium comparison func to SVGA comparison func */
8472       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
8473 
8474       int component =
8475          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
8476       assert(component >= 0);
8477       coord_src = scalar_src(swz->coord_src, component);
8478 
8479       /* COMPARE tmp, coord, texel */
8480       emit_comparison(emit, compare_func,
8481                       &swz->tmp_dst, &coord_src, &texel_src);
8482 
8483       /* AND dest, tmp, {1.0} */
8484       begin_emit_instruction(emit);
8485       emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
8486       if (swz->swizzled) {
8487          emit_dst_register(emit, &swz->tmp_dst);
8488       }
8489       else {
8490          emit_dst_register(emit, swz->inst_dst);
8491       }
8492       emit_src_register(emit, &swz->tmp_src);
8493       emit_src_register(emit, &one);
8494       end_emit_instruction(emit);
8495    }
8496 
8497    if (swz->swizzled) {
8498       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
8499       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
8500       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
8501       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
8502       unsigned writemask_0 = 0, writemask_1 = 0;
8503       boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
8504 
8505       /* Swizzle w/out zero/one terms */
8506       struct tgsi_full_src_register src_swizzled =
8507          swizzle_src(&swz->tmp_src,
8508                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
8509                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
8510                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
8511                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
8512 
8513       /* MOV dst, color(tmp).<swizzle> */
8514       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
8515                            swz->inst_dst, &src_swizzled);
8516 
8517       /* handle swizzle zero terms */
8518       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
8519                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
8520                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
8521                      ((swz_a == PIPE_SWIZZLE_0) << 3));
8522       writemask_0 &= swz->inst_dst->Register.WriteMask;
8523 
8524       if (writemask_0) {
8525          struct tgsi_full_src_register zero = int_tex ?
8526             make_immediate_reg_int(emit, 0) :
8527             make_immediate_reg_float(emit, 0.0f);
8528          struct tgsi_full_dst_register dst =
8529             writemask_dst(swz->inst_dst, writemask_0);
8530 
8531          /* MOV dst.writemask_0, {0,0,0,0} */
8532          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
8533       }
8534 
8535       /* handle swizzle one terms */
8536       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
8537                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
8538                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
8539                      ((swz_a == PIPE_SWIZZLE_1) << 3));
8540       writemask_1 &= swz->inst_dst->Register.WriteMask;
8541 
8542       if (writemask_1) {
8543          struct tgsi_full_src_register one = int_tex ?
8544             make_immediate_reg_int(emit, 1) :
8545             make_immediate_reg_float(emit, 1.0f);
8546          struct tgsi_full_dst_register dst =
8547             writemask_dst(swz->inst_dst, writemask_1);
8548 
8549          /* MOV dst.writemask_1, {1,1,1,1} */
8550          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
8551       }
8552    }
8553 }
8554 
8555 
8556 /**
8557  * Emit code for TGSI_OPCODE_SAMPLE instruction.
8558  */
8559 static boolean
emit_sample(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8560 emit_sample(struct svga_shader_emitter_v10 *emit,
8561             const struct tgsi_full_instruction *inst)
8562 {
8563    const unsigned resource_unit = inst->Src[1].Register.Index;
8564    const unsigned sampler_unit = inst->Src[2].Register.Index;
8565    struct tgsi_full_src_register coord;
8566    int offsets[3];
8567    struct tex_swizzle_info swz_info;
8568 
8569    begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
8570 
8571    get_texel_offsets(emit, inst, offsets);
8572 
8573    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
8574 
8575    /* SAMPLE dst, coord(s0), resource, sampler */
8576    begin_emit_instruction(emit);
8577 
8578    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
8579     * with LOD=0.  But our virtual GPU accepts this as-is.
8580     */
8581    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
8582                       inst->Instruction.Saturate, offsets);
8583    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8584    emit_src_register(emit, &coord);
8585    emit_resource_register(emit, resource_unit);
8586    emit_sampler_register(emit, sampler_unit);
8587    end_emit_instruction(emit);
8588 
8589    end_tex_swizzle(emit, &swz_info);
8590 
8591    free_temp_indexes(emit);
8592 
8593    return TRUE;
8594 }
8595 
8596 
8597 /**
8598  * Check if a texture instruction is valid.
8599  * An example of an invalid texture instruction is doing shadow comparison
8600  * with an integer-valued texture.
8601  * If we detect an invalid texture instruction, we replace it with:
8602  *   MOV dst, {1,1,1,1};
8603  * \return TRUE if valid, FALSE if invalid.
8604  */
8605 static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8606 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
8607                          const struct tgsi_full_instruction *inst)
8608 {
8609    const unsigned unit = inst->Src[1].Register.Index;
8610    const enum tgsi_texture_type target = inst->Texture.Texture;
8611    boolean valid = TRUE;
8612 
8613    if (tgsi_is_shadow_target(target) &&
8614        is_integer_type(emit->sampler_return_type[unit])) {
8615       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
8616       valid = FALSE;
8617    }
8618    /* XXX might check for other conditions in the future here */
8619 
8620    if (!valid) {
8621       /* emit a MOV dst, {1,1,1,1} instruction. */
8622       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
8623       begin_emit_instruction(emit);
8624       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
8625       emit_dst_register(emit, &inst->Dst[0]);
8626       emit_src_register(emit, &one);
8627       end_emit_instruction(emit);
8628    }
8629 
8630    return valid;
8631 }
8632 
8633 
8634 /**
8635  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
8636  */
8637 static boolean
emit_tex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8638 emit_tex(struct svga_shader_emitter_v10 *emit,
8639          const struct tgsi_full_instruction *inst)
8640 {
8641    const uint unit = inst->Src[1].Register.Index;
8642    const enum tgsi_texture_type target = inst->Texture.Texture;
8643    VGPU10_OPCODE_TYPE opcode;
8644    struct tgsi_full_src_register coord;
8645    int offsets[3];
8646    struct tex_swizzle_info swz_info;
8647    boolean compare_in_shader;
8648 
8649    /* check that the sampler returns a float */
8650    if (!is_valid_tex_instruction(emit, inst))
8651       return TRUE;
8652 
8653    compare_in_shader = tgsi_is_shadow_target(target) &&
8654                        emit->key.tex[unit].compare_in_shader;
8655 
8656    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8657 
8658    get_texel_offsets(emit, inst, offsets);
8659 
8660    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8661 
8662    /* SAMPLE dst, coord(s0), resource, sampler */
8663    begin_emit_instruction(emit);
8664 
8665    if (tgsi_is_shadow_target(target) && !compare_in_shader)
8666       opcode = VGPU10_OPCODE_SAMPLE_C;
8667    else
8668       opcode = VGPU10_OPCODE_SAMPLE;
8669 
8670    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8671    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8672    emit_src_register(emit, &coord);
8673    emit_resource_register(emit, unit);
8674    emit_sampler_register(emit, unit);
8675    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8676       emit_tex_compare_refcoord(emit, target, &coord);
8677    }
8678    end_emit_instruction(emit);
8679 
8680    end_tex_swizzle(emit, &swz_info);
8681 
8682    free_temp_indexes(emit);
8683 
8684    return TRUE;
8685 }
8686 
8687 /**
8688  * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8689  */
8690 static boolean
emit_tg4(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8691 emit_tg4(struct svga_shader_emitter_v10 *emit,
8692          const struct tgsi_full_instruction *inst)
8693 {
8694    const uint unit = inst->Src[2].Register.Index;
8695    struct tgsi_full_src_register src;
8696    struct tgsi_full_src_register offset_src, sampler, ref;
8697    int offsets[3];
8698 
8699    /* check that the sampler returns a float */
8700    if (!is_valid_tex_instruction(emit, inst))
8701       return TRUE;
8702 
8703    if (emit->version >= 50) {
8704       unsigned target = inst->Texture.Texture;
8705       int index = inst->Src[1].Register.Index;
8706       const union tgsi_immediate_data *imm = emit->immediates[index];
8707       int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8708       unsigned select_swizzle = PIPE_SWIZZLE_X;
8709 
8710       if (!tgsi_is_shadow_target(target)) {
8711          switch (select_comp) {
8712          case 0:
8713             select_swizzle = emit->key.tex[unit].swizzle_r;
8714             break;
8715          case 1:
8716             select_swizzle = emit->key.tex[unit].swizzle_g;
8717             break;
8718          case 2:
8719             select_swizzle = emit->key.tex[unit].swizzle_b;
8720             break;
8721          case 3:
8722             select_swizzle = emit->key.tex[unit].swizzle_a;
8723             break;
8724          default:
8725             assert(!"Unexpected component in texture gather swizzle");
8726          }
8727       }
8728       else {
8729          select_swizzle = emit->key.tex[unit].swizzle_r;
8730       }
8731 
8732       if (select_swizzle == PIPE_SWIZZLE_1) {
8733          src = make_immediate_reg_float(emit, 1.0);
8734          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8735          return TRUE;
8736       }
8737       else if (select_swizzle == PIPE_SWIZZLE_0) {
8738          src = make_immediate_reg_float(emit, 0.0);
8739          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8740          return TRUE;
8741       }
8742 
8743       src = setup_texcoord(emit, unit, &inst->Src[0]);
8744 
8745       /* GATHER4 dst, coord, resource, sampler */
8746       /* GATHER4_C dst, coord, resource, sampler ref */
8747       /* GATHER4_PO dst, coord, offset resource, sampler */
8748       /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8749       begin_emit_instruction(emit);
8750       if (inst->Texture.NumOffsets == 1) {
8751          if (tgsi_is_shadow_target(target)) {
8752             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8753                         inst->Instruction.Saturate);
8754          }
8755          else {
8756             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8757                         inst->Instruction.Saturate);
8758          }
8759       }
8760       else {
8761          if (tgsi_is_shadow_target(target)) {
8762             emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8763                         inst->Instruction.Saturate);
8764          }
8765          else {
8766             emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8767                         inst->Instruction.Saturate);
8768          }
8769       }
8770 
8771       emit_dst_register(emit, &inst->Dst[0]);
8772       emit_src_register(emit, &src);
8773       if (inst->Texture.NumOffsets == 1) {
8774          /* offset */
8775          offset_src = make_src_reg(inst->TexOffsets[0].File,
8776                                    inst->TexOffsets[0].Index);
8777          offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8778                                   inst->TexOffsets[0].SwizzleY,
8779                                   inst->TexOffsets[0].SwizzleZ,
8780                                   TGSI_SWIZZLE_W);
8781          emit_src_register(emit, &offset_src);
8782       }
8783 
8784       /* resource */
8785       emit_resource_register(emit, unit);
8786 
8787       /* sampler */
8788       sampler = make_src_reg(TGSI_FILE_SAMPLER,
8789                              emit->key.tex[unit].sampler_index);
8790       sampler.Register.SwizzleX =
8791       sampler.Register.SwizzleY =
8792       sampler.Register.SwizzleZ =
8793       sampler.Register.SwizzleW = select_swizzle;
8794       emit_src_register(emit, &sampler);
8795 
8796       if (tgsi_is_shadow_target(target)) {
8797          /* ref */
8798          if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8799             ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8800             emit_tex_compare_refcoord(emit, target, &ref);
8801          }
8802          else {
8803             emit_tex_compare_refcoord(emit, target, &src);
8804          }
8805       }
8806 
8807       end_emit_instruction(emit);
8808       free_temp_indexes(emit);
8809    }
8810    else {
8811       /* Only a single channel is supported in SM4_1 and we report
8812        * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8813        * Only the 0th component will be gathered.
8814        */
8815       switch (emit->key.tex[unit].swizzle_r) {
8816       case PIPE_SWIZZLE_X:
8817          get_texel_offsets(emit, inst, offsets);
8818          src = setup_texcoord(emit, unit, &inst->Src[0]);
8819 
8820          /* Gather dst, coord, resource, sampler */
8821          begin_emit_instruction(emit);
8822          emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8823                             inst->Instruction.Saturate, offsets);
8824          emit_dst_register(emit, &inst->Dst[0]);
8825          emit_src_register(emit, &src);
8826          emit_resource_register(emit, unit);
8827 
8828          /* sampler */
8829          sampler = make_src_reg(TGSI_FILE_SAMPLER,
8830                                 emit->key.tex[unit].sampler_index);
8831          sampler.Register.SwizzleX =
8832          sampler.Register.SwizzleY =
8833          sampler.Register.SwizzleZ =
8834          sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8835          emit_src_register(emit, &sampler);
8836 
8837          end_emit_instruction(emit);
8838          break;
8839       case PIPE_SWIZZLE_W:
8840       case PIPE_SWIZZLE_1:
8841          src = make_immediate_reg_float(emit, 1.0);
8842          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8843          break;
8844       case PIPE_SWIZZLE_Y:
8845       case PIPE_SWIZZLE_Z:
8846       case PIPE_SWIZZLE_0:
8847       default:
8848          src = make_immediate_reg_float(emit, 0.0);
8849          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8850          break;
8851       }
8852    }
8853 
8854    return TRUE;
8855 }
8856 
8857 
8858 
8859 /**
8860  * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8861  */
8862 static boolean
emit_tex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8863 emit_tex2(struct svga_shader_emitter_v10 *emit,
8864          const struct tgsi_full_instruction *inst)
8865 {
8866    const uint unit = inst->Src[2].Register.Index;
8867    unsigned target = inst->Texture.Texture;
8868    struct tgsi_full_src_register coord, ref;
8869    int offsets[3];
8870    struct tex_swizzle_info swz_info;
8871    VGPU10_OPCODE_TYPE opcode;
8872    boolean compare_in_shader;
8873 
8874    /* check that the sampler returns a float */
8875    if (!is_valid_tex_instruction(emit, inst))
8876       return TRUE;
8877 
8878    compare_in_shader = emit->key.tex[unit].compare_in_shader;
8879    if (compare_in_shader)
8880       opcode = VGPU10_OPCODE_SAMPLE;
8881    else
8882       opcode = VGPU10_OPCODE_SAMPLE_C;
8883 
8884    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8885 
8886    get_texel_offsets(emit, inst, offsets);
8887 
8888    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8889    ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8890 
8891    /* SAMPLE_C dst, coord, resource, sampler, ref */
8892    begin_emit_instruction(emit);
8893    emit_sample_opcode(emit, opcode,
8894                       inst->Instruction.Saturate, offsets);
8895    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8896    emit_src_register(emit, &coord);
8897    emit_resource_register(emit, unit);
8898    emit_sampler_register(emit, unit);
8899    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8900       emit_tex_compare_refcoord(emit, target, &ref);
8901    }
8902    end_emit_instruction(emit);
8903 
8904    end_tex_swizzle(emit, &swz_info);
8905 
8906    free_temp_indexes(emit);
8907 
8908    return TRUE;
8909 }
8910 
8911 
8912 /**
8913  * Emit code for TGSI_OPCODE_TXP (projective texture)
8914  */
8915 static boolean
emit_txp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8916 emit_txp(struct svga_shader_emitter_v10 *emit,
8917          const struct tgsi_full_instruction *inst)
8918 {
8919    const uint unit = inst->Src[1].Register.Index;
8920    const enum tgsi_texture_type target = inst->Texture.Texture;
8921    VGPU10_OPCODE_TYPE opcode;
8922    int offsets[3];
8923    unsigned tmp = get_temp_index(emit);
8924    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8925    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8926    struct tgsi_full_src_register src0_wwww =
8927       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8928    struct tgsi_full_src_register coord;
8929    struct tex_swizzle_info swz_info;
8930    boolean compare_in_shader;
8931 
8932    /* check that the sampler returns a float */
8933    if (!is_valid_tex_instruction(emit, inst))
8934       return TRUE;
8935 
8936    compare_in_shader = tgsi_is_shadow_target(target) &&
8937                        emit->key.tex[unit].compare_in_shader;
8938 
8939    begin_tex_swizzle(emit, unit, inst, compare_in_shader, &swz_info);
8940 
8941    get_texel_offsets(emit, inst, offsets);
8942 
8943    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8944 
8945    /* DIV tmp, coord, coord.wwww */
8946    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8947                         &coord, &src0_wwww);
8948 
8949    /* SAMPLE dst, coord(tmp), resource, sampler */
8950    begin_emit_instruction(emit);
8951 
8952    if (tgsi_is_shadow_target(target) && !compare_in_shader)
8953       /* NOTE: for non-fragment shaders, we should use
8954        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8955        */
8956       opcode = VGPU10_OPCODE_SAMPLE_C;
8957    else
8958       opcode = VGPU10_OPCODE_SAMPLE;
8959 
8960    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8961    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8962    emit_src_register(emit, &tmp_src);  /* projected coord */
8963    emit_resource_register(emit, unit);
8964    emit_sampler_register(emit, unit);
8965    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8966       emit_tex_compare_refcoord(emit, target, &tmp_src);
8967    }
8968    end_emit_instruction(emit);
8969 
8970    end_tex_swizzle(emit, &swz_info);
8971 
8972    free_temp_indexes(emit);
8973 
8974    return TRUE;
8975 }
8976 
8977 
8978 /**
8979  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8980  */
8981 static boolean
emit_txd(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8982 emit_txd(struct svga_shader_emitter_v10 *emit,
8983          const struct tgsi_full_instruction *inst)
8984 {
8985    const uint unit = inst->Src[3].Register.Index;
8986    const enum tgsi_texture_type target = inst->Texture.Texture;
8987    int offsets[3];
8988    struct tgsi_full_src_register coord;
8989    struct tex_swizzle_info swz_info;
8990 
8991    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8992                      &swz_info);
8993 
8994    get_texel_offsets(emit, inst, offsets);
8995 
8996    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8997 
8998    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8999    begin_emit_instruction(emit);
9000    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
9001                       inst->Instruction.Saturate, offsets);
9002    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9003    emit_src_register(emit, &coord);
9004    emit_resource_register(emit, unit);
9005    emit_sampler_register(emit, unit);
9006    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
9007    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
9008    end_emit_instruction(emit);
9009 
9010    end_tex_swizzle(emit, &swz_info);
9011 
9012    free_temp_indexes(emit);
9013 
9014    return TRUE;
9015 }
9016 
9017 
9018 /**
9019  * Emit code for TGSI_OPCODE_TXF (texel fetch)
9020  */
9021 static boolean
emit_txf(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9022 emit_txf(struct svga_shader_emitter_v10 *emit,
9023          const struct tgsi_full_instruction *inst)
9024 {
9025    const uint unit = inst->Src[1].Register.Index;
9026    const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
9027       && emit->key.tex[unit].num_samples > 1;
9028    int offsets[3];
9029    struct tex_swizzle_info swz_info;
9030 
9031    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
9032 
9033    get_texel_offsets(emit, inst, offsets);
9034 
9035    if (msaa) {
9036       assert(emit->key.tex[unit].num_samples > 1);
9037 
9038       /* Fetch one sample from an MSAA texture */
9039       struct tgsi_full_src_register sampleIndex =
9040          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9041       /* LD_MS dst, coord(s0), resource, sampleIndex */
9042       begin_emit_instruction(emit);
9043       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
9044                          inst->Instruction.Saturate, offsets);
9045       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9046       emit_src_register(emit, &inst->Src[0]);
9047       emit_resource_register(emit, unit);
9048       emit_src_register(emit, &sampleIndex);
9049       end_emit_instruction(emit);
9050    }
9051    else {
9052       /* Fetch one texel specified by integer coordinate */
9053       /* LD dst, coord(s0), resource */
9054       begin_emit_instruction(emit);
9055       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
9056                          inst->Instruction.Saturate, offsets);
9057       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9058       emit_src_register(emit, &inst->Src[0]);
9059       emit_resource_register(emit, unit);
9060       end_emit_instruction(emit);
9061    }
9062 
9063    end_tex_swizzle(emit, &swz_info);
9064 
9065    free_temp_indexes(emit);
9066 
9067    return TRUE;
9068 }
9069 
9070 
9071 /**
9072  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
9073  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
9074  */
9075 static boolean
emit_txl_txb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9076 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
9077              const struct tgsi_full_instruction *inst)
9078 {
9079    const enum tgsi_texture_type target = inst->Texture.Texture;
9080    VGPU10_OPCODE_TYPE opcode;
9081    unsigned unit;
9082    int offsets[3];
9083    struct tgsi_full_src_register coord, lod_bias;
9084    struct tex_swizzle_info swz_info;
9085 
9086    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
9087           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
9088           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
9089 
9090    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
9091       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9092       unit = inst->Src[2].Register.Index;
9093    }
9094    else {
9095       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
9096       unit = inst->Src[1].Register.Index;
9097    }
9098 
9099    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9100                      &swz_info);
9101 
9102    get_texel_offsets(emit, inst, offsets);
9103 
9104    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9105 
9106    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
9107    begin_emit_instruction(emit);
9108    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
9109       opcode = VGPU10_OPCODE_SAMPLE_L;
9110    }
9111    else {
9112       opcode = VGPU10_OPCODE_SAMPLE_B;
9113    }
9114    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9115    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9116    emit_src_register(emit, &coord);
9117    emit_resource_register(emit, unit);
9118    emit_sampler_register(emit, unit);
9119    emit_src_register(emit, &lod_bias);
9120    end_emit_instruction(emit);
9121 
9122    end_tex_swizzle(emit, &swz_info);
9123 
9124    free_temp_indexes(emit);
9125 
9126    return TRUE;
9127 }
9128 
9129 
9130 /**
9131  * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
9132  */
9133 static boolean
emit_txl2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9134 emit_txl2(struct svga_shader_emitter_v10 *emit,
9135           const struct tgsi_full_instruction *inst)
9136 {
9137    unsigned target = inst->Texture.Texture;
9138    unsigned opcode, unit;
9139    int offsets[3];
9140    struct tgsi_full_src_register coord, lod;
9141    struct tex_swizzle_info swz_info;
9142 
9143    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
9144 
9145    lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
9146    unit = inst->Src[2].Register.Index;
9147 
9148    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
9149                      &swz_info);
9150 
9151    get_texel_offsets(emit, inst, offsets);
9152 
9153    coord = setup_texcoord(emit, unit, &inst->Src[0]);
9154 
9155    /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
9156    begin_emit_instruction(emit);
9157    opcode = VGPU10_OPCODE_SAMPLE_L;
9158    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
9159    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
9160    emit_src_register(emit, &coord);
9161    emit_resource_register(emit, unit);
9162    emit_sampler_register(emit, unit);
9163    emit_src_register(emit, &lod);
9164    end_emit_instruction(emit);
9165 
9166    end_tex_swizzle(emit, &swz_info);
9167 
9168    free_temp_indexes(emit);
9169 
9170    return TRUE;
9171 }
9172 
9173 
9174 /**
9175  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
9176  */
9177 static boolean
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9178 emit_txq(struct svga_shader_emitter_v10 *emit,
9179          const struct tgsi_full_instruction *inst)
9180 {
9181    const uint unit = inst->Src[1].Register.Index;
9182 
9183    if (emit->key.tex[unit].target == PIPE_BUFFER) {
9184       /* RESINFO does not support querying texture buffers, so we instead
9185        * store texture buffer sizes in shader constants, then copy them to
9186        * implement TXQ instead of emitting RESINFO.
9187        * MOV dst, const[texture_buffer_size_index[unit]]
9188        */
9189       struct tgsi_full_src_register size_src =
9190          make_src_const_reg(emit->texture_buffer_size_index[unit]);
9191       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
9192    } else {
9193       /* RESINFO dst, srcMipLevel, resource */
9194       begin_emit_instruction(emit);
9195       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
9196       emit_dst_register(emit, &inst->Dst[0]);
9197       emit_src_register(emit, &inst->Src[0]);
9198       emit_resource_register(emit, unit);
9199       end_emit_instruction(emit);
9200    }
9201 
9202    free_temp_indexes(emit);
9203 
9204    return TRUE;
9205 }
9206 
9207 
9208 /**
9209  * Does this opcode produce a double-precision result?
9210  * XXX perhaps move this to a TGSI utility.
9211  */
9212 static bool
opcode_has_dbl_dst(unsigned opcode)9213 opcode_has_dbl_dst(unsigned opcode)
9214 {
9215    switch (opcode) {
9216    case TGSI_OPCODE_F2D:
9217    case TGSI_OPCODE_DABS:
9218    case TGSI_OPCODE_DADD:
9219    case TGSI_OPCODE_DFRAC:
9220    case TGSI_OPCODE_DMAX:
9221    case TGSI_OPCODE_DMIN:
9222    case TGSI_OPCODE_DMUL:
9223    case TGSI_OPCODE_DNEG:
9224    case TGSI_OPCODE_I2D:
9225    case TGSI_OPCODE_U2D:
9226    case TGSI_OPCODE_DFMA:
9227       // XXX more TBD
9228       return true;
9229    default:
9230       return false;
9231    }
9232 }
9233 
9234 
9235 /**
9236  * Does this opcode use double-precision source registers?
9237  */
9238 static bool
opcode_has_dbl_src(unsigned opcode)9239 opcode_has_dbl_src(unsigned opcode)
9240 {
9241    switch (opcode) {
9242    case TGSI_OPCODE_D2F:
9243    case TGSI_OPCODE_DABS:
9244    case TGSI_OPCODE_DADD:
9245    case TGSI_OPCODE_DFRAC:
9246    case TGSI_OPCODE_DMAX:
9247    case TGSI_OPCODE_DMIN:
9248    case TGSI_OPCODE_DMUL:
9249    case TGSI_OPCODE_DNEG:
9250    case TGSI_OPCODE_D2I:
9251    case TGSI_OPCODE_D2U:
9252    case TGSI_OPCODE_DFMA:
9253    case TGSI_OPCODE_DSLT:
9254    case TGSI_OPCODE_DSGE:
9255    case TGSI_OPCODE_DSEQ:
9256    case TGSI_OPCODE_DSNE:
9257    case TGSI_OPCODE_DRCP:
9258    case TGSI_OPCODE_DSQRT:
9259    case TGSI_OPCODE_DMAD:
9260    case TGSI_OPCODE_DLDEXP:
9261    case TGSI_OPCODE_DFRACEXP:
9262    case TGSI_OPCODE_DRSQ:
9263    case TGSI_OPCODE_DTRUNC:
9264    case TGSI_OPCODE_DCEIL:
9265    case TGSI_OPCODE_DFLR:
9266    case TGSI_OPCODE_DROUND:
9267    case TGSI_OPCODE_DSSG:
9268       return true;
9269    default:
9270       return false;
9271    }
9272 }
9273 
9274 
9275 /**
9276  * Check that the swizzle for reading from a double-precision register
9277  * is valid. If not valid, move the source to a temporary register first.
9278  */
9279 static struct tgsi_full_src_register
check_double_src(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)9280 check_double_src(struct svga_shader_emitter_v10 *emit,
9281                  const struct tgsi_full_src_register *reg)
9282 {
9283    struct tgsi_full_src_register src;
9284 
9285    if (((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
9286          reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
9287         (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
9288          reg->Register.SwizzleY == PIPE_SWIZZLE_W)) &&
9289        ((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
9290          reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
9291         (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
9292          reg->Register.SwizzleW == PIPE_SWIZZLE_W))) {
9293       src = *reg;
9294    } else {
9295       /* move the src to a temporary to fix the swizzle */
9296       unsigned tmp = get_temp_index(emit);
9297       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9298       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9299       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, reg);
9300       src = tmp_src;
9301 
9302       /* The temporary index will be released in the caller */
9303    }
9304    return src;
9305 }
9306 
9307 /**
9308  * Check that the writemask for a double-precision instruction is valid.
9309  */
9310 static void
check_double_dst_writemask(const struct tgsi_full_instruction * inst)9311 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
9312 {
9313    ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
9314 
9315    switch (inst->Instruction.Opcode) {
9316    case TGSI_OPCODE_DABS:
9317    case TGSI_OPCODE_DADD:
9318    case TGSI_OPCODE_DFRAC:
9319    case TGSI_OPCODE_DNEG:
9320    case TGSI_OPCODE_DMAD:
9321    case TGSI_OPCODE_DMAX:
9322    case TGSI_OPCODE_DMIN:
9323    case TGSI_OPCODE_DMUL:
9324    case TGSI_OPCODE_DRCP:
9325    case TGSI_OPCODE_DSQRT:
9326    case TGSI_OPCODE_F2D:
9327    case TGSI_OPCODE_DFMA:
9328       assert(writemask == TGSI_WRITEMASK_XYZW ||
9329              writemask == TGSI_WRITEMASK_XY ||
9330              writemask == TGSI_WRITEMASK_ZW);
9331       break;
9332    case TGSI_OPCODE_DSEQ:
9333    case TGSI_OPCODE_DSGE:
9334    case TGSI_OPCODE_DSNE:
9335    case TGSI_OPCODE_DSLT:
9336    case TGSI_OPCODE_D2I:
9337    case TGSI_OPCODE_D2U:
9338       /* Write to 1 or 2 components only */
9339       assert(util_bitcount(writemask) <= 2);
9340       break;
9341    default:
9342       /* XXX this list may be incomplete */
9343       ;
9344    }
9345 }
9346 
9347 
9348 /**
9349  * Double-precision absolute value.
9350  */
9351 static boolean
emit_dabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9352 emit_dabs(struct svga_shader_emitter_v10 *emit,
9353           const struct tgsi_full_instruction *inst)
9354 {
9355    assert(emit->version >= 50);
9356 
9357    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9358    check_double_dst_writemask(inst);
9359 
9360    struct tgsi_full_src_register abs_src = absolute_src(&src);
9361 
9362    /* DMOV dst, |src| */
9363    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
9364 
9365    free_temp_indexes(emit);
9366    return TRUE;
9367 }
9368 
9369 
9370 /**
9371  * Double-precision negation
9372  */
9373 static boolean
emit_dneg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9374 emit_dneg(struct svga_shader_emitter_v10 *emit,
9375           const struct tgsi_full_instruction *inst)
9376 {
9377    assert(emit->version >= 50);
9378    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9379    check_double_dst_writemask(inst);
9380 
9381    struct tgsi_full_src_register neg_src = negate_src(&src);
9382 
9383    /* DMOV dst, -src */
9384    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
9385 
9386    free_temp_indexes(emit);
9387    return TRUE;
9388 }
9389 
9390 
9391 /**
9392  * SM5 has no DMAD opcode.  Implement negation with DMUL/DADD.
9393  */
9394 static boolean
emit_dmad(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9395 emit_dmad(struct svga_shader_emitter_v10 *emit,
9396           const struct tgsi_full_instruction *inst)
9397 {
9398    assert(emit->version >= 50);
9399    struct tgsi_full_src_register src0 = check_double_src(emit, &inst->Src[0]);
9400    struct tgsi_full_src_register src1 = check_double_src(emit, &inst->Src[1]);
9401    struct tgsi_full_src_register src2 = check_double_src(emit, &inst->Src[2]);
9402    check_double_dst_writemask(inst);
9403 
9404    unsigned tmp = get_temp_index(emit);
9405    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9406    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9407 
9408    /* DMUL tmp, src[0], src[1] */
9409    emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
9410                         &tmp_dst, &src0, &src1, NULL,
9411                         FALSE, inst->Instruction.Precise);
9412 
9413    /* DADD dst, tmp, src[2] */
9414    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9415                         &inst->Dst[0], &tmp_src, &src2, NULL,
9416                         inst->Instruction.Saturate, inst->Instruction.Precise);
9417    free_temp_indexes(emit);
9418 
9419    return TRUE;
9420 }
9421 
9422 
9423 /**
9424  * Double precision reciprocal square root
9425  */
9426 static boolean
emit_drsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)9427 emit_drsq(struct svga_shader_emitter_v10 *emit,
9428           const struct tgsi_full_dst_register *dst,
9429           const struct tgsi_full_src_register *src)
9430 {
9431    assert(emit->version >= 50);
9432 
9433    VGPU10OpcodeToken0 token0;
9434    struct tgsi_full_src_register dsrc = check_double_src(emit, src);
9435 
9436    begin_emit_instruction(emit);
9437 
9438    token0.value = 0;
9439    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9440    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
9441    emit_dword(emit, token0.value);
9442    emit_dst_register(emit, dst);
9443    emit_src_register(emit, &dsrc);
9444    end_emit_instruction(emit);
9445 
9446    free_temp_indexes(emit);
9447 
9448    return TRUE;
9449 }
9450 
9451 
9452 /**
9453  * There is no SM5 opcode for double precision square root.
9454  * It will be implemented with DRSQ.
9455  * dst = src * DRSQ(src)
9456  */
9457 static boolean
emit_dsqrt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9458 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
9459           const struct tgsi_full_instruction *inst)
9460 {
9461    assert(emit->version >= 50);
9462 
9463    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9464 
9465    /* temporary register to hold the source */
9466    unsigned tmp = get_temp_index(emit);
9467    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9468    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9469 
9470    /* temporary register to hold the DEQ result */
9471    unsigned tmp_cond = get_temp_index(emit);
9472    struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
9473    struct tgsi_full_dst_register tmp_cond_dst_xy =
9474       writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9475    struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
9476    struct tgsi_full_src_register tmp_cond_src_xy =
9477          swizzle_src(&tmp_cond_src,
9478                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9479                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9480 
9481    /* The reciprocal square root of zero yields INF.
9482     * So if the source is 0, we replace it with 1 in the tmp register.
9483     * The later multiplication of zero in the original source will yield 0
9484     * in the result.
9485     */
9486 
9487    /* tmp1 = (src == 0) ? 1 : src;
9488     *   EQ tmp1, 0, src
9489     *   MOVC tmp, tmp1, 1.0, src
9490     */
9491    struct tgsi_full_src_register zero =
9492                make_immediate_reg_double(emit, 0);
9493 
9494    struct tgsi_full_src_register one =
9495                make_immediate_reg_double(emit, 1.0);
9496 
9497    emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
9498                         &zero, &src);
9499    emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
9500                         &tmp_cond_src_xy, &one, &src);
9501 
9502    struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
9503    struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
9504 
9505    /* DRSQ tmp_rsq, tmp */
9506    emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
9507 
9508    /* DMUL dst, tmp_rsq, src[0] */
9509    emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
9510                         &tmp_rsq_src, &src);
9511 
9512    free_temp_indexes(emit);
9513 
9514    return TRUE;
9515 }
9516 
9517 
9518 /**
9519  * glsl-nir path does not lower DTRUNC, so we need to
9520  * add the translation here.
9521  *
9522  * frac = DFRAC(src)
9523  * tmp = src - frac
9524  * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
9525  */
9526 static boolean
emit_dtrunc(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9527 emit_dtrunc(struct svga_shader_emitter_v10 *emit,
9528             const struct tgsi_full_instruction *inst)
9529 {
9530    assert(emit->version >= 50);
9531 
9532    struct tgsi_full_src_register src = check_double_src(emit, &inst->Src[0]);
9533 
9534    /* frac = DFRAC(src) */
9535    unsigned frac_index = get_temp_index(emit);
9536    struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
9537    struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);
9538 
9539    VGPU10OpcodeToken0 token0;
9540    begin_emit_instruction(emit);
9541    token0.value = 0;
9542    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9543    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
9544    emit_dword(emit, token0.value);
9545    emit_dst_register(emit, &frac_dst);
9546    emit_src_register(emit, &src);
9547    end_emit_instruction(emit);
9548 
9549    /* tmp = src - frac */
9550    unsigned tmp_index = get_temp_index(emit);
9551    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
9552    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
9553    struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
9554    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9555                         &tmp_dst, &src, &negate_frac_src, NULL,
9556                         inst->Instruction.Saturate, inst->Instruction.Precise);
9557 
9558    /* cond = frac==0 */
9559    unsigned cond_index = get_temp_index(emit);
9560    struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
9561    struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
9562    struct tgsi_full_src_register zero =
9563                make_immediate_reg_double(emit, 0);
9564 
9565    /* Only use one or two components for double opcode */
9566    cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9567 
9568    emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
9569                         &cond_dst, &frac_src, &zero, NULL,
9570                         inst->Instruction.Saturate, inst->Instruction.Precise);
9571 
9572    /* tmp2 = cond ? 0 : 1 */
9573    unsigned tmp2_index = get_temp_index(emit);
9574    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
9575    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
9576    struct tgsi_full_src_register cond_src_xy =
9577       swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
9578 		             PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
9579    struct tgsi_full_src_register one =
9580                make_immediate_reg_double(emit, 1.0);
9581 
9582    emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9583                         &tmp2_dst, &cond_src_xy, &zero, &one,
9584                         inst->Instruction.Saturate, inst->Instruction.Precise);
9585 
9586    /* tmp2 = tmp + tmp2 */
9587    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
9588                         &tmp2_dst, &tmp_src, &tmp2_src, NULL,
9589                         inst->Instruction.Saturate, inst->Instruction.Precise);
9590 
9591    /* cond = src>=0 */
9592    emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
9593                         &cond_dst, &src, &zero, NULL,
9594                         inst->Instruction.Saturate, inst->Instruction.Precise);
9595 
9596    /* dst = cond ? tmp : tmp2 */
9597    emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
9598                         &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
9599                         inst->Instruction.Saturate, inst->Instruction.Precise);
9600 
9601    free_temp_indexes(emit);
9602    return TRUE;
9603 }
9604 
9605 
9606 static boolean
emit_interp_offset(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9607 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
9608                    const struct tgsi_full_instruction *inst)
9609 {
9610    assert(emit->version >= 50);
9611 
9612    /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
9613     * where (0,0) is the center of the pixel.  We need to translate that
9614     * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
9615     * Also need to flip the Y axis (I think).
9616     */
9617    unsigned tmp = get_temp_index(emit);
9618    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9619    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9620    struct tgsi_full_dst_register tmp_dst_xy =
9621       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
9622    struct tgsi_full_src_register const16 =
9623       make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
9624 
9625    /* MUL tmp.xy, src1, {16, -16, 0, 0} */
9626    emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
9627                         &tmp_dst_xy, &inst->Src[1], &const16);
9628 
9629    /* FTOI tmp.xy, tmp */
9630    emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
9631 
9632    /* EVAL_SNAPPED dst, src0, tmp */
9633    emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
9634                         &inst->Dst[0], &inst->Src[0], &tmp_src);
9635 
9636    free_temp_indexes(emit);
9637 
9638    return TRUE;
9639 }
9640 
9641 
9642 /**
9643  * Emit a simple instruction (like ADD, MUL, MIN, etc).
9644  */
9645 static boolean
emit_simple(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9646 emit_simple(struct svga_shader_emitter_v10 *emit,
9647             const struct tgsi_full_instruction *inst)
9648 {
9649    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9650    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9651    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9652    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9653    unsigned i;
9654 
9655    struct tgsi_full_src_register src[3];
9656 
9657    if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
9658       emit->current_loop_depth++;
9659    }
9660    else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
9661       emit->current_loop_depth--;
9662    }
9663 
9664    for (i = 0; i < op->num_src; i++) {
9665       if (dbl_src)
9666          src[i] = check_double_src(emit, &inst->Src[i]);
9667       else
9668          src[i] = inst->Src[i];
9669    }
9670 
9671    begin_emit_instruction(emit);
9672    emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
9673                        inst->Instruction.Saturate,
9674                        inst->Instruction.Precise);
9675    for (i = 0; i < op->num_dst; i++) {
9676       if (dbl_dst) {
9677          check_double_dst_writemask(inst);
9678       }
9679       emit_dst_register(emit, &inst->Dst[i]);
9680    }
9681    for (i = 0; i < op->num_src; i++) {
9682       emit_src_register(emit, &src[i]);
9683    }
9684    end_emit_instruction(emit);
9685 
9686    free_temp_indexes(emit);
9687    return TRUE;
9688 }
9689 
9690 
9691 /**
9692  * Emit MSB instruction (like IMSB, UMSB).
9693  *
9694  * GLSL returns the index starting from the LSB;
9695  * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
9696  * To get correct location as per glsl from SM5 device, we should
9697  * return (31 - index) if returned index is not -1.
9698  */
9699 static boolean
emit_msb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9700 emit_msb(struct svga_shader_emitter_v10 *emit,
9701          const struct tgsi_full_instruction *inst)
9702 {
9703    const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
9704 
9705    assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
9706 
9707    struct tgsi_full_src_register index_src =
9708       make_src_reg(index_dst->Register.File, index_dst->Register.Index);
9709    struct tgsi_full_src_register imm31 =
9710       make_immediate_reg_int(emit, 31);
9711    imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
9712    struct tgsi_full_src_register neg_one =
9713       make_immediate_reg_int(emit, -1);
9714    neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
9715    unsigned tmp = get_temp_index(emit);
9716    const struct tgsi_full_dst_register tmp_dst =
9717       make_dst_temp_reg(tmp);
9718    const struct tgsi_full_dst_register tmp_dst_x =
9719       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
9720    const struct tgsi_full_src_register tmp_src_x =
9721        make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
9722    int writemask = TGSI_WRITEMASK_X;
9723    int src_swizzle = TGSI_SWIZZLE_X;
9724    int dst_writemask = index_dst->Register.WriteMask;
9725 
9726    emit_simple(emit, inst);
9727 
9728    /* index conversion from SM5 to GLSL */
9729    while (writemask & dst_writemask) {
9730       struct tgsi_full_src_register index_src_comp =
9731          scalar_src(&index_src, src_swizzle);
9732       struct tgsi_full_dst_register index_dst_comp =
9733          writemask_dst(index_dst, writemask);
9734 
9735       /* check if index_src_comp != -1 */
9736       emit_instruction_op2(emit, VGPU10_OPCODE_INE,
9737                            &tmp_dst_x, &index_src_comp, &neg_one);
9738 
9739       /* if */
9740       emit_if(emit, &tmp_src_x);
9741 
9742       index_src_comp = negate_src(&index_src_comp);
9743       /* SUB DST, IMM{31}, DST */
9744       emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
9745                            &index_dst_comp, &imm31, &index_src_comp);
9746 
9747       /* endif */
9748       emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9749 
9750       writemask = writemask << 1;
9751       src_swizzle = src_swizzle + 1;
9752    }
9753    free_temp_indexes(emit);
9754    return TRUE;
9755 }
9756 
9757 
9758 /**
9759  * Emit a BFE instruction (like UBFE, IBFE).
9760  * tgsi representation:
9761  * U/IBFE dst, value, offset, width
9762  * SM5 representation:
9763  * U/IBFE dst, width, offset, value
9764  * Note: SM5 has width & offset range (0-31);
9765  *      whereas GLSL has width & offset range (0-32)
9766  */
9767 static boolean
emit_bfe(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9768 emit_bfe(struct svga_shader_emitter_v10 *emit,
9769          const struct tgsi_full_instruction *inst)
9770 {
9771    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9772    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9773    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9774    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9775    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9776 
9777    unsigned tmp1 = get_temp_index(emit);
9778    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9779    const struct tgsi_full_dst_register cond1_dst_x =
9780       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9781    const struct tgsi_full_src_register cond1_src_x =
9782       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9783 
9784    unsigned tmp2 = get_temp_index(emit);
9785    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9786    const struct tgsi_full_dst_register cond2_dst_x =
9787       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9788    const struct tgsi_full_src_register cond2_src_x =
9789       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9790 
9791    /**
9792     * In SM5, when width = 32  and offset = 0, it returns 0.
9793     * On the other hand GLSL, expects value to be copied as it is, to dst.
9794     */
9795 
9796    /* cond1 = width ! = 32 */
9797    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9798                         &cond1_dst_x, &inst->Src[2], &imm32);
9799 
9800    /* cond2 = offset ! = 0 */
9801    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9802                         &cond2_dst_x, &inst->Src[1], &zero);
9803 
9804    /* cond 2 = cond1 & cond 2 */
9805    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9806                         &cond2_src_x,
9807                         &cond1_src_x);
9808    /* IF */
9809    emit_if(emit, &cond2_src_x);
9810 
9811    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9812                         &inst->Src[0]);
9813 
9814    /* ELSE */
9815    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9816 
9817    /* U/IBFE dst, width, offset, value */
9818    emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9819                         &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9820 
9821    /* ENDIF */
9822    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9823 
9824    free_temp_indexes(emit);
9825    return TRUE;
9826 }
9827 
9828 
9829 /**
9830  * Emit BFI  instruction
9831  * tgsi representation:
9832  * BFI dst, base, insert, offset, width
9833  * SM5 representation:
9834  * BFI dst, width, offset, insert, base
9835  * Note: SM5 has width & offset range (0-31);
9836  *      whereas GLSL has width & offset range (0-32)
9837  */
9838 static boolean
emit_bfi(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9839 emit_bfi(struct svga_shader_emitter_v10 *emit,
9840          const struct tgsi_full_instruction *inst)
9841 {
9842    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9843    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9844    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9845 
9846    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9847    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9848 
9849    unsigned tmp1 = get_temp_index(emit);
9850    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9851    const struct tgsi_full_dst_register cond1_dst_x =
9852       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9853    const struct tgsi_full_src_register cond1_src_x =
9854       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9855 
9856    unsigned tmp2 = get_temp_index(emit);
9857    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9858    const struct tgsi_full_dst_register cond2_dst_x =
9859       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9860    const struct tgsi_full_src_register cond2_src_x =
9861       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9862 
9863    /**
9864     * In SM5, when width = 32  and offset = 0, it returns 0.
9865     * On the other hand GLSL, expects insert to be copied as it is, to dst.
9866     */
9867 
9868    /* cond1 = width == 32 */
9869    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9870                         &cond1_dst_x, &inst->Src[3], &imm32);
9871 
9872    /* cond1 = offset == 0 */
9873    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9874                         &cond2_dst_x, &inst->Src[2], &zero);
9875 
9876    /* cond2 = cond1 & cond2 */
9877    emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9878                         &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9879 
9880    /* if */
9881    emit_if(emit, &cond2_src_x);
9882 
9883    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9884                         &inst->Src[1]);
9885 
9886    /* else */
9887    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9888 
9889    /* BFI dst, width, offset, insert, base */
9890    begin_emit_instruction(emit);
9891    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9892    emit_dst_register(emit, &inst->Dst[0]);
9893    emit_src_register(emit, &inst->Src[3]);
9894    emit_src_register(emit, &inst->Src[2]);
9895    emit_src_register(emit, &inst->Src[1]);
9896    emit_src_register(emit, &inst->Src[0]);
9897    end_emit_instruction(emit);
9898 
9899    /* endif */
9900    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9901 
9902    free_temp_indexes(emit);
9903    return TRUE;
9904 }
9905 
9906 
9907 /**
9908  * We only special case the MOV instruction to try to detect constant
9909  * color writes in the fragment shader.
9910  */
9911 static boolean
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9912 emit_mov(struct svga_shader_emitter_v10 *emit,
9913          const struct tgsi_full_instruction *inst)
9914 {
9915    const struct tgsi_full_src_register *src = &inst->Src[0];
9916    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9917 
9918    if (emit->unit == PIPE_SHADER_FRAGMENT &&
9919        dst->Register.File == TGSI_FILE_OUTPUT &&
9920        dst->Register.Index == 0 &&
9921        src->Register.File == TGSI_FILE_CONSTANT &&
9922        !src->Register.Indirect) {
9923       emit->constant_color_output = TRUE;
9924    }
9925 
9926    return emit_simple(emit, inst);
9927 }
9928 
9929 
9930 /**
9931  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9932  * where TGSI only uses one dest register.
9933  */
9934 static boolean
emit_simple_1dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned dst_count,unsigned dst_index)9935 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9936                  const struct tgsi_full_instruction *inst,
9937                  unsigned dst_count,
9938                  unsigned dst_index)
9939 {
9940    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9941    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9942    unsigned i;
9943 
9944    begin_emit_instruction(emit);
9945    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9946 
9947    for (i = 0; i < dst_count; i++) {
9948       if (i == dst_index) {
9949          emit_dst_register(emit, &inst->Dst[0]);
9950       } else {
9951          emit_null_dst_register(emit);
9952       }
9953    }
9954 
9955    for (i = 0; i < op->num_src; i++) {
9956       emit_src_register(emit, &inst->Src[i]);
9957    }
9958    end_emit_instruction(emit);
9959 
9960    return TRUE;
9961 }
9962 
9963 
9964 /**
9965  * Emit a vmware specific VGPU10 instruction.
9966  */
9967 static boolean
emit_vmware(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_VMWARE_OPCODE_TYPE subopcode)9968 emit_vmware(struct svga_shader_emitter_v10 *emit,
9969             const struct tgsi_full_instruction *inst,
9970             VGPU10_VMWARE_OPCODE_TYPE subopcode)
9971 {
9972    VGPU10OpcodeToken0 token0;
9973    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9974    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9975    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9976    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9977    unsigned i;
9978    struct tgsi_full_src_register src[3];
9979 
9980    for (i = 0; i < op->num_src; i++) {
9981       if (dbl_src)
9982          src[i] = check_double_src(emit, &inst->Src[i]);
9983       else
9984          src[i] = inst->Src[i];
9985    }
9986 
9987    begin_emit_instruction(emit);
9988 
9989    assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9990 
9991    token0.value = 0;
9992    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9993    token0.vmwareOpcodeType = subopcode;
9994    emit_dword(emit, token0.value);
9995 
9996    if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9997       /* IDIV only uses the first dest register. */
9998       emit_dst_register(emit, &inst->Dst[0]);
9999       emit_null_dst_register(emit);
10000    } else {
10001       for (i = 0; i < op->num_dst; i++) {
10002          if (dbl_dst) {
10003             check_double_dst_writemask(inst);
10004          }
10005          emit_dst_register(emit, &inst->Dst[i]);
10006       }
10007    }
10008 
10009    for (i = 0; i < op->num_src; i++) {
10010       emit_src_register(emit, &src[i]);
10011    }
10012    end_emit_instruction(emit);
10013 
10014    free_temp_indexes(emit);
10015    return TRUE;
10016 }
10017 
10018 /**
10019  * Emit a memory register
10020  */
10021 
/**
 * Kind of access for which a memory operand is emitted; selects how
 * emit_memory_register() encodes the operand.
 */
typedef enum {
   MEM_STORE = 0,           /* operand is a destination (Dst[]) */
   MEM_LOAD = 1,            /* operand is a source (Src[]) */
   MEM_ATOMIC_COUNTER = 2   /* operand is a source, emitted with 0 components */
} memory_op;
10027 
10028 static void
emit_memory_register(struct svga_shader_emitter_v10 * emit,memory_op mem_op,const struct tgsi_full_instruction * inst,unsigned regIndex,unsigned writemask)10029 emit_memory_register(struct svga_shader_emitter_v10 *emit,
10030                      memory_op mem_op,
10031                      const struct tgsi_full_instruction *inst,
10032                      unsigned regIndex, unsigned writemask)
10033 {
10034    VGPU10OperandToken0 operand0;
10035    unsigned resIndex = 0;
10036 
10037    operand0.value = 0;
10038    operand0.operandType = VGPU10_OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY;
10039    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10040    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10041 
10042    switch (mem_op) {
10043    case MEM_ATOMIC_COUNTER:
10044    {
10045       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10046       resIndex = inst->Src[regIndex].Register.Index;
10047       break;
10048    }
10049    case MEM_STORE:
10050    {
10051       const struct tgsi_full_dst_register *reg = &inst->Dst[regIndex];
10052 
10053       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10054       operand0.mask = writemask;
10055       resIndex = reg->Register.Index;
10056       break;
10057    }
10058    case MEM_LOAD:
10059    {
10060       const struct tgsi_full_src_register *reg = &inst->Src[regIndex];
10061 
10062       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10063       operand0.swizzleX = reg->Register.SwizzleX;
10064       operand0.swizzleY = reg->Register.SwizzleY;
10065       operand0.swizzleZ = reg->Register.SwizzleZ;
10066       operand0.swizzleW = reg->Register.SwizzleW;
10067       resIndex = reg->Register.Index;
10068       break;
10069    }
10070    default:
10071       assert(!"Unexpected memory opcode");
10072       break;
10073    }
10074 
10075    emit_dword(emit, operand0.value);
10076    emit_dword(emit, resIndex);
10077 }
10078 
10079 
/**
 * Kind of access for which a UAV operand is emitted; selects how
 * emit_uav_register() encodes the operand.
 */
typedef enum {
   UAV_STORE = 0,    /* mask mode, using the caller's writemask */
   UAV_LOAD = 1,     /* swizzle mode, identity swizzle */
   UAV_ATOMIC = 2,   /* 0-component operand */
   UAV_RESQ = 3      /* encoded the same way as UAV_LOAD */
} UAV_OP;
10086 
10087 
10088 /**
10089  * Emit a uav register
10090  * \param uav_index     index of resource register
10091  * \param uav_op        UAV_STORE/ UAV_LOAD/ UAV_ATOMIC depending on opcode
10092  * \param resourceType  resource file type
10093  * \param writemask     resource writemask
10094  */
10095 
10096 static void
emit_uav_register(struct svga_shader_emitter_v10 * emit,unsigned res_index,UAV_OP uav_op,enum tgsi_file_type resourceType,unsigned writemask)10097 emit_uav_register(struct svga_shader_emitter_v10 *emit,
10098                   unsigned res_index, UAV_OP uav_op,
10099                   enum tgsi_file_type resourceType, unsigned writemask)
10100 {
10101    VGPU10OperandToken0 operand0;
10102    unsigned uav_index = INVALID_INDEX;
10103 
10104    operand0.value = 0;
10105    operand0.operandType = VGPU10_OPERAND_TYPE_UAV;
10106    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
10107    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
10108 
10109    switch (resourceType) {
10110    case TGSI_FILE_IMAGE:
10111       uav_index = emit->key.images[res_index].uav_index;
10112       break;
10113    case TGSI_FILE_BUFFER:
10114       uav_index = emit->key.shader_buf_uav_index[res_index];
10115       break;
10116    case TGSI_FILE_HW_ATOMIC:
10117       uav_index = emit->key.atomic_buf_uav_index[res_index];
10118       break;
10119    default:
10120       assert(0);
10121    }
10122 
10123    switch (uav_op) {
10124    case UAV_ATOMIC:
10125       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
10126       break;
10127 
10128    case UAV_STORE:
10129       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
10130       operand0.mask = writemask;
10131       break;
10132 
10133    case UAV_LOAD:
10134    case UAV_RESQ:
10135       operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
10136       operand0.swizzleX = VGPU10_COMPONENT_X;
10137       operand0.swizzleY = VGPU10_COMPONENT_Y;
10138       operand0.swizzleZ = VGPU10_COMPONENT_Z;
10139       operand0.swizzleW = VGPU10_COMPONENT_W;
10140       break;
10141 
10142    default:
10143       break;
10144    }
10145 
10146    emit_dword(emit, operand0.value);
10147    emit_dword(emit, uav_index);
10148 }
10149 
10150 
10151 /**
10152  * A helper function to emit the uav address.
10153  * For memory, buffer, and image resource, it is set to the specified address.
10154  * For HW atomic counter, the address is the sum of the address offset and the
10155  * offset into the HW atomic buffer as specified by the register index.
10156  * It is also possible to specify the counter index as an indirect address.
10157  * And in this case, the uav address will be the sum of the address offset and the
10158  * counter index specified in the indirect address.
10159  */
10160 static
10161 struct tgsi_full_src_register
emit_uav_addr_offset(struct svga_shader_emitter_v10 * emit,enum tgsi_file_type resourceType,unsigned resourceIndex,unsigned resourceIndirect,unsigned resourceIndirectIndex,const struct tgsi_full_src_register * addr_reg)10162 emit_uav_addr_offset(struct svga_shader_emitter_v10 *emit,
10163                      enum tgsi_file_type resourceType,
10164                      unsigned resourceIndex,
10165                      unsigned resourceIndirect,
10166                      unsigned resourceIndirectIndex,
10167                      const struct tgsi_full_src_register *addr_reg)
10168 {
10169    unsigned addr_tmp;
10170    struct tgsi_full_dst_register addr_dst;
10171    struct tgsi_full_src_register addr_src;
10172    struct tgsi_full_src_register two = make_immediate_reg_int(emit, 2);
10173    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
10174 
10175    addr_tmp = get_temp_index(emit);
10176    addr_dst = make_dst_temp_reg(addr_tmp);
10177    addr_src = make_src_temp_reg(addr_tmp);
10178 
10179    /* specified address offset */
10180    if (addr_reg)
10181       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, addr_reg);
10182    else
10183       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst, &zero);
10184 
10185    /* For HW atomic counter, we need to find the index to the
10186     * HW atomic buffer.
10187     */
10188    if (resourceType == TGSI_FILE_HW_ATOMIC) {
10189       if (resourceIndirect) {
10190 
10191          /**
10192           * uav addr offset  = counter layout offset +
10193           *                    counter indirect index address + address offset
10194           */
10195 
10196          /* counter layout offset */
10197          struct tgsi_full_src_register layout_offset;
10198          layout_offset =
10199             make_immediate_reg_int(emit, resourceIndex);
10200 
10201          /* counter layout offset + address offset */
10202          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10203                               &addr_src, &layout_offset);
10204 
10205          /* counter indirect index address */
10206          unsigned indirect_addr =
10207             emit->address_reg_index[resourceIndirectIndex];
10208 
10209          struct tgsi_full_src_register indirect_addr_src =
10210             make_src_temp_reg(indirect_addr);
10211 
10212          indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10213 
10214          /* counter layout offset + address offset + counter indirect address */
10215          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &addr_dst,
10216                               &addr_src, &indirect_addr_src);
10217 
10218       } else {
10219          struct tgsi_full_src_register index_src;
10220 
10221          index_src = make_immediate_reg_int(emit, resourceIndex);
10222 
10223          /* uav addr offset  = counter index address + address offset */
10224          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &addr_dst,
10225                               &addr_src, &index_src);
10226       }
10227 
10228       /* HW atomic buffer is declared as raw buffer, so the buffer address is
10229        * the byte offset, so we need to multiple the counter addr offset by 4.
10230        */
10231       emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &addr_dst,
10232                            &addr_src, &two);
10233    }
10234    else if (resourceType == TGSI_FILE_IMAGE) {
10235       if ((emit->key.images[resourceIndex].resource_target == PIPE_TEXTURE_3D)
10236              && emit->key.images[resourceIndex].is_single_layer) {
10237 
10238          struct tgsi_full_dst_register addr_dst_z =
10239             writemask_dst(&addr_dst, TGSI_WRITEMASK_Z);
10240 
10241          /* For non-layered 3D texture image view, we have to make sure the z
10242           * component of the address offset is set to 0.
10243           */
10244          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &addr_dst_z,
10245                               &zero);
10246       }
10247    }
10248 
10249    return addr_src;
10250 }
10251 
10252 
10253 
10254 /**
10255  * A helper function to expand indirect indexing to uav resource
10256  * by looping through the resource array, compare the indirect index and
10257  * emit the instruction for each resource in the array.
10258  */
10259 static void
loop_instruction(unsigned index,unsigned count,struct tgsi_full_src_register * addr_index,void (* fb)(struct svga_shader_emitter_v10 *,const struct tgsi_full_instruction *,unsigned),struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10260 loop_instruction(unsigned index, unsigned count,
10261                  struct tgsi_full_src_register *addr_index,
10262                  void (*fb)(struct svga_shader_emitter_v10 *,
10263                             const struct tgsi_full_instruction *, unsigned),
10264                  struct svga_shader_emitter_v10 *emit,
10265                  const struct tgsi_full_instruction *inst)
10266 {
10267    if (count == 0)
10268       return;
10269 
10270    if (index > 0) {
10271       /* ELSE */
10272       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10273    }
10274 
10275    struct tgsi_full_src_register index_src =
10276                                     make_immediate_reg_int(emit, index);
10277 
10278    unsigned tmp_index = get_temp_index(emit);
10279    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10280    struct tgsi_full_src_register tmp_src_x =
10281                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10282    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
10283 
10284    /* IEQ tmp, addr_tmp_index, index */
10285    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, &tmp_dst,
10286                         addr_index, &index_src);
10287 
10288    /* IF tmp */
10289    emit_if(emit, &tmp_src_x);
10290 
10291    free_temp_indexes(emit);
10292 
10293    (*fb)(emit, inst, index);
10294 
10295    loop_instruction(index+1, count-1, addr_index, fb, emit, inst);
10296 
10297    /* ENDIF */
10298    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10299 }
10300 
10301 
10302 /**
10303  * A helper function to emit the load instruction.
10304  */
10305 static void
emit_load_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10306 emit_load_instruction(struct svga_shader_emitter_v10 *emit,
10307                       const struct tgsi_full_instruction *inst,
10308                       unsigned resourceIndex)
10309 {
10310    VGPU10OpcodeToken0 token0;
10311    struct tgsi_full_src_register addr_src;
10312    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10313 
10314    /* Resolve the resource address for this resource first */
10315    addr_src = emit_uav_addr_offset(emit, resourceType, resourceIndex,
10316                                    inst->Src[0].Register.Indirect,
10317                                    inst->Src[0].Indirect.Index,
10318                                    &inst->Src[1]);
10319 
10320    /* LOAD resource, address, src */
10321    begin_emit_instruction(emit);
10322 
10323    token0.value = 0;
10324 
10325    if (resourceType == TGSI_FILE_MEMORY ||
10326        resourceType == TGSI_FILE_BUFFER ||
10327        resourceType == TGSI_FILE_HW_ATOMIC) {
10328       token0.opcodeType = VGPU10_OPCODE_LD_RAW;
10329       addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10330    }
10331    else {
10332       token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10333    }
10334 
10335    token0.saturate = inst->Instruction.Saturate,
10336    emit_dword(emit, token0.value);
10337 
10338    emit_dst_register(emit, &inst->Dst[0]);
10339    emit_src_register(emit, &addr_src);
10340 
10341    if (resourceType == TGSI_FILE_MEMORY) {
10342       emit_memory_register(emit, MEM_LOAD, inst, 0, 0);
10343    } else if (resourceType == TGSI_FILE_HW_ATOMIC) {
10344       emit_uav_register(emit, inst->Src[0].Dimension.Index,
10345                         UAV_LOAD, inst->Src[0].Register.File, 0);
10346    } else {
10347       emit_uav_register(emit, resourceIndex,
10348                         UAV_LOAD, inst->Src[0].Register.File, 0);
10349    }
10350 
10351    end_emit_instruction(emit);
10352 
10353    free_temp_indexes(emit);
10354 }
10355 
10356 
10357 /**
10358  * Emit uav / memory load instruction
10359  */
10360 static boolean
emit_load(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10361 emit_load(struct svga_shader_emitter_v10 *emit,
10362            const struct tgsi_full_instruction *inst)
10363 {
10364    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10365    unsigned resourceIndex = inst->Src[0].Register.Index;
10366 
10367    /* If the resource register has indirect index, we will need
10368     * to expand it since SM5 device does not support indirect indexing
10369     * for uav.
10370     */
10371    if (inst->Src[0].Register.Indirect &&
10372        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10373 
10374       unsigned indirect_index = inst->Src[0].Indirect.Index;
10375       unsigned num_resources =
10376          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10377                                             emit->num_images;
10378 
10379       /* indirect index tmp register */
10380       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10381       struct tgsi_full_src_register indirect_addr_src =
10382          make_src_temp_reg(indirect_addr);
10383       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10384 
10385       /* Add offset to the indirect index */
10386       if (inst->Src[0].Register.Index != 0) {
10387          struct tgsi_full_src_register offset =
10388             make_immediate_reg_int(emit, inst->Src[0].Register.Index);
10389          struct tgsi_full_dst_register indirect_addr_dst =
10390             make_dst_temp_reg(indirect_addr);
10391          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10392                               &indirect_addr_src, &offset);
10393       }
10394 
10395       /* Loop through the resource array to find which resource to use.
10396        */
10397       loop_instruction(0, num_resources, &indirect_addr_src,
10398                        emit_load_instruction, emit, inst);
10399    }
10400    else {
10401       emit_load_instruction(emit, inst, resourceIndex);
10402    }
10403 
10404    free_temp_indexes(emit);
10405 
10406    return TRUE;
10407 }
10408 
10409 
10410 /**
10411  * A helper function to emit a store instruction.
10412  */
10413 static void
emit_store_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10414 emit_store_instruction(struct svga_shader_emitter_v10 *emit,
10415                        const struct tgsi_full_instruction *inst,
10416                        unsigned resourceIndex)
10417 {
10418    VGPU10OpcodeToken0 token0;
10419    enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10420    unsigned writemask = inst->Dst[0].Register.WriteMask;
10421    struct tgsi_full_src_register addr_src;
10422 
10423    unsigned tmp_index = get_temp_index(emit);
10424    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
10425    struct tgsi_full_dst_register tmp_dst_xyzw = make_dst_temp_reg(tmp_index);
10426    struct tgsi_full_dst_register tmp_dst;
10427 
10428    struct tgsi_full_src_register src = inst->Src[1];
10429    struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
10430 
10431    boolean needLoad = FALSE;
10432    boolean needPerComponentStore = FALSE;
10433    unsigned swizzles = 0;
10434 
10435    /* Resolve the resource address for this resource first */
10436    addr_src = emit_uav_addr_offset(emit, resourceType,
10437                                    inst->Dst[0].Register.Index,
10438                                    inst->Dst[0].Register.Indirect,
10439                                    inst->Dst[0].Indirect.Index,
10440                                    &inst->Src[0]);
10441 
10442    /* First check the writemask to see if it can be supported
10443     * by the store instruction.
10444     * store_raw only allows .x, .xy, .xyz, .xyzw. For the typeless memory,
10445     * we can adjust the address offset, and do a per-component store.
10446     * store_uav_typed only allows .xyzw. In this case, we need to
10447     * do a load first, update the temporary and then issue the
10448     * store. This does have a small risk that if different threads
10449     * update different components of the same address, data might not be
10450     * in sync.
10451     */
10452    if (resourceType == TGSI_FILE_IMAGE) {
10453       needLoad = (writemask == TGSI_WRITEMASK_XYZW) ? FALSE : TRUE;
10454    }
10455    else if (resourceType == TGSI_FILE_BUFFER ||
10456             resourceType == TGSI_FILE_MEMORY) {
10457       if (!(writemask == TGSI_WRITEMASK_X || writemask == TGSI_WRITEMASK_XY ||
10458             writemask == TGSI_WRITEMASK_XYZ ||
10459             writemask == TGSI_WRITEMASK_XYZW)) {
10460          needPerComponentStore = TRUE;
10461       }
10462    }
10463 
10464    if (needLoad) {
10465       assert(resourceType == TGSI_FILE_IMAGE);
10466 
10467       /* LOAD resource, address, src */
10468       begin_emit_instruction(emit);
10469 
10470       token0.value = 0;
10471       token0.opcodeType = VGPU10_OPCODE_LD_UAV_TYPED;
10472       token0.saturate = inst->Instruction.Saturate,
10473       emit_dword(emit, token0.value);
10474 
10475       emit_dst_register(emit, &tmp_dst_xyzw);
10476       emit_src_register(emit, &addr_src);
10477       emit_uav_register(emit, resourceIndex, UAV_LOAD, resourceType, 0);
10478 
10479       end_emit_instruction(emit);
10480 
10481       /* MOV tmp(writemask) src */
10482       tmp_dst = writemask_dst(&tmp_dst_xyzw, writemask);
10483       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp_dst, &inst->Src[1]);
10484 
10485       /* Now set the writemask to xyzw for the store_uav_typed instruction */
10486       writemask = TGSI_WRITEMASK_XYZW;
10487    }
10488    else if (needPerComponentStore) {
10489       /* Save the src swizzles */
10490       swizzles = src.Register.SwizzleX |
10491                  src.Register.SwizzleY << 2 |
10492                  src.Register.SwizzleZ << 4 |
10493                  src.Register.SwizzleW << 6;
10494    }
10495 
10496    boolean storeDone = FALSE;
10497    unsigned perComponentWritemask = writemask;
10498    unsigned shift = 0;
10499    struct tgsi_full_src_register shift_src;
10500 
10501    while (!storeDone) {
10502 
10503       if (needPerComponentStore) {
10504          assert(perComponentWritemask);
10505          while (!(perComponentWritemask & TGSI_WRITEMASK_X)) {
10506             shift++;
10507             perComponentWritemask >>= 1;
10508          }
10509 
10510          /* First adjust the addr_src to the next component */
10511          if (shift != 0) {
10512             struct tgsi_full_dst_register addr_dst =
10513                make_dst_temp_reg(addr_src.Register.Index);
10514             shift_src = make_immediate_reg_int(emit, shift);
10515             emit_instruction_op3(emit, VGPU10_OPCODE_UMAD, &addr_dst, &four,
10516                                  &shift_src, &addr_src);
10517 
10518             /* Adjust the src swizzle as well */
10519             swizzles >>= (shift * 2);
10520          }
10521 
10522          /* Now the address offset is set to the next component,
10523           * we can set the writemask to .x and make sure to set
10524           * the src swizzle as well.
10525           */
10526          src.Register.SwizzleX = swizzles & 0x3;
10527          writemask = TGSI_WRITEMASK_X;
10528 
10529          /* Shift for the next component check */
10530          perComponentWritemask >>= 1;
10531          shift = 1;
10532       }
10533 
10534       /* STORE resource, address, src */
10535       begin_emit_instruction(emit);
10536 
10537       token0.value = 0;
10538       token0.saturate = inst->Instruction.Saturate;
10539 
10540       if (resourceType == TGSI_FILE_MEMORY) {
10541          token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10542          addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10543          emit_dword(emit, token0.value);
10544          emit_memory_register(emit, MEM_STORE, inst, 0, writemask);
10545       }
10546       else if (resourceType == TGSI_FILE_BUFFER ||
10547                resourceType == TGSI_FILE_HW_ATOMIC) {
10548          token0.opcodeType = VGPU10_OPCODE_STORE_RAW;
10549          addr_src = scalar_src(&addr_src, TGSI_SWIZZLE_X);
10550          emit_dword(emit, token0.value);
10551          emit_uav_register(emit, resourceIndex, UAV_STORE,
10552                            resourceType, writemask);
10553       }
10554       else {
10555          token0.opcodeType = VGPU10_OPCODE_STORE_UAV_TYPED;
10556          emit_dword(emit, token0.value);
10557          emit_uav_register(emit, resourceIndex, UAV_STORE,
10558                            resourceType, writemask);
10559       }
10560 
10561       emit_src_register(emit, &addr_src);
10562 
10563       if (needLoad)
10564          emit_src_register(emit, &tmp_src);
10565       else
10566          emit_src_register(emit, &src);
10567 
10568       end_emit_instruction(emit);
10569 
10570       if (!needPerComponentStore || !perComponentWritemask)
10571          storeDone = TRUE;
10572    }
10573 
10574    free_temp_indexes(emit);
10575 }
10576 
10577 
10578 /**
10579  * Emit uav / memory store instruction
10580  */
10581 static boolean
emit_store(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10582 emit_store(struct svga_shader_emitter_v10 *emit,
10583            const struct tgsi_full_instruction *inst)
10584 {
10585    enum tgsi_file_type resourceType = inst->Dst[0].Register.File;
10586    unsigned resourceIndex = inst->Dst[0].Register.Index;
10587 
10588    /* If the resource register has indirect index, we will need
10589     * to expand it since SM5 device does not support indirect indexing
10590     * for uav.
10591     */
10592    if (inst->Dst[0].Register.Indirect &&
10593        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10594 
10595       unsigned indirect_index = inst->Dst[0].Indirect.Index;
10596       unsigned num_resources =
10597          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10598                                             emit->num_images;
10599 
10600       /* Indirect index tmp register */
10601       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10602       struct tgsi_full_src_register indirect_addr_src =
10603          make_src_temp_reg(indirect_addr);
10604       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10605 
10606       /* Add offset to the indirect index */
10607       if (inst->Dst[0].Register.Index != 0) {
10608          struct tgsi_full_src_register offset =
10609             make_immediate_reg_int(emit, inst->Dst[0].Register.Index);
10610          struct tgsi_full_dst_register indirect_addr_dst =
10611             make_dst_temp_reg(indirect_addr);
10612          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &indirect_addr_dst,
10613                               &indirect_addr_src, &offset);
10614       }
10615 
10616       /* Loop through the resource array to find which resource to use.
10617        */
10618       loop_instruction(0, num_resources, &indirect_addr_src,
10619                        emit_store_instruction, emit, inst);
10620    }
10621    else {
10622       emit_store_instruction(emit, inst, resourceIndex);
10623    }
10624 
10625    free_temp_indexes(emit);
10626 
10627    return TRUE;
10628 }
10629 
10630 
10631 /**
10632  * A helper function to emit an atomic instruction.
10633  */
10634 
10635 static void
emit_atomic_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned resourceIndex)10636 emit_atomic_instruction(struct svga_shader_emitter_v10 *emit,
10637                         const struct tgsi_full_instruction *inst,
10638                         unsigned resourceIndex)
10639 {
10640    VGPU10OpcodeToken0 token0;
10641    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10642    struct tgsi_full_src_register addr_src;
10643    VGPU10_OPCODE_TYPE opcode = emit->cur_atomic_opcode;
10644    const struct tgsi_full_src_register *offset;
10645 
10646    /* ntt does not specify offset for HWATOMIC. So just set offset to NULL. */
10647    offset = resourceType == TGSI_FILE_HW_ATOMIC ? NULL : &inst->Src[1];
10648 
10649    /* Resolve the resource address */
10650    addr_src = emit_uav_addr_offset(emit, resourceType,
10651                                    inst->Src[0].Register.Index,
10652                                    inst->Src[0].Register.Indirect,
10653                                    inst->Src[0].Indirect.Index,
10654                                    offset);
10655 
10656    /* Emit the atomic operation */
10657    begin_emit_instruction(emit);
10658 
10659    token0.value = 0;
10660    token0.opcodeType = opcode;
10661    token0.saturate = inst->Instruction.Saturate,
10662    emit_dword(emit, token0.value);
10663 
10664    emit_dst_register(emit, &inst->Dst[0]);
10665 
10666    if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
10667       emit_memory_register(emit, MEM_ATOMIC_COUNTER, inst, 0, 0);
10668    } else if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC) {
10669       assert(inst->Src[0].Register.Dimension == 1);
10670       emit_uav_register(emit, inst->Src[0].Dimension.Index,
10671                         UAV_ATOMIC, inst->Src[0].Register.File, 0);
10672    } else {
10673       emit_uav_register(emit, resourceIndex,
10674                         UAV_ATOMIC, inst->Src[0].Register.File, 0);
10675    }
10676 
10677    /* resource address offset */
10678    emit_src_register(emit, &addr_src);
10679 
10680    struct tgsi_full_src_register src0_x =
10681          swizzle_src(&inst->Src[2], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10682                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10683    emit_src_register(emit, &src0_x);
10684 
10685    if (opcode == VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH) {
10686       struct tgsi_full_src_register src1_x =
10687          swizzle_src(&inst->Src[3], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
10688                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
10689 
10690       emit_src_register(emit, &src1_x);
10691    }
10692 
10693    end_emit_instruction(emit);
10694 
10695    free_temp_indexes(emit);
10696 }
10697 
10698 
10699 /**
10700  * Emit atomic instruction
10701  */
10702 static boolean
emit_atomic(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_OPCODE_TYPE opcode)10703 emit_atomic(struct svga_shader_emitter_v10 *emit,
10704             const struct tgsi_full_instruction *inst,
10705             VGPU10_OPCODE_TYPE opcode)
10706 {
10707    enum tgsi_file_type resourceType = inst->Src[0].Register.File;
10708    unsigned resourceIndex = inst->Src[0].Register.Index;
10709 
10710    emit->cur_atomic_opcode = opcode;
10711 
10712    /* If the resource register has indirect index, we will need
10713     * to expand it since SM5 device does not support indirect indexing
10714     * for uav.
10715     */
10716    if (inst->Dst[0].Register.Indirect &&
10717        (resourceType == TGSI_FILE_BUFFER || resourceType == TGSI_FILE_IMAGE)) {
10718 
10719       unsigned indirect_index = inst->Dst[0].Indirect.Index;
10720       unsigned num_resources =
10721          resourceType == TGSI_FILE_BUFFER ? emit->num_shader_bufs :
10722                                             emit->num_images;
10723 
10724       /* indirect index tmp register */
10725       unsigned indirect_addr = emit->address_reg_index[indirect_index];
10726       struct tgsi_full_src_register indirect_addr_src =
10727          make_src_temp_reg(indirect_addr);
10728       indirect_addr_src = scalar_src(&indirect_addr_src, TGSI_SWIZZLE_X);
10729 
10730       /* Loop through the resource array to find which resource to use.
10731        */
10732       loop_instruction(0, num_resources, &indirect_addr_src,
10733                        emit_atomic_instruction, emit, inst);
10734    }
10735    else {
10736       emit_atomic_instruction(emit, inst, resourceIndex);
10737    }
10738 
10739    free_temp_indexes(emit);
10740 
10741    return TRUE;
10742 }
10743 
10744 
10745 /**
10746  * Emit barrier instruction
10747  */
10748 static boolean
emit_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10749 emit_barrier(struct svga_shader_emitter_v10 *emit,
10750              const struct tgsi_full_instruction *inst)
10751 {
10752    VGPU10OpcodeToken0 token0;
10753 
10754    assert(emit->version >= 50);
10755 
10756    token0.value = 0;
10757    token0.opcodeType = VGPU10_OPCODE_SYNC;
10758 
10759    if (emit->unit == PIPE_SHADER_TESS_CTRL && emit->version == 50) {
10760       /* SM5 device doesn't support BARRIER in tcs . If barrier is used
10761        * in shader, don't do anything for this opcode and continue rest
10762        * of shader translation
10763        */
10764       util_debug_message(&emit->svga_debug_callback, INFO,
10765                          "barrier instruction is not supported in tessellation control shader\n");
10766       return TRUE;
10767    }
10768    else if (emit->unit == PIPE_SHADER_COMPUTE) {
10769       if (emit->cs.shared_memory_declared)
10770          token0.syncThreadGroupShared = 1;
10771 
10772       if (emit->uav_declared)
10773          token0.syncUAVMemoryGroup = 1;
10774 
10775       token0.syncThreadsInGroup = 1;
10776    } else {
10777       token0.syncUAVMemoryGlobal = 1;
10778    }
10779 
10780    assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10781           token0.syncThreadGroupShared);
10782 
10783    begin_emit_instruction(emit);
10784    emit_dword(emit, token0.value);
10785    end_emit_instruction(emit);
10786 
10787    return TRUE;
10788 }
10789 
10790 /**
10791  * Emit memory barrier instruction
10792  */
10793 static boolean
emit_memory_barrier(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10794 emit_memory_barrier(struct svga_shader_emitter_v10 *emit,
10795                     const struct tgsi_full_instruction *inst)
10796 {
10797    unsigned index = inst->Src[0].Register.Index;
10798    unsigned swizzle = inst->Src[0].Register.SwizzleX;
10799    unsigned bartype = emit->immediates[index][swizzle].Int;
10800    VGPU10OpcodeToken0 token0;
10801 
10802    token0.value = 0;
10803    token0.opcodeType = VGPU10_OPCODE_SYNC;
10804 
10805    if (emit->unit == PIPE_SHADER_COMPUTE) {
10806 
10807       /* For compute shader, issue sync opcode with different options
10808        * depending on the memory barrier type.
10809        *
10810        * Bit 0: Shader storage buffers
10811        * Bit 1: Atomic buffers
10812        * Bit 2: Images
10813        * Bit 3: Shared memory
10814        * Bit 4: Thread group
10815        */
10816 
10817       if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10818                      TGSI_MEMBAR_SHADER_IMAGE))
10819          token0.syncUAVMemoryGlobal = 1;
10820       else if (bartype & TGSI_MEMBAR_THREAD_GROUP)
10821          token0.syncUAVMemoryGroup = 1;
10822 
10823       if (bartype & TGSI_MEMBAR_SHARED)
10824          token0.syncThreadGroupShared = 1;
10825    }
10826    else {
10827       /**
10828        * For graphics stages, only sync_uglobal is available.
10829        */
10830       if (bartype & (TGSI_MEMBAR_SHADER_BUFFER | TGSI_MEMBAR_ATOMIC_BUFFER |
10831                      TGSI_MEMBAR_SHADER_IMAGE))
10832          token0.syncUAVMemoryGlobal = 1;
10833    }
10834 
10835    assert(token0.syncUAVMemoryGlobal || token0.syncUAVMemoryGroup ||
10836           token0.syncThreadGroupShared);
10837 
10838    begin_emit_instruction(emit);
10839    emit_dword(emit, token0.value);
10840    end_emit_instruction(emit);
10841 
10842    return TRUE;
10843 }
10844 
10845 
10846 /**
10847  * Emit code for TGSI_OPCODE_RESQ (image size) instruction.
10848  */
10849 static boolean
emit_resq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)10850 emit_resq(struct svga_shader_emitter_v10 *emit,
10851          const struct tgsi_full_instruction *inst)
10852 {
10853    struct tgsi_full_src_register zero =
10854       make_immediate_reg_int(emit, 0);
10855 
10856    unsigned uav_resource = emit->image[inst->Src[0].Register.Index].Resource;
10857 
10858    if (uav_resource == TGSI_TEXTURE_CUBE_ARRAY) {
10859       struct tgsi_full_src_register image_src;
10860 
10861       image_src = make_src_const_reg(emit->image_size_index + inst->Src[0].Register.Index);
10862 
10863       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &image_src);
10864       return TRUE;
10865    }
10866 
10867    begin_emit_instruction(emit);
10868    if (uav_resource == TGSI_TEXTURE_BUFFER) {
10869       emit_opcode(emit, VGPU10_OPCODE_BUFINFO, FALSE);
10870       emit_dst_register(emit, &inst->Dst[0]);
10871    }
10872    else {
10873       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
10874       emit_dst_register(emit, &inst->Dst[0]);
10875       emit_src_register(emit, &zero);
10876    }
10877    emit_uav_register(emit, inst->Src[0].Register.Index,
10878                      UAV_RESQ, inst->Src[0].Register.File, 0);
10879    end_emit_instruction(emit);
10880 
10881    return TRUE;
10882 }
10883 
10884 
10885 static boolean
emit_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)10886 emit_instruction(struct svga_shader_emitter_v10 *emit,
10887                  unsigned inst_number,
10888                  const struct tgsi_full_instruction *inst)
10889 {
10890    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
10891 
10892    switch (opcode) {
10893    case TGSI_OPCODE_ADD:
10894    case TGSI_OPCODE_AND:
10895    case TGSI_OPCODE_BGNLOOP:
10896    case TGSI_OPCODE_BRK:
10897    case TGSI_OPCODE_CEIL:
10898    case TGSI_OPCODE_CONT:
10899    case TGSI_OPCODE_DDX:
10900    case TGSI_OPCODE_DDY:
10901    case TGSI_OPCODE_DIV:
10902    case TGSI_OPCODE_DP2:
10903    case TGSI_OPCODE_DP3:
10904    case TGSI_OPCODE_DP4:
10905    case TGSI_OPCODE_ELSE:
10906    case TGSI_OPCODE_ENDIF:
10907    case TGSI_OPCODE_ENDLOOP:
10908    case TGSI_OPCODE_ENDSUB:
10909    case TGSI_OPCODE_F2I:
10910    case TGSI_OPCODE_F2U:
10911    case TGSI_OPCODE_FLR:
10912    case TGSI_OPCODE_FRC:
10913    case TGSI_OPCODE_FSEQ:
10914    case TGSI_OPCODE_FSGE:
10915    case TGSI_OPCODE_FSLT:
10916    case TGSI_OPCODE_FSNE:
10917    case TGSI_OPCODE_I2F:
10918    case TGSI_OPCODE_IMAX:
10919    case TGSI_OPCODE_IMIN:
10920    case TGSI_OPCODE_INEG:
10921    case TGSI_OPCODE_ISGE:
10922    case TGSI_OPCODE_ISHR:
10923    case TGSI_OPCODE_ISLT:
10924    case TGSI_OPCODE_MAD:
10925    case TGSI_OPCODE_MAX:
10926    case TGSI_OPCODE_MIN:
10927    case TGSI_OPCODE_MUL:
10928    case TGSI_OPCODE_NOP:
10929    case TGSI_OPCODE_NOT:
10930    case TGSI_OPCODE_OR:
10931    case TGSI_OPCODE_UADD:
10932    case TGSI_OPCODE_USEQ:
10933    case TGSI_OPCODE_USGE:
10934    case TGSI_OPCODE_USLT:
10935    case TGSI_OPCODE_UMIN:
10936    case TGSI_OPCODE_UMAD:
10937    case TGSI_OPCODE_UMAX:
10938    case TGSI_OPCODE_ROUND:
10939    case TGSI_OPCODE_SQRT:
10940    case TGSI_OPCODE_SHL:
10941    case TGSI_OPCODE_TRUNC:
10942    case TGSI_OPCODE_U2F:
10943    case TGSI_OPCODE_UCMP:
10944    case TGSI_OPCODE_USHR:
10945    case TGSI_OPCODE_USNE:
10946    case TGSI_OPCODE_XOR:
10947    /* Begin SM5 opcodes */
10948    case TGSI_OPCODE_F2D:
10949    case TGSI_OPCODE_D2F:
10950    case TGSI_OPCODE_DADD:
10951    case TGSI_OPCODE_DMUL:
10952    case TGSI_OPCODE_DMAX:
10953    case TGSI_OPCODE_DMIN:
10954    case TGSI_OPCODE_DSGE:
10955    case TGSI_OPCODE_DSLT:
10956    case TGSI_OPCODE_DSEQ:
10957    case TGSI_OPCODE_DSNE:
10958    case TGSI_OPCODE_BREV:
10959    case TGSI_OPCODE_POPC:
10960    case TGSI_OPCODE_LSB:
10961    case TGSI_OPCODE_INTERP_CENTROID:
10962    case TGSI_OPCODE_INTERP_SAMPLE:
10963       /* simple instructions */
10964       return emit_simple(emit, inst);
10965    case TGSI_OPCODE_RET:
10966       if (emit->unit == PIPE_SHADER_TESS_CTRL &&
10967           !emit->tcs.control_point_phase) {
10968 
10969          /* store the tessellation levels in the patch constant phase only */
10970          store_tesslevels(emit);
10971       }
10972       return emit_simple(emit, inst);
10973 
10974    case TGSI_OPCODE_IMSB:
10975    case TGSI_OPCODE_UMSB:
10976       return emit_msb(emit, inst);
10977    case TGSI_OPCODE_IBFE:
10978    case TGSI_OPCODE_UBFE:
10979       return emit_bfe(emit, inst);
10980    case TGSI_OPCODE_BFI:
10981       return emit_bfi(emit, inst);
10982    case TGSI_OPCODE_MOV:
10983       return emit_mov(emit, inst);
10984    case TGSI_OPCODE_EMIT:
10985       return emit_vertex(emit, inst);
10986    case TGSI_OPCODE_ENDPRIM:
10987       return emit_endprim(emit, inst);
10988    case TGSI_OPCODE_IABS:
10989       return emit_iabs(emit, inst);
10990    case TGSI_OPCODE_ARL:
10991       FALLTHROUGH;
10992    case TGSI_OPCODE_UARL:
10993       return emit_arl_uarl(emit, inst);
10994    case TGSI_OPCODE_BGNSUB:
10995       /* no-op */
10996       return TRUE;
10997    case TGSI_OPCODE_CAL:
10998       return emit_cal(emit, inst);
10999    case TGSI_OPCODE_CMP:
11000       return emit_cmp(emit, inst);
11001    case TGSI_OPCODE_COS:
11002       return emit_sincos(emit, inst);
11003    case TGSI_OPCODE_DST:
11004       return emit_dst(emit, inst);
11005    case TGSI_OPCODE_EX2:
11006       return emit_ex2(emit, inst);
11007    case TGSI_OPCODE_EXP:
11008       return emit_exp(emit, inst);
11009    case TGSI_OPCODE_IF:
11010       return emit_if(emit, &inst->Src[0]);
11011    case TGSI_OPCODE_KILL:
11012       return emit_discard(emit, inst);
11013    case TGSI_OPCODE_KILL_IF:
11014       return emit_cond_discard(emit, inst);
11015    case TGSI_OPCODE_LG2:
11016       return emit_lg2(emit, inst);
11017    case TGSI_OPCODE_LIT:
11018       return emit_lit(emit, inst);
11019    case TGSI_OPCODE_LODQ:
11020       return emit_lodq(emit, inst);
11021    case TGSI_OPCODE_LOG:
11022       return emit_log(emit, inst);
11023    case TGSI_OPCODE_LRP:
11024       return emit_lrp(emit, inst);
11025    case TGSI_OPCODE_POW:
11026       return emit_pow(emit, inst);
11027    case TGSI_OPCODE_RCP:
11028       return emit_rcp(emit, inst);
11029    case TGSI_OPCODE_RSQ:
11030       return emit_rsq(emit, inst);
11031    case TGSI_OPCODE_SAMPLE:
11032       return emit_sample(emit, inst);
11033    case TGSI_OPCODE_SEQ:
11034       return emit_seq(emit, inst);
11035    case TGSI_OPCODE_SGE:
11036       return emit_sge(emit, inst);
11037    case TGSI_OPCODE_SGT:
11038       return emit_sgt(emit, inst);
11039    case TGSI_OPCODE_SIN:
11040       return emit_sincos(emit, inst);
11041    case TGSI_OPCODE_SLE:
11042       return emit_sle(emit, inst);
11043    case TGSI_OPCODE_SLT:
11044       return emit_slt(emit, inst);
11045    case TGSI_OPCODE_SNE:
11046       return emit_sne(emit, inst);
11047    case TGSI_OPCODE_SSG:
11048       return emit_ssg(emit, inst);
11049    case TGSI_OPCODE_ISSG:
11050       return emit_issg(emit, inst);
11051    case TGSI_OPCODE_TEX:
11052       return emit_tex(emit, inst);
11053    case TGSI_OPCODE_TG4:
11054       return emit_tg4(emit, inst);
11055    case TGSI_OPCODE_TEX2:
11056       return emit_tex2(emit, inst);
11057    case TGSI_OPCODE_TXP:
11058       return emit_txp(emit, inst);
11059    case TGSI_OPCODE_TXB:
11060    case TGSI_OPCODE_TXB2:
11061    case TGSI_OPCODE_TXL:
11062       return emit_txl_txb(emit, inst);
11063    case TGSI_OPCODE_TXD:
11064       return emit_txd(emit, inst);
11065    case TGSI_OPCODE_TXF:
11066       return emit_txf(emit, inst);
11067    case TGSI_OPCODE_TXL2:
11068       return emit_txl2(emit, inst);
11069    case TGSI_OPCODE_TXQ:
11070       return emit_txq(emit, inst);
11071    case TGSI_OPCODE_UIF:
11072       return emit_if(emit, &inst->Src[0]);
11073    case TGSI_OPCODE_UMUL_HI:
11074    case TGSI_OPCODE_IMUL_HI:
11075    case TGSI_OPCODE_UDIV:
11076       /* These cases use only the FIRST of two destination registers */
11077       return emit_simple_1dst(emit, inst, 2, 0);
11078    case TGSI_OPCODE_IDIV:
11079       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
11080    case TGSI_OPCODE_UMUL:
11081    case TGSI_OPCODE_UMOD:
11082    case TGSI_OPCODE_MOD:
11083       /* These cases use only the SECOND of two destination registers */
11084       return emit_simple_1dst(emit, inst, 2, 1);
11085 
11086    /* Begin SM5 opcodes */
11087    case TGSI_OPCODE_DABS:
11088       return emit_dabs(emit, inst);
11089    case TGSI_OPCODE_DNEG:
11090       return emit_dneg(emit, inst);
11091    case TGSI_OPCODE_DRCP:
11092       return emit_simple(emit, inst);
11093    case TGSI_OPCODE_DSQRT:
11094       return emit_dsqrt(emit, inst);
11095    case TGSI_OPCODE_DMAD:
11096       return emit_dmad(emit, inst);
11097    case TGSI_OPCODE_DFRAC:
11098       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
11099    case TGSI_OPCODE_D2I:
11100    case TGSI_OPCODE_D2U:
11101       return emit_simple(emit, inst);
11102    case TGSI_OPCODE_I2D:
11103    case TGSI_OPCODE_U2D:
11104       return emit_simple(emit, inst);
11105    case TGSI_OPCODE_DRSQ:
11106       return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
11107    case TGSI_OPCODE_DDIV:
11108       return emit_simple(emit, inst);
11109    case TGSI_OPCODE_INTERP_OFFSET:
11110       return emit_interp_offset(emit, inst);
11111    case TGSI_OPCODE_FMA:
11112    case TGSI_OPCODE_DFMA:
11113       return emit_simple(emit, inst);
11114 
11115    case TGSI_OPCODE_DTRUNC:
11116       return emit_dtrunc(emit, inst);
11117 
11118    /* The following opcodes should never be seen here.  We return zero
11119     * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
11120     * LDEXP_SUPPORTED queries.
11121     */
11122    case TGSI_OPCODE_LDEXP:
11123    case TGSI_OPCODE_DSSG:
11124    case TGSI_OPCODE_DFRACEXP:
11125    case TGSI_OPCODE_DLDEXP:
11126    case TGSI_OPCODE_DCEIL:
11127    case TGSI_OPCODE_DFLR:
11128       debug_printf("Unexpected TGSI opcode %s.  "
11129                    "Should have been translated away by the GLSL compiler.\n",
11130                    tgsi_get_opcode_name(opcode));
11131       return FALSE;
11132 
11133    case TGSI_OPCODE_LOAD:
11134       return emit_load(emit, inst);
11135 
11136    case TGSI_OPCODE_STORE:
11137       return emit_store(emit, inst);
11138 
11139    case TGSI_OPCODE_ATOMAND:
11140       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_AND);
11141 
11142    case TGSI_OPCODE_ATOMCAS:
11143       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_CMP_EXCH);
11144 
11145    case TGSI_OPCODE_ATOMIMAX:
11146       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMAX);
11147 
11148    case TGSI_OPCODE_ATOMIMIN:
11149       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IMIN);
11150 
11151    case TGSI_OPCODE_ATOMOR:
11152       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_OR);
11153 
11154    case TGSI_OPCODE_ATOMUADD:
11155       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_IADD);
11156 
11157    case TGSI_OPCODE_ATOMUMAX:
11158       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMAX);
11159 
11160    case TGSI_OPCODE_ATOMUMIN:
11161       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_UMIN);
11162 
11163    case TGSI_OPCODE_ATOMXCHG:
11164       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_EXCH);
11165 
11166    case TGSI_OPCODE_ATOMXOR:
11167       return emit_atomic(emit, inst, VGPU10_OPCODE_IMM_ATOMIC_XOR);
11168 
11169    case TGSI_OPCODE_BARRIER:
11170       return emit_barrier(emit, inst);
11171 
11172    case TGSI_OPCODE_MEMBAR:
11173       return emit_memory_barrier(emit, inst);
11174 
11175    case TGSI_OPCODE_RESQ:
11176       return emit_resq(emit, inst);
11177 
11178    case TGSI_OPCODE_END:
11179       if (!emit_post_helpers(emit))
11180          return FALSE;
11181       return emit_simple(emit, inst);
11182 
11183    default:
11184       debug_printf("Unimplemented tgsi instruction %s\n",
11185                    tgsi_get_opcode_name(opcode));
11186       return FALSE;
11187    }
11188 
11189    return TRUE;
11190 }
11191 
11192 
11193 /**
11194  * Translate a single TGSI instruction to VGPU10.
11195  */
11196 static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)11197 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
11198                         unsigned inst_number,
11199                         const struct tgsi_full_instruction *inst)
11200 {
11201    if (emit->skip_instruction)
11202       return TRUE;
11203 
11204    boolean ret = TRUE;
11205    unsigned start_token = emit_get_num_tokens(emit);
11206 
11207    emit->reemit_tgsi_instruction = FALSE;
11208 
11209    ret = emit_instruction(emit, inst_number, inst);
11210 
11211    if (emit->reemit_tgsi_instruction) {
11212       /**
11213        * Reset emit->ptr to where the translation of this tgsi instruction
11214        * started.
11215        */
11216       VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
11217       emit->ptr = (char *) (tokens + start_token);
11218 
11219       emit->reemit_tgsi_instruction = FALSE;
11220    }
11221    return ret;
11222 }
11223 
11224 
11225 /**
11226  * Emit the extra instructions to adjust the vertex position.
11227  * There are two possible adjustments:
11228  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
11229  *    "prescale" and "pretranslate" values.
11230  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
11231  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
11232  */
11233 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)11234 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
11235 {
11236    struct tgsi_full_src_register tmp_pos_src;
11237    struct tgsi_full_dst_register pos_dst;
11238    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
11239 
11240    /* Don't bother to emit any extra vertex instructions if vertex position is
11241     * not written out
11242     */
11243    if (emit->vposition.out_index == INVALID_INDEX)
11244       return;
11245 
11246    /**
11247     * Reset the temporary vertex position register index
11248     * so that emit_dst_register() will use the real vertex position output
11249     */
11250    emit->vposition.tmp_index = INVALID_INDEX;
11251 
11252    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
11253    pos_dst = make_dst_output_reg(emit->vposition.out_index);
11254 
11255    /* If non-adjusted vertex position register index
11256     * is valid, copy the vertex position from the temporary
11257     * vertex position register before it is modified by the
11258     * prescale computation.
11259     */
11260    if (emit->vposition.so_index != INVALID_INDEX) {
11261       struct tgsi_full_dst_register pos_so_dst =
11262          make_dst_output_reg(emit->vposition.so_index);
11263 
11264       /* MOV pos_so, tmp_pos */
11265       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
11266    }
11267 
11268    if (emit->vposition.need_prescale) {
11269       /* This code adjusts the vertex position to match the VGPU10 convention.
11270        * If p is the position computed by the shader (usually by applying the
11271        * modelview and projection matrices), the new position q is computed by:
11272        *
11273        * q.x = p.w * trans.x + p.x * scale.x
11274        * q.y = p.w * trans.y + p.y * scale.y
11275        * q.z = p.w * trans.z + p.z * scale.z;
11276        * q.w = p.w * trans.w + p.w;
11277        */
11278       struct tgsi_full_src_register tmp_pos_src_w =
11279          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11280       struct tgsi_full_dst_register tmp_pos_dst =
11281          make_dst_temp_reg(vs_pos_tmp_index);
11282       struct tgsi_full_dst_register tmp_pos_dst_xyz =
11283          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
11284 
11285       struct tgsi_full_src_register prescale_scale =
11286          make_src_temp_reg(emit->vposition.prescale_scale_index);
11287       struct tgsi_full_src_register prescale_trans =
11288          make_src_temp_reg(emit->vposition.prescale_trans_index);
11289 
11290       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
11291       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
11292                            &tmp_pos_src, &prescale_scale);
11293 
11294       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
11295       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
11296                            &prescale_trans, &tmp_pos_src);
11297    }
11298    else if (emit->key.vs.undo_viewport) {
11299       /* This code computes the final vertex position from the temporary
11300        * vertex position by undoing the viewport transformation and the
11301        * divide-by-W operation (we convert window coords back to clip coords).
11302        * This is needed when we use the 'draw' module for fallbacks.
11303        * If p is the temp pos in window coords, then the NDC coord q is:
11304        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
11305        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
11306        *   q.z = p.z * p.w
11307        *   q.w = p.w
11308        * CONST[vs_viewport_index] contains:
11309        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
11310        */
11311       struct tgsi_full_dst_register tmp_pos_dst =
11312          make_dst_temp_reg(vs_pos_tmp_index);
11313       struct tgsi_full_dst_register tmp_pos_dst_xy =
11314          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
11315       struct tgsi_full_src_register tmp_pos_src_wwww =
11316          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
11317 
11318       struct tgsi_full_dst_register pos_dst_xyz =
11319          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
11320       struct tgsi_full_dst_register pos_dst_w =
11321          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
11322 
11323       struct tgsi_full_src_register vp_xyzw =
11324          make_src_const_reg(emit->vs.viewport_index);
11325       struct tgsi_full_src_register vp_zwww =
11326          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
11327                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
11328 
11329       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
11330       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
11331                            &tmp_pos_src, &vp_zwww);
11332 
11333       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
11334       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
11335                            &tmp_pos_src, &vp_xyzw);
11336 
11337       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
11338       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
11339                            &tmp_pos_src, &tmp_pos_src_wwww);
11340 
11341       /* MOV pos.w, tmp_pos.w */
11342       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
11343    }
11344    else if (vs_pos_tmp_index != INVALID_INDEX) {
11345       /* This code is to handle the case where the temporary vertex
11346        * position register is created when the vertex shader has stream
11347        * output and prescale is disabled because rasterization is to be
11348        * discarded.
11349        */
11350       struct tgsi_full_dst_register pos_dst =
11351          make_dst_output_reg(emit->vposition.out_index);
11352 
11353       /* MOV pos, tmp_pos */
11354       begin_emit_instruction(emit);
11355       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
11356       emit_dst_register(emit, &pos_dst);
11357       emit_src_register(emit, &tmp_pos_src);
11358       end_emit_instruction(emit);
11359    }
11360 
11361    /* Restore original vposition.tmp_index value for the next GS vertex.
11362     * It doesn't matter for VS.
11363     */
11364    emit->vposition.tmp_index = vs_pos_tmp_index;
11365 }
11366 
11367 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)11368 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
11369 {
11370    if (emit->clip_mode == CLIP_DISTANCE) {
11371       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
11372       emit_clip_distance_instructions(emit);
11373 
11374    } else if (emit->clip_mode == CLIP_VERTEX &&
11375               emit->key.last_vertex_stage) {
11376       /* Convert TGSI CLIPVERTEX to CLIPDIST */
11377       emit_clip_vertex_instructions(emit);
11378    }
11379 
11380    /**
11381     * Emit vertex position and take care of legacy user planes only if
11382     * there is a valid vertex position register index.
11383     * This is to take care of the case
11384     * where the shader doesn't output vertex position. Then in
11385     * this case, don't bother to emit more vertex instructions.
11386     */
11387    if (emit->vposition.out_index == INVALID_INDEX)
11388       return;
11389 
11390    /**
11391     * Emit per-vertex clipping instructions for legacy user defined clip planes.
11392     * NOTE: we must emit the clip distance instructions before the
11393     * emit_vpos_instructions() call since the later function will change
11394     * the TEMP[vs_pos_tmp_index] value.
11395     */
11396    if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
11397       /* Emit CLIPDIST for legacy user defined clip planes */
11398       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
11399    }
11400 }
11401 
11402 
/**
 * Emit the extra per-vertex instructions: first the clipping
 * instructions, then the vertex position adjustment.  This is called for
 * each GS emit-vertex instruction and at the end of VS translation.
 *
 * The order matters: the clipping code reads the temporary position
 * register that the position code subsequently modifies.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Clip distances, according to the clipping mode */
   emit_clipping_instructions(emit);

   /* Final vertex position */
   emit_vpos_instructions(emit);
}
11417 
11418 
11419 /**
11420  * Translate the TGSI_OPCODE_EMIT GS instruction.
11421  */
11422 static boolean
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)11423 emit_vertex(struct svga_shader_emitter_v10 *emit,
11424             const struct tgsi_full_instruction *inst)
11425 {
11426    unsigned ret = TRUE;
11427 
11428    assert(emit->unit == PIPE_SHADER_GEOMETRY);
11429 
11430    /**
11431     * Emit the viewport array index for the first vertex.
11432     */
11433    if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
11434       struct tgsi_full_dst_register viewport_index_out =
11435          make_dst_output_reg(emit->gs.viewport_index_out_index);
11436       struct tgsi_full_dst_register viewport_index_out_x =
11437          writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
11438       struct tgsi_full_src_register viewport_index_tmp =
11439          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11440 
11441       /* Set the out index to INVALID_INDEX, so it will not
11442        * be assigned to a temp again in emit_dst_register, and
11443        * the viewport index will not be assigned again in the
11444        * subsequent vertices.
11445        */
11446       emit->gs.viewport_index_out_index = INVALID_INDEX;
11447       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11448                            &viewport_index_out_x, &viewport_index_tmp);
11449    }
11450 
11451    /**
11452     * Find the stream index associated with this emit vertex instruction.
11453     */
11454    assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
11455    unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
11456 
11457    /**
11458     * According to the ARB_gpu_shader5 spec, the built-in geometry shader
11459     * outputs are always associated with vertex stream zero.
11460     * So emit the extra vertex instructions for position or clip distance
11461     * for stream zero only.
11462     */
11463    if (streamIndex == 0) {
11464       /**
11465        * Before emitting vertex instructions, emit the temporaries for
11466        * the prescale constants based on the viewport index if needed.
11467        */
11468       if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
11469          emit_temp_prescale_instructions(emit);
11470 
11471       emit_vertex_instructions(emit);
11472    }
11473 
11474    begin_emit_instruction(emit);
11475    if (emit->version >= 50) {
11476       if (emit->info.num_stream_output_components[streamIndex] == 0) {
11477          /**
11478           * If there is no output for this stream, discard this instruction.
11479           */
11480          emit->discard_instruction = TRUE;
11481       }
11482       else {
11483          emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
11484          emit_stream_register(emit, streamIndex);
11485       }
11486    }
11487    else {
11488       emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
11489    }
11490    end_emit_instruction(emit);
11491 
11492    return ret;
11493 }
11494 
11495 
11496 /**
11497  * Emit the extra code to convert from VGPU10's boolean front-face
11498  * register to TGSI's signed front-face register.
11499  *
11500  * TODO: Make temporary front-face register a scalar.
11501  */
11502 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)11503 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
11504 {
11505    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11506 
11507    if (emit->fs.face_input_index != INVALID_INDEX) {
11508       /* convert vgpu10 boolean face register to gallium +/-1 value */
11509       struct tgsi_full_dst_register tmp_dst =
11510          make_dst_temp_reg(emit->fs.face_tmp_index);
11511       struct tgsi_full_src_register one =
11512          make_immediate_reg_float(emit, 1.0f);
11513       struct tgsi_full_src_register neg_one =
11514          make_immediate_reg_float(emit, -1.0f);
11515 
11516       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
11517       begin_emit_instruction(emit);
11518       emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
11519       emit_dst_register(emit, &tmp_dst);
11520       emit_face_register(emit);
11521       emit_src_register(emit, &one);
11522       emit_src_register(emit, &neg_one);
11523       end_emit_instruction(emit);
11524    }
11525 }
11526 
11527 
11528 /**
11529  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
11530  */
11531 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)11532 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
11533 {
11534    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11535 
11536    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
11537       struct tgsi_full_dst_register tmp_dst =
11538          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
11539       struct tgsi_full_dst_register tmp_dst_xyz =
11540          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
11541       struct tgsi_full_dst_register tmp_dst_w =
11542          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11543       struct tgsi_full_src_register one =
11544          make_immediate_reg_float(emit, 1.0f);
11545       struct tgsi_full_src_register fragcoord =
11546          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
11547 
11548       /* save the input index */
11549       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
11550       /* set to invalid to prevent substitution in emit_src_register() */
11551       emit->fs.fragcoord_input_index = INVALID_INDEX;
11552 
11553       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
11554       begin_emit_instruction(emit);
11555       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
11556       emit_dst_register(emit, &tmp_dst_xyz);
11557       emit_src_register(emit, &fragcoord);
11558       end_emit_instruction(emit);
11559 
11560       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
11561       begin_emit_instruction(emit);
11562       emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
11563       emit_dst_register(emit, &tmp_dst_w);
11564       emit_src_register(emit, &one);
11565       emit_src_register(emit, &fragcoord);
11566       end_emit_instruction(emit);
11567 
11568       /* restore saved value */
11569       emit->fs.fragcoord_input_index = fragcoord_input_index;
11570    }
11571 }
11572 
11573 
11574 /**
11575  * Emit the extra code to get the current sample position value and
11576  * put it into a temp register.
11577  */
11578 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)11579 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
11580 {
11581    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11582 
11583    if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
11584       assert(emit->version >= 41);
11585 
11586       struct tgsi_full_dst_register tmp_dst =
11587          make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
11588       struct tgsi_full_src_register half =
11589          make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
11590 
11591       struct tgsi_full_src_register tmp_src =
11592          make_src_temp_reg(emit->fs.sample_pos_tmp_index);
11593       struct tgsi_full_src_register sample_index_reg =
11594          make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
11595                              emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
11596 
11597       /* The first src register is a shader resource (if we want a
11598        * multisampled resource sample position) or the rasterizer register
11599        * (if we want the current sample position in the color buffer).  We
11600        * want the later.
11601        */
11602 
11603       /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
11604       begin_emit_instruction(emit);
11605       emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
11606       emit_dst_register(emit, &tmp_dst);
11607       emit_rasterizer_register(emit);
11608       emit_src_register(emit, &sample_index_reg);
11609       end_emit_instruction(emit);
11610 
11611       /* Convert from D3D coords to GL coords by adding 0.5 bias */
11612       /* ADD dst, dst, half */
11613       begin_emit_instruction(emit);
11614       emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
11615       emit_dst_register(emit, &tmp_dst);
11616       emit_src_register(emit, &tmp_src);
11617       emit_src_register(emit, &half);
11618       end_emit_instruction(emit);
11619    }
11620 }
11621 
11622 
11623 /**
11624  * Emit extra instructions to adjust VS inputs/attributes.  This can
11625  * mean casting a vertex attribute from int to float or setting the
11626  * W component to 1, or both.
11627  */
11628 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)11629 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
11630 {
11631    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
11632    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
11633    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
11634    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
11635    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
11636    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
11637    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
11638 
11639    unsigned adjust_mask = (save_w_1_mask |
11640                            save_itof_mask |
11641                            save_utof_mask |
11642                            save_is_bgra_mask |
11643                            save_puint_to_snorm_mask |
11644                            save_puint_to_uscaled_mask |
11645                            save_puint_to_sscaled_mask);
11646 
11647    assert(emit->unit == PIPE_SHADER_VERTEX);
11648 
11649    if (adjust_mask) {
11650       struct tgsi_full_src_register one =
11651          make_immediate_reg_float(emit, 1.0f);
11652 
11653       struct tgsi_full_src_register one_int =
11654          make_immediate_reg_int(emit, 1);
11655 
11656       /* We need to turn off these bitmasks while emitting the
11657        * instructions below, then restore them afterward.
11658        */
11659       emit->key.vs.adjust_attrib_w_1 = 0;
11660       emit->key.vs.adjust_attrib_itof = 0;
11661       emit->key.vs.adjust_attrib_utof = 0;
11662       emit->key.vs.attrib_is_bgra = 0;
11663       emit->key.vs.attrib_puint_to_snorm = 0;
11664       emit->key.vs.attrib_puint_to_uscaled = 0;
11665       emit->key.vs.attrib_puint_to_sscaled = 0;
11666 
11667       while (adjust_mask) {
11668          unsigned index = u_bit_scan(&adjust_mask);
11669 
11670          /* skip the instruction if this vertex attribute is not being used */
11671          if (emit->info.input_usage_mask[index] == 0)
11672             continue;
11673 
11674          unsigned tmp = emit->vs.adjusted_input[index];
11675          struct tgsi_full_src_register input_src =
11676             make_src_reg(TGSI_FILE_INPUT, index);
11677 
11678          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11679          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11680          struct tgsi_full_dst_register tmp_dst_w =
11681             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
11682 
11683          /* ITOF/UTOF/MOV tmp, input[index] */
11684          if (save_itof_mask & (1 << index)) {
11685             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
11686                                  &tmp_dst, &input_src);
11687          }
11688          else if (save_utof_mask & (1 << index)) {
11689             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
11690                                  &tmp_dst, &input_src);
11691          }
11692          else if (save_puint_to_snorm_mask & (1 << index)) {
11693             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
11694          }
11695          else if (save_puint_to_uscaled_mask & (1 << index)) {
11696             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
11697          }
11698          else if (save_puint_to_sscaled_mask & (1 << index)) {
11699             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
11700          }
11701          else {
11702             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
11703             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11704                                  &tmp_dst, &input_src);
11705          }
11706 
11707          if (save_is_bgra_mask & (1 << index)) {
11708             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
11709          }
11710 
11711          if (save_w_1_mask & (1 << index)) {
11712             /* MOV tmp.w, 1.0 */
11713             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
11714                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11715                                     &tmp_dst_w, &one_int);
11716             }
11717             else {
11718                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
11719                                     &tmp_dst_w, &one);
11720             }
11721          }
11722       }
11723 
11724       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
11725       emit->key.vs.adjust_attrib_itof = save_itof_mask;
11726       emit->key.vs.adjust_attrib_utof = save_utof_mask;
11727       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
11728       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
11729       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
11730       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
11731    }
11732 }
11733 
11734 
11735 /* Find zero-value immedate for default layer index */
11736 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)11737 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
11738 {
11739    assert(emit->unit == PIPE_SHADER_FRAGMENT);
11740 
11741    /* immediate for default layer index 0 */
11742    if (emit->fs.layer_input_index != INVALID_INDEX) {
11743       union tgsi_immediate_data imm;
11744       imm.Int = 0;
11745       emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
11746    }
11747 }
11748 
11749 
11750 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)11751 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11752                              unsigned cbuf_index,
11753                              struct tgsi_full_dst_register *scale,
11754                              struct tgsi_full_dst_register *translate)
11755 {
11756    struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
11757    struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
11758 
11759    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
11760    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
11761 }
11762 
11763 
11764 /**
11765  * A recursive helper function to find the prescale from the constant buffer
11766  */
11767 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)11768 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
11769                         unsigned index, unsigned num_prescale,
11770                         struct tgsi_full_src_register *vp_index,
11771                         struct tgsi_full_dst_register *scale,
11772                         struct tgsi_full_dst_register *translate,
11773                         struct tgsi_full_src_register *tmp_src,
11774                         struct tgsi_full_dst_register *tmp_dst)
11775 {
11776    if (num_prescale == 0)
11777       return;
11778 
11779    if (index > 0) {
11780       /* ELSE */
11781       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
11782    }
11783 
11784    struct tgsi_full_src_register index_src =
11785 	                            make_immediate_reg_int(emit, index);
11786 
11787    if (index == 0) {
11788       /* GE tmp, vp_index, index */
11789       emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
11790                            vp_index, &index_src);
11791    } else {
11792       /* EQ tmp, vp_index, index */
11793       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
11794                            vp_index, &index_src);
11795    }
11796 
11797    /* IF tmp */
11798    emit_if(emit, tmp_src);
11799    emit_temp_prescale_from_cbuf(emit,
11800                                 emit->vposition.prescale_cbuf_index + 2 * index,
11801                                 scale, translate);
11802 
11803    find_prescale_from_cbuf(emit, index+1, num_prescale-1,
11804                            vp_index, scale, translate,
11805                            tmp_src, tmp_dst);
11806 
11807    /* ENDIF */
11808    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
11809 }
11810 
11811 
11812 /**
11813  * This helper function emits instructions to set the prescale
11814  * and translate temporaries to the correct constants from the
11815  * constant buffer according to the designated viewport.
11816  */
11817 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)11818 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
11819 {
11820    struct tgsi_full_dst_register prescale_scale =
11821          make_dst_temp_reg(emit->vposition.prescale_scale_index);
11822    struct tgsi_full_dst_register prescale_translate =
11823          make_dst_temp_reg(emit->vposition.prescale_trans_index);
11824 
11825    unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
11826 
11827    if (emit->vposition.num_prescale == 1) {
11828       emit_temp_prescale_from_cbuf(emit,
11829                                    prescale_cbuf_index,
11830                                    &prescale_scale, &prescale_translate);
11831    } else {
11832       /**
11833        * Since SM5 device does not support dynamic indexing, we need
11834        * to do the if-else to find the prescale constants for the
11835        * specified viewport.
11836        */
11837       struct tgsi_full_src_register vp_index_src =
11838          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
11839 
11840       struct tgsi_full_src_register vp_index_src_x =
11841          scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
11842 
11843       unsigned tmp = get_temp_index(emit);
11844       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
11845       struct tgsi_full_src_register tmp_src_x =
11846                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
11847       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
11848 
11849       find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
11850                               &vp_index_src_x,
11851 		              &prescale_scale, &prescale_translate,
11852                               &tmp_src_x, &tmp_dst);
11853    }
11854 
11855    /* Mark prescale temporaries are emitted */
11856    emit->vposition.have_prescale = 1;
11857 }
11858 
11859 
11860 /**
11861  * A helper function to emit an instruction in a vertex shader to add a bias
11862  * to the VertexID system value. This patches the VertexID in the SVGA vertex
11863  * shader to include the base vertex of an indexed primitive or the start index
11864  * of a non-indexed primitive.
11865  */
11866 static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 * emit)11867 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
11868 {
11869    struct tgsi_full_src_register vertex_id_bias_index =
11870       make_src_const_reg(emit->vs.vertex_id_bias_index);
11871    struct tgsi_full_src_register vertex_id_sys_src =
11872       make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
11873    struct tgsi_full_src_register vertex_id_sys_src_x =
11874       scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
11875    struct tgsi_full_dst_register vertex_id_tmp_dst =
11876       make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
11877 
11878    /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
11879    unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
11880    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11881    emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
11882                         &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
11883                         FALSE);
11884    emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
11885 }
11886 
11887 /**
11888  * Hull Shader must have control point outputs. But tessellation
11889  * control shader can return without writing to control point output.
11890  * In this case, the control point output is assumed to be passthrough
11891  * from the control point input.
11892  * This helper function is to write out a control point output first in case
11893  * the tessellation control shader returns before writing a
11894  * control point output.
11895  */
11896 static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 * emit)11897 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
11898 {
11899    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
11900    assert(emit->tcs.control_point_phase);
11901    assert(emit->tcs.control_point_out_index != INVALID_INDEX);
11902    assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
11903 
11904    struct tgsi_full_dst_register output_control_point;
11905    output_control_point =
11906       make_dst_output_reg(emit->tcs.control_point_out_index);
11907 
11908    if (emit->tcs.control_point_input_index == INVALID_INDEX) {
11909       /* MOV OUTPUT 0.0f */
11910       struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
11911       begin_emit_instruction(emit);
11912       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
11913       emit_dst_register(emit, &output_control_point);
11914       emit_src_register(emit, &zero);
11915       end_emit_instruction(emit);
11916    }
11917    else {
11918       /* UARL ADDR[INDEX].x INVOCATION.xxxx */
11919 
11920       struct tgsi_full_src_register invocation_src;
11921       struct tgsi_full_dst_register addr_dst;
11922       struct tgsi_full_dst_register addr_dst_x;
11923       unsigned addr_tmp;
11924 
11925       addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
11926       addr_dst = make_dst_temp_reg(addr_tmp);
11927       addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
11928 
11929       invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
11930                                     emit->tcs.invocation_id_sys_index);
11931 
11932       begin_emit_instruction(emit);
11933       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
11934       emit_dst_register(emit, &addr_dst_x);
11935       emit_src_register(emit, &invocation_src);
11936       end_emit_instruction(emit);
11937 
11938 
11939       /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
11940 
11941       struct tgsi_full_src_register input_control_point;
11942       input_control_point = make_src_reg(TGSI_FILE_INPUT,
11943                                          emit->tcs.control_point_input_index);
11944       input_control_point.Register.Dimension = 1;
11945       input_control_point.Dimension.Indirect = 1;
11946       input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
11947       input_control_point.DimIndirect.Index =
11948          emit->tcs.control_point_addr_index;
11949 
11950       begin_emit_instruction(emit);
11951       emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
11952       emit_dst_register(emit, &output_control_point);
11953       emit_src_register(emit, &input_control_point);
11954       end_emit_instruction(emit);
11955    }
11956 }
11957 
11958 /**
11959  * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
11960  * values in domain shader. SM5 has tessfactors as floating point values where
11961  * as tgsi emit them as vector. This function allows to construct temp
11962  * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
11963  * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
11964  * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
11965  */
11966 static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 * emit)11967 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
11968 {
11969    struct tgsi_full_src_register src;
11970    struct tgsi_full_dst_register dst;
11971 
11972    if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
11973       dst = make_dst_temp_reg(emit->tes.inner.temp_index);
11974 
11975       switch (emit->tes.prim_mode) {
11976       case PIPE_PRIM_QUADS:
11977          src = make_src_scalar_reg(TGSI_FILE_INPUT,
11978                   emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
11979          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
11980          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
11981          FALLTHROUGH;
11982       case PIPE_PRIM_TRIANGLES:
11983          src = make_src_scalar_reg(TGSI_FILE_INPUT,
11984                   emit->tes.inner.in_index, TGSI_SWIZZLE_X);
11985          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
11986          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
11987          break;
11988       case PIPE_PRIM_LINES:
11989          /**
11990           * As per SM5 spec, InsideTessFactor for isolines are unused.
11991           * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
11992           * any application try to read tessInnerLevel in TES when primitive type
11993           * is isolines, then instead of driver throwing segfault for accesing it,
11994           * return atleast vec(1.0f)
11995           */
11996          src = make_immediate_reg_float(emit, 1.0f);
11997          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
11998          break;
11999       default:
12000          break;
12001       }
12002    }
12003 
12004    if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
12005       dst = make_dst_temp_reg(emit->tes.outer.temp_index);
12006 
12007       switch (emit->tes.prim_mode) {
12008       case PIPE_PRIM_QUADS:
12009          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12010                   emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
12011          dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
12012          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12013          FALLTHROUGH;
12014       case PIPE_PRIM_TRIANGLES:
12015          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12016                   emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
12017          dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
12018          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12019          FALLTHROUGH;
12020       case PIPE_PRIM_LINES:
12021          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12022                   emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
12023          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
12024          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12025 
12026          src = make_src_scalar_reg(TGSI_FILE_INPUT,
12027                   emit->tes.outer.in_index , TGSI_SWIZZLE_X);
12028          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
12029          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12030 
12031          break;
12032       default:
12033          break;
12034       }
12035    }
12036 }
12037 
12038 
12039 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)12040 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
12041 {
12042    struct tgsi_full_src_register src;
12043    struct tgsi_full_dst_register dst;
12044    unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
12045                                                  emit->initialize_temp_index);
12046    src = make_immediate_reg_float(emit, 0.0f);
12047    dst = make_dst_temp_reg(vgpu10_temp_index);
12048    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
12049    emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
12050    emit->initialize_temp_index = INVALID_INDEX;
12051 }
12052 
12053 
12054 /**
12055  * Emit any extra/helper declarations/code that we might need between
12056  * the declaration section and code section.
12057  */
12058 static boolean
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)12059 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
12060 {
12061    /* Properties */
12062    if (emit->unit == PIPE_SHADER_GEOMETRY)
12063       emit_property_instructions(emit);
12064    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12065       emit_hull_shader_declarations(emit);
12066 
12067       /* Save the position of the first instruction token so that we can
12068        * do a second pass of the instructions for the patch constant phase.
12069        */
12070       emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
12071       emit->tcs.fork_phase_add_signature = FALSE;
12072 
12073       if (!emit_hull_shader_control_point_phase(emit)) {
12074          emit->skip_instruction = TRUE;
12075          return TRUE;
12076       }
12077 
12078       /* Set the current tcs phase to control point phase */
12079       emit->tcs.control_point_phase = TRUE;
12080    }
12081    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12082       emit_domain_shader_declarations(emit);
12083    }
12084    else if (emit->unit == PIPE_SHADER_COMPUTE) {
12085       emit_compute_shader_declarations(emit);
12086    }
12087 
12088    /* Declare inputs */
12089    if (!emit_input_declarations(emit))
12090       return FALSE;
12091 
12092    /* Declare outputs */
12093    if (!emit_output_declarations(emit))
12094       return FALSE;
12095 
12096    /* Declare temporary registers */
12097    emit_temporaries_declaration(emit);
12098 
12099    /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
12100     * will already be declared in hs_decls (emit_hull_shader_declarations)
12101     */
12102    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12103 
12104       alloc_common_immediates(emit);
12105 
12106       /* Declare constant registers */
12107       emit_constant_declaration(emit);
12108 
12109       /* Declare samplers and resources */
12110       emit_sampler_declarations(emit);
12111       emit_resource_declarations(emit);
12112 
12113       /* Declare images */
12114       emit_image_declarations(emit);
12115 
12116       /* Declare shader buffers */
12117       emit_shader_buf_declarations(emit);
12118 
12119       /* Declare atomic buffers */
12120       emit_atomic_buf_declarations(emit);
12121    }
12122 
12123    if (emit->unit != PIPE_SHADER_FRAGMENT &&
12124        emit->unit != PIPE_SHADER_COMPUTE) {
12125       /*
12126        * Declare clip distance output registers for ClipVertex or
12127        * user defined planes
12128        */
12129       emit_clip_distance_declarations(emit);
12130    }
12131 
12132    if (emit->unit == PIPE_SHADER_COMPUTE) {
12133       emit_memory_declarations(emit);
12134 
12135       if (emit->cs.grid_size.tgsi_index != INVALID_INDEX) {
12136          emit->cs.grid_size.imm_index =
12137             alloc_immediate_int4(emit,
12138                                  emit->key.cs.grid_size[0],
12139                                  emit->key.cs.grid_size[1],
12140                                  emit->key.cs.grid_size[2], 0);
12141       }
12142    }
12143 
12144    if (emit->unit == PIPE_SHADER_FRAGMENT &&
12145        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12146       float alpha = emit->key.fs.alpha_ref;
12147       emit->fs.alpha_ref_index =
12148          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
12149    }
12150 
12151    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
12152       /**
12153        * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
12154        * hs_decls
12155        */
12156       emit_vgpu10_immediates_block(emit);
12157    }
12158    else {
12159       emit_tcs_default_control_point_output(emit);
12160    }
12161 
12162    if (emit->unit == PIPE_SHADER_FRAGMENT) {
12163       emit_frontface_instructions(emit);
12164       emit_fragcoord_instructions(emit);
12165       emit_sample_position_instructions(emit);
12166       emit_default_layer_instructions(emit);
12167    }
12168    else if (emit->unit == PIPE_SHADER_VERTEX) {
12169       emit_vertex_attrib_instructions(emit);
12170 
12171       if (emit->info.uses_vertexid)
12172          emit_vertex_id_nobase_instruction(emit);
12173    }
12174    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12175       emit_temp_tessfactor_instructions(emit);
12176    }
12177 
12178    /**
12179     * For geometry shader that writes to viewport index, the prescale
12180     * temporaries will be done at the first vertex emission.
12181     */
12182    if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
12183       emit_temp_prescale_instructions(emit);
12184 
12185    return TRUE;
12186 }
12187 
12188 
12189 /**
12190  * The device has no direct support for the pipe_blend_state::alpha_to_one
12191  * option so we implement it here with shader code.
12192  *
12193  * Note that this is kind of pointless, actually.  Here we're clobbering
12194  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
12195  * up with 100% coverage.  That's almost certainly not what the user wants.
12196  * The work-around is to add extra shader code to compute coverage from alpha
12197  * and write it to the coverage output register (if the user's shader doesn't
12198  * do so already).  We'll probably do that in the future.
12199  */
12200 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12201 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
12202                                unsigned fs_color_tmp_index)
12203 {
12204    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
12205    unsigned i;
12206 
12207    /* Note: it's not 100% clear from the spec if we're supposed to clobber
12208     * the alpha for all render targets.  But that's what NVIDIA does and
12209     * that's what Piglit tests.
12210     */
12211    for (i = 0; i < emit->fs.num_color_outputs; i++) {
12212       struct tgsi_full_dst_register color_dst;
12213 
12214       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
12215          /* write to the temp color register */
12216          color_dst = make_dst_temp_reg(fs_color_tmp_index);
12217       }
12218       else {
12219          /* write directly to the color[i] output */
12220          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
12221       }
12222 
12223       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
12224 
12225       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
12226    }
12227 }
12228 
12229 
12230 /**
12231  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
12232  * against the alpha reference value and discards the fragment if the
12233  * comparison fails.
12234  */
12235 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12236 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
12237                              unsigned fs_color_tmp_index)
12238 {
12239    /* compare output color's alpha to alpha ref and discard if comparison
12240     * fails.
12241     */
12242    unsigned tmp = get_temp_index(emit);
12243    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
12244    struct tgsi_full_src_register tmp_src_x =
12245       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
12246    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
12247    struct tgsi_full_src_register color_src =
12248       make_src_temp_reg(fs_color_tmp_index);
12249    struct tgsi_full_src_register color_src_w =
12250       scalar_src(&color_src, TGSI_SWIZZLE_W);
12251    struct tgsi_full_src_register ref_src =
12252       make_src_immediate_reg(emit->fs.alpha_ref_index);
12253    struct tgsi_full_dst_register color_dst =
12254       make_dst_output_reg(emit->fs.color_out_index[0]);
12255 
12256    assert(emit->unit == PIPE_SHADER_FRAGMENT);
12257 
12258    /* dst = src0 'alpha_func' src1 */
12259    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
12260                    &color_src_w, &ref_src);
12261 
12262    /* DISCARD if dst.x == 0 */
12263    begin_emit_instruction(emit);
12264    emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
12265    emit_src_register(emit, &tmp_src_x);
12266    end_emit_instruction(emit);
12267 
12268    /* If we don't need to broadcast the color below, emit the final color here.
12269     */
12270    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
12271       /* MOV output.color, tempcolor */
12272       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12273    }
12274 
12275    free_temp_indexes(emit);
12276 }
12277 
12278 
12279 /**
12280  * Emit instructions for writing a single color output to multiple
12281  * color buffers.
12282  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
12283  * when key.fs.white_fragments is true).
12284  * property is set and the number of render targets is greater than one.
12285  * \param fs_color_tmp_index  index of the temp register that holds the
12286  *                            color to broadcast.
12287  */
12288 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)12289 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
12290                                  unsigned fs_color_tmp_index)
12291 {
12292    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
12293    unsigned i;
12294    struct tgsi_full_src_register color_src;
12295 
12296    if (emit->key.fs.white_fragments) {
12297       /* set all color outputs to white */
12298       color_src = make_immediate_reg_float(emit, 1.0f);
12299    }
12300    else {
12301       /* set all color outputs to TEMP[fs_color_tmp_index] */
12302       assert(fs_color_tmp_index != INVALID_INDEX);
12303       color_src = make_src_temp_reg(fs_color_tmp_index);
12304    }
12305 
12306    assert(emit->unit == PIPE_SHADER_FRAGMENT);
12307 
12308    for (i = 0; i < n; i++) {
12309       unsigned output_reg = emit->fs.color_out_index[i];
12310       struct tgsi_full_dst_register color_dst =
12311          make_dst_output_reg(output_reg);
12312 
12313       /* Fill in this semantic here since we'll use it later in
12314        * emit_dst_register().
12315        */
12316       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
12317 
12318       /* MOV output.color[i], tempcolor */
12319       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
12320    }
12321 }
12322 
12323 
12324 /**
12325  * Emit extra helper code after the original shader code, but before the
12326  * last END/RET instruction.
12327  * For vertex shaders this means emitting the extra code to apply the
12328  * prescale scale/translation.
12329  */
12330 static boolean
emit_post_helpers(struct svga_shader_emitter_v10 * emit)12331 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
12332 {
12333    if (emit->unit == PIPE_SHADER_VERTEX) {
12334       emit_vertex_instructions(emit);
12335    }
12336    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
12337       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
12338 
12339       assert(!(emit->key.fs.white_fragments &&
12340                emit->key.fs.write_color0_to_n_cbufs == 0));
12341 
12342       /* We no longer want emit_dst_register() to substitute the
12343        * temporary fragment color register for the real color output.
12344        */
12345       emit->fs.color_tmp_index = INVALID_INDEX;
12346 
12347       if (emit->key.fs.alpha_to_one) {
12348          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
12349       }
12350       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
12351          emit_alpha_test_instructions(emit, fs_color_tmp_index);
12352       }
12353       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
12354           emit->key.fs.white_fragments) {
12355          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
12356       }
12357    }
12358    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12359       if (!emit->tcs.control_point_phase) {
12360          /* store the tessellation levels in the patch constant phase only */
12361          store_tesslevels(emit);
12362       }
12363       else {
12364          emit_clipping_instructions(emit);
12365       }
12366    }
12367    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
12368       emit_vertex_instructions(emit);
12369    }
12370 
12371    return TRUE;
12372 }
12373 
12374 
12375 /**
12376  * Reemit rawbuf instruction
12377  */
12378 static boolean
emit_rawbuf_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)12379 emit_rawbuf_instruction(struct svga_shader_emitter_v10 *emit,
12380                         unsigned inst_number,
12381                         const struct tgsi_full_instruction *inst)
12382 {
12383    boolean ret;
12384 
12385    /* For all the rawbuf references in this instruction,
12386     * load the rawbuf reference and assign to the designated temporary.
12387     * Then reeemit the instruction.
12388     */
12389    emit->reemit_rawbuf_instruction = REEMIT_IN_PROGRESS;
12390 
12391    unsigned offset_tmp = get_temp_index(emit);
12392    struct tgsi_full_dst_register offset_dst = make_dst_temp_reg(offset_tmp);
12393    struct tgsi_full_src_register offset_src = make_src_temp_reg(offset_tmp);
12394    struct tgsi_full_src_register four = make_immediate_reg_int(emit, 4);
12395 
12396    for (unsigned i = 0; i < emit->raw_buf_cur_tmp_index; i++) {
12397       struct tgsi_full_src_register element_src;
12398 
12399       /* First get the element index register. */
12400 
12401       if (emit->raw_buf_tmp[i].indirect) {
12402          unsigned tmp = get_temp_index(emit);
12403          struct tgsi_full_dst_register element_dst = make_dst_temp_reg(tmp);
12404          struct tgsi_full_src_register element_index =
12405             make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12406          struct tgsi_full_src_register element_rel =
12407             make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_rel);
12408 
12409          element_src = make_src_temp_reg(tmp);
12410          element_src = scalar_src(&element_src, TGSI_SWIZZLE_X);
12411          element_dst = writemask_dst(&element_dst, TGSI_WRITEMASK_X);
12412 
12413          /* element index from the indirect register */
12414          element_index = make_src_temp_reg(emit->raw_buf_tmp[i].element_index);
12415          element_index = scalar_src(&element_index, TGSI_SWIZZLE_X);
12416 
12417          /* IADD element_src element_index element_index_relative */
12418          emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &element_dst,
12419                               &element_index, &element_rel);
12420       }
12421       else {
12422          element_src =
12423             make_immediate_reg_int(emit, emit->raw_buf_tmp[i].element_index);
12424       }
12425 
12426       /* byte offset = element index << 4 */
12427       emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &offset_dst,
12428                            &element_src, &four);
12429 
12430       struct tgsi_full_dst_register dst_tmp =
12431          make_dst_temp_reg(i + emit->raw_buf_tmp_index);
12432 
12433       /* LD_RAW tmp, rawbuf byte offset, rawbuf */
12434 
12435       begin_emit_instruction(emit);
12436       emit_opcode(emit, VGPU10_OPCODE_LD_RAW, FALSE);
12437       emit_dst_register(emit, &dst_tmp);
12438 
12439       struct tgsi_full_src_register offset_x =
12440             scalar_src(&offset_src, TGSI_SWIZZLE_X);
12441       emit_src_register(emit, &offset_x);
12442 
12443       emit_resource_register(emit,
12444          emit->raw_buf_tmp[i].buffer_index + emit->raw_buf_srv_start_index);
12445       end_emit_instruction(emit);
12446    }
12447 
12448    emit->raw_buf_cur_tmp_index = 0;
12449 
12450    ret = emit_vgpu10_instruction(emit, inst_number, inst);
12451 
12452    /* reset raw buf state */
12453    emit->raw_buf_cur_tmp_index = 0;
12454    emit->reemit_rawbuf_instruction = REEMIT_FALSE;
12455 
12456    free_temp_indexes(emit);
12457 
12458    return ret;
12459 }
12460 
12461 
12462 /**
12463  * Translate the TGSI tokens into VGPU10 tokens.
12464  */
12465 static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12466 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
12467                          const struct tgsi_token *tokens)
12468 {
12469    struct tgsi_parse_context parse;
12470    boolean ret = TRUE;
12471    boolean pre_helpers_emitted = FALSE;
12472    unsigned inst_number = 0;
12473 
12474    tgsi_parse_init(&parse, tokens);
12475 
12476    while (!tgsi_parse_end_of_tokens(&parse)) {
12477 
12478       /* Save the current tgsi token starting position */
12479       emit->cur_tgsi_token = parse.Position;
12480 
12481       tgsi_parse_token(&parse);
12482 
12483       switch (parse.FullToken.Token.Type) {
12484       case TGSI_TOKEN_TYPE_IMMEDIATE:
12485          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
12486          if (!ret)
12487             goto done;
12488          break;
12489 
12490       case TGSI_TOKEN_TYPE_DECLARATION:
12491          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
12492          if (!ret)
12493             goto done;
12494          break;
12495 
12496       case TGSI_TOKEN_TYPE_INSTRUCTION:
12497          if (!pre_helpers_emitted) {
12498             ret = emit_pre_helpers(emit);
12499             if (!ret)
12500                goto done;
12501             pre_helpers_emitted = TRUE;
12502          }
12503          ret = emit_vgpu10_instruction(emit, inst_number++,
12504                                        &parse.FullToken.FullInstruction);
12505 
12506          /* Usually this applies to TCS only. If shader is reading control
12507           * point outputs in control point phase, we should reemit all
12508           * instructions which are writting into control point output in
12509           * control phase to store results into temporaries.
12510           */
12511          if (emit->reemit_instruction) {
12512             assert(emit->unit == PIPE_SHADER_TESS_CTRL);
12513             ret = emit_vgpu10_instruction(emit, inst_number,
12514                                           &parse.FullToken.FullInstruction);
12515          }
12516          else if (emit->initialize_temp_index != INVALID_INDEX) {
12517             emit_initialize_temp_instruction(emit);
12518             emit->initialize_temp_index = INVALID_INDEX;
12519             ret = emit_vgpu10_instruction(emit, inst_number - 1,
12520                                           &parse.FullToken.FullInstruction);
12521          }
12522          else if (emit->reemit_rawbuf_instruction) {
12523             ret = emit_rawbuf_instruction(emit, inst_number - 1,
12524                                           &parse.FullToken.FullInstruction);
12525          }
12526 
12527          if (!ret)
12528             goto done;
12529          break;
12530 
12531       case TGSI_TOKEN_TYPE_PROPERTY:
12532          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
12533          if (!ret)
12534             goto done;
12535          break;
12536 
12537       default:
12538          break;
12539       }
12540    }
12541 
12542    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12543       ret = emit_hull_shader_patch_constant_phase(emit, &parse);
12544    }
12545 
12546 done:
12547    tgsi_parse_free(&parse);
12548    return ret;
12549 }
12550 
12551 
12552 /**
12553  * Emit the first VGPU10 shader tokens.
12554  */
12555 static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 * emit)12556 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
12557 {
12558    VGPU10ProgramToken ptoken;
12559 
12560    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
12561 
12562    /* Maximum supported shader version is 50 */
12563    unsigned version = MIN2(emit->version, 50);
12564 
12565    ptoken.value = 0; /* init whole token to zero */
12566    ptoken.majorVersion = version / 10;
12567    ptoken.minorVersion = version % 10;
12568    ptoken.programType = translate_shader_type(emit->unit);
12569    if (!emit_dword(emit, ptoken.value))
12570       return FALSE;
12571 
12572    /* Second token: total length of shader, in tokens.  We can't fill this
12573     * in until we're all done.  Emit zero for now.
12574     */
12575    if (!emit_dword(emit, 0))
12576       return FALSE;
12577 
12578    if (emit->version >= 50) {
12579       VGPU10OpcodeToken0 token;
12580 
12581       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
12582          /* For hull shader, we need to start the declarations phase first before
12583           * emitting any declarations including the global flags.
12584           */
12585          token.value = 0;
12586          token.opcodeType = VGPU10_OPCODE_HS_DECLS;
12587          begin_emit_instruction(emit);
12588          emit_dword(emit, token.value);
12589          end_emit_instruction(emit);
12590       }
12591 
12592       /* Emit global flags */
12593       token.value = 0;    /* init whole token to zero */
12594       token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12595       token.enableDoublePrecisionFloatOps = 1;  /* set bit */
12596       token.instructionLength = 1;
12597       if (!emit_dword(emit, token.value))
12598          return FALSE;
12599    }
12600 
12601    if (emit->version >= 40) {
12602       VGPU10OpcodeToken0 token;
12603 
12604       /* Reserved for global flag such as refactoringAllowed.
12605        * If the shader does not use the precise qualifier, we will set the
12606        * refactoringAllowed global flag; otherwise, we will leave the reserved
12607        * token to NOP.
12608        */
12609       emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
12610       token.value = 0;
12611       token.opcodeType = VGPU10_OPCODE_NOP;
12612       token.instructionLength = 1;
12613       if (!emit_dword(emit, token.value))
12614          return FALSE;
12615    }
12616 
12617    return TRUE;
12618 }
12619 
12620 
12621 static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)12622 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
12623 {
12624    VGPU10ProgramToken *tokens;
12625 
12626    /* Replace the second token with total shader length */
12627    tokens = (VGPU10ProgramToken *) emit->buf;
12628    tokens[1].value = emit_get_num_tokens(emit);
12629 
12630    if (emit->version >= 40 && !emit->uses_precise_qualifier) {
12631       /* Replace the reserved token with the RefactoringAllowed global flag */
12632       VGPU10OpcodeToken0 *ptoken;
12633 
12634       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12635       assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
12636       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12637       ptoken->refactoringAllowed = 1;
12638    }
12639 
12640    if (emit->version >= 50 && emit->fs.forceEarlyDepthStencil) {
12641       /* Replace the reserved token with the forceEarlyDepthStencil  global flag */
12642       VGPU10OpcodeToken0 *ptoken;
12643 
12644       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
12645       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
12646       ptoken->forceEarlyDepthStencil = 1;
12647    }
12648 
12649    return TRUE;
12650 }
12651 
12652 
/**
 * Run the fragment shader tokens through tgsi_add_two_side() so that the
 * FS reads the BCOLORs and uses the FACE register to choose between the
 * front and back colors.
 *
 * \param tokens  the incoming TGSI tokens
 * \return the token string returned by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *twoside_tokens;

   /* Flip to 1 to dump the shader before the transformation */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }

   twoside_tokens = tgsi_add_two_side(tokens);

   /* Flip to 1 to dump the shader after the transformation */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(twoside_tokens, 0);
   }

   return twoside_tokens;
}
12671 
12672 
12673 /**
12674  * Modify the FS to do polygon stipple.
12675  */
12676 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)12677 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
12678                       const struct tgsi_token *tokens)
12679 {
12680    const struct tgsi_token *new_tokens;
12681    unsigned unit;
12682 
12683    if (0) {
12684       debug_printf("Before pstipple ------------------\n");
12685       tgsi_dump(tokens,0);
12686    }
12687 
12688    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
12689                                                      TGSI_FILE_INPUT);
12690 
12691    emit->fs.pstipple_sampler_unit = unit;
12692 
12693    /* The new sampler state is appended to the end of the samplers list */
12694    emit->fs.pstipple_sampler_state_index = emit->key.num_samplers++;
12695 
12696    /* Setup texture state for stipple */
12697    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
12698    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
12699    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
12700    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
12701    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
12702    emit->key.tex[unit].target = PIPE_TEXTURE_2D;
12703    emit->key.tex[unit].sampler_index = emit->fs.pstipple_sampler_state_index;
12704 
12705    if (0) {
12706       debug_printf("After pstipple ------------------\n");
12707       tgsi_dump(new_tokens, 0);
12708    }
12709 
12710    return new_tokens;
12711 }
12712 
12713 /**
12714  * Modify the FS to support anti-aliasing point.
12715  */
12716 static const struct tgsi_token *
transform_fs_aapoint(struct svga_context * svga,const struct tgsi_token * tokens,int aa_coord_index)12717 transform_fs_aapoint(struct svga_context *svga,
12718 		     const struct tgsi_token *tokens,
12719                      int aa_coord_index)
12720 {
12721    bool need_texcoord_semantic =
12722       svga->pipe.screen->get_param(svga->pipe.screen, PIPE_CAP_TGSI_TEXCOORD);
12723 
12724    if (0) {
12725       debug_printf("Before tgsi_add_aa_point ------------------\n");
12726       tgsi_dump(tokens,0);
12727    }
12728    tokens = tgsi_add_aa_point(tokens, aa_coord_index, need_texcoord_semantic);
12729    if (0) {
12730       debug_printf("After tgsi_add_aa_point ------------------\n");
12731       tgsi_dump(tokens, 0);
12732    }
12733    return tokens;
12734 }
12735 
12736 
12737 /**
12738  * A helper function to determine the shader in the previous stage and
12739  * then call the linker function to determine the input mapping for this
12740  * shader to match the output indices from the shader in the previous stage.
12741  */
12742 static void
compute_input_mapping(struct svga_context * svga,struct svga_shader_emitter_v10 * emit,enum pipe_shader_type unit)12743 compute_input_mapping(struct svga_context *svga,
12744                       struct svga_shader_emitter_v10 *emit,
12745                       enum pipe_shader_type unit)
12746 {
12747    struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
12748 
12749    if (unit == PIPE_SHADER_FRAGMENT) {
12750       prevShader = svga->curr.gs ?
12751          &svga->curr.gs->base : (svga->curr.tes ?
12752          &svga->curr.tes->base : &svga->curr.vs->base);
12753    } else if (unit == PIPE_SHADER_GEOMETRY) {
12754       prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
12755    } else if (unit == PIPE_SHADER_TESS_EVAL) {
12756       assert(svga->curr.tcs);
12757       prevShader = &svga->curr.tcs->base;
12758    } else if (unit == PIPE_SHADER_TESS_CTRL) {
12759       assert(svga->curr.vs);
12760       prevShader = &svga->curr.vs->base;
12761    }
12762 
12763    if (prevShader != NULL) {
12764       svga_link_shaders(&prevShader->tgsi_info, &emit->info, &emit->linkage);
12765       emit->prevShaderInfo = &prevShader->tgsi_info;
12766    }
12767    else {
12768       /**
12769        * Since vertex shader does not need to go through the linker to
12770        * establish the input map, we need to make sure the highest index
12771        * of input registers is set properly here.
12772        */
12773       emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
12774                                          emit->info.file_max[TGSI_FILE_INPUT]);
12775    }
12776 }
12777 
12778 
12779 /**
12780  * Copies the shader signature info to the shader variant
12781  */
12782 static void
copy_shader_signature(struct svga_shader_signature * sgn,struct svga_shader_variant * variant)12783 copy_shader_signature(struct svga_shader_signature *sgn,
12784                       struct svga_shader_variant *variant)
12785 {
12786    SVGA3dDXShaderSignatureHeader *header = &sgn->header;
12787 
12788    /* Calculate the signature length */
12789    variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
12790                            (header->numInputSignatures +
12791                             header->numOutputSignatures +
12792                             header->numPatchConstantSignatures) *
12793                            sizeof(SVGA3dDXShaderSignatureEntry);
12794 
12795    /* Allocate buffer for the signature info */
12796    variant->signature =
12797       (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
12798 
12799    char *sgnBuf = (char *)variant->signature;
12800    unsigned sgnLen;
12801 
12802    /* Copy the signature info to the shader variant structure */
12803    memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
12804    sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
12805 
12806    if (header->numInputSignatures) {
12807       sgnLen =
12808          header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12809       memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
12810       sgnBuf += sgnLen;
12811    }
12812 
12813    if (header->numOutputSignatures) {
12814       sgnLen =
12815          header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12816       memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
12817       sgnBuf += sgnLen;
12818    }
12819 
12820    if (header->numPatchConstantSignatures) {
12821       sgnLen =
12822          header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
12823       memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
12824    }
12825 }
12826 
12827 
12828 /**
12829  * This is the main entrypoint for the TGSI -> VPGU10 translator.
12830  */
12831 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,enum pipe_shader_type unit)12832 svga_tgsi_vgpu10_translate(struct svga_context *svga,
12833                            const struct svga_shader *shader,
12834                            const struct svga_compile_key *key,
12835                            enum pipe_shader_type unit)
12836 {
12837    struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
12838    struct svga_shader_variant *variant = NULL;
12839    struct svga_shader_emitter_v10 *emit;
12840    const struct tgsi_token *tokens = shader->tokens;
12841 
12842    (void) make_immediate_reg_double;   /* unused at this time */
12843 
12844    assert(unit == PIPE_SHADER_VERTEX ||
12845           unit == PIPE_SHADER_GEOMETRY ||
12846           unit == PIPE_SHADER_FRAGMENT ||
12847           unit == PIPE_SHADER_TESS_CTRL ||
12848           unit == PIPE_SHADER_TESS_EVAL ||
12849           unit == PIPE_SHADER_COMPUTE);
12850 
12851    /* These two flags cannot be used together */
12852    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
12853 
12854    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
12855    /*
12856     * Setup the code emitter
12857     */
12858    emit = alloc_emitter();
12859    if (!emit)
12860       goto done;
12861 
12862    emit->unit = unit;
12863    if (svga_have_gl43(svga)) {
12864       emit->version = 51;
12865    } else if (svga_have_sm5(svga)) {
12866       emit->version = 50;
12867    } else if (svga_have_sm4_1(svga)) {
12868       emit->version = 41;
12869    } else {
12870       emit->version = 40;
12871    }
12872 
12873    emit->use_sampler_state_mapping = emit->key.sampler_state_mapping;
12874 
12875    emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
12876 
12877    emit->key = *key;
12878 
12879    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
12880                                     emit->key.gs.need_prescale ||
12881                                     emit->key.tes.need_prescale);
12882 
12883    /* Determine how many prescale factors in the constant buffer */
12884    emit->vposition.num_prescale = 1;
12885    if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
12886       assert(emit->unit == PIPE_SHADER_GEOMETRY);
12887       emit->vposition.num_prescale = emit->key.gs.num_prescale;
12888    }
12889 
12890    emit->vposition.tmp_index = INVALID_INDEX;
12891    emit->vposition.so_index = INVALID_INDEX;
12892    emit->vposition.out_index = INVALID_INDEX;
12893 
12894    emit->vs.vertex_id_sys_index = INVALID_INDEX;
12895    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
12896    emit->vs.vertex_id_bias_index = INVALID_INDEX;
12897 
12898    emit->fs.color_tmp_index = INVALID_INDEX;
12899    emit->fs.face_input_index = INVALID_INDEX;
12900    emit->fs.fragcoord_input_index = INVALID_INDEX;
12901    emit->fs.sample_id_sys_index = INVALID_INDEX;
12902    emit->fs.sample_pos_sys_index = INVALID_INDEX;
12903    emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
12904    emit->fs.layer_input_index = INVALID_INDEX;
12905    emit->fs.layer_imm_index = INVALID_INDEX;
12906 
12907    emit->gs.prim_id_index = INVALID_INDEX;
12908    emit->gs.invocation_id_sys_index = INVALID_INDEX;
12909    emit->gs.viewport_index_out_index = INVALID_INDEX;
12910    emit->gs.viewport_index_tmp_index = INVALID_INDEX;
12911 
12912    emit->tcs.vertices_per_patch_index = INVALID_INDEX;
12913    emit->tcs.invocation_id_sys_index = INVALID_INDEX;
12914    emit->tcs.control_point_input_index = INVALID_INDEX;
12915    emit->tcs.control_point_addr_index = INVALID_INDEX;
12916    emit->tcs.control_point_out_index = INVALID_INDEX;
12917    emit->tcs.control_point_tmp_index = INVALID_INDEX;
12918    emit->tcs.control_point_out_count = 0;
12919    emit->tcs.inner.out_index = INVALID_INDEX;
12920    emit->tcs.inner.temp_index = INVALID_INDEX;
12921    emit->tcs.inner.tgsi_index = INVALID_INDEX;
12922    emit->tcs.outer.out_index = INVALID_INDEX;
12923    emit->tcs.outer.temp_index = INVALID_INDEX;
12924    emit->tcs.outer.tgsi_index = INVALID_INDEX;
12925    emit->tcs.patch_generic_out_count = 0;
12926    emit->tcs.patch_generic_out_index = INVALID_INDEX;
12927    emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
12928    emit->tcs.prim_id_index = INVALID_INDEX;
12929 
12930    emit->tes.tesscoord_sys_index = INVALID_INDEX;
12931    emit->tes.inner.in_index = INVALID_INDEX;
12932    emit->tes.inner.temp_index = INVALID_INDEX;
12933    emit->tes.inner.tgsi_index = INVALID_INDEX;
12934    emit->tes.outer.in_index = INVALID_INDEX;
12935    emit->tes.outer.temp_index = INVALID_INDEX;
12936    emit->tes.outer.tgsi_index = INVALID_INDEX;
12937    emit->tes.prim_id_index = INVALID_INDEX;
12938 
12939    emit->cs.thread_id_index = INVALID_INDEX;
12940    emit->cs.block_id_index = INVALID_INDEX;
12941    emit->cs.grid_size.tgsi_index = INVALID_INDEX;
12942    emit->cs.grid_size.imm_index = INVALID_INDEX;
12943    emit->cs.block_width = 1;
12944    emit->cs.block_height = 1;
12945    emit->cs.block_depth = 1;
12946 
12947    emit->clip_dist_out_index = INVALID_INDEX;
12948    emit->clip_dist_tmp_index = INVALID_INDEX;
12949    emit->clip_dist_so_index = INVALID_INDEX;
12950    emit->clip_vertex_out_index = INVALID_INDEX;
12951    emit->clip_vertex_tmp_index = INVALID_INDEX;
12952    emit->svga_debug_callback = svga->debug.callback;
12953 
12954    emit->index_range.start_index = INVALID_INDEX;
12955    emit->index_range.count = 0;
12956    emit->index_range.required = FALSE;
12957    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
12958    emit->index_range.dim = 0;
12959    emit->index_range.size = 0;
12960 
12961    emit->current_loop_depth = 0;
12962 
12963    emit->initialize_temp_index = INVALID_INDEX;
12964    emit->image_size_index = INVALID_INDEX;
12965 
12966    emit->max_vs_inputs  = svgascreen->max_vs_inputs;
12967    emit->max_vs_outputs = svgascreen->max_vs_outputs;
12968    emit->max_gs_inputs  = svgascreen->max_gs_inputs;
12969 
12970    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
12971       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
12972    }
12973 
12974    if (unit == PIPE_SHADER_FRAGMENT) {
12975       if (key->fs.light_twoside) {
12976          tokens = transform_fs_twoside(tokens);
12977       }
12978       if (key->fs.pstipple) {
12979          const struct tgsi_token *new_tokens =
12980             transform_fs_pstipple(emit, tokens);
12981          if (tokens != shader->tokens) {
12982             /* free the two-sided shader tokens */
12983             tgsi_free_tokens(tokens);
12984          }
12985          tokens = new_tokens;
12986       }
12987       if (key->fs.aa_point) {
12988          tokens = transform_fs_aapoint(svga, tokens,
12989 			               key->fs.aa_point_coord_index);
12990       }
12991    }
12992 
12993    if (SVGA_DEBUG & DEBUG_TGSI) {
12994       debug_printf("#####################################\n");
12995       debug_printf("### TGSI Shader %u\n", shader->id);
12996       tgsi_dump(tokens, 0);
12997    }
12998 
12999    /**
13000     * Rescan the header if the token string is different from the one
13001     * included in the shader; otherwise, the header info is already up-to-date
13002     */
13003    if (tokens != shader->tokens) {
13004       tgsi_scan_shader(tokens, &emit->info);
13005    } else {
13006       emit->info = shader->tgsi_info;
13007    }
13008 
13009    emit->num_outputs = emit->info.num_outputs;
13010 
13011    /**
13012     * Compute input mapping to match the outputs from shader
13013     * in the previous stage
13014     */
13015    compute_input_mapping(svga, emit, unit);
13016 
13017    determine_clipping_mode(emit);
13018 
13019    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
13020        unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
13021       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
13022          /* if there is stream output declarations associated
13023           * with this shader or the shader writes to ClipDistance
13024           * then reserve extra registers for the non-adjusted vertex position
13025           * and the ClipDistance shadow copy.
13026           */
13027          emit->vposition.so_index = emit->num_outputs++;
13028 
13029          if (emit->clip_mode == CLIP_DISTANCE) {
13030             emit->clip_dist_so_index = emit->num_outputs++;
13031             if (emit->info.num_written_clipdistance > 4)
13032                emit->num_outputs++;
13033          }
13034       }
13035    }
13036 
13037    /* Determine if constbuf to rawbuf translation is needed */
13038    if (emit->info.const_buffers_declared) {
13039       emit->raw_bufs = emit->key.raw_buffers;
13040       emit->raw_buf_srv_start_index = emit->key.srv_raw_buf_index;
13041    }
13042 
13043    /*
13044     * Do actual shader translation.
13045     */
13046    if (!emit_vgpu10_header(emit)) {
13047       debug_printf("svga: emit VGPU10 header failed\n");
13048       goto cleanup;
13049    }
13050 
13051    if (!emit_vgpu10_instructions(emit, tokens)) {
13052       debug_printf("svga: emit VGPU10 instructions failed\n");
13053       goto cleanup;
13054    }
13055 
13056    if (!emit_vgpu10_tail(emit)) {
13057       debug_printf("svga: emit VGPU10 tail failed\n");
13058       goto cleanup;
13059    }
13060 
13061    if (emit->register_overflow) {
13062       goto cleanup;
13063    }
13064 
13065    /*
13066     * Create, initialize the 'variant' object.
13067     */
13068    variant = svga_new_shader_variant(svga, unit);
13069    if (!variant)
13070       goto cleanup;
13071 
13072    variant->shader = shader;
13073    variant->nr_tokens = emit_get_num_tokens(emit);
13074    variant->tokens = (const unsigned *)emit->buf;
13075 
13076    /* Copy shader signature info to the shader variant */
13077    if (svga_have_sm5(svga)) {
13078       copy_shader_signature(&emit->signature, variant);
13079    }
13080 
13081    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
13082    memcpy(&variant->key, key, sizeof(*key));
13083    variant->id = UTIL_BITMASK_INVALID_INDEX;
13084 
13085    /* The extra constant starting offset starts with the number of
13086     * shader constants declared in the shader.
13087     */
13088    variant->extra_const_start = emit->num_shader_consts[0];
13089    if (key->gs.wide_point) {
13090       /**
13091        * The extra constant added in the transformed shader
13092        * for inverse viewport scale is to be supplied by the driver.
13093        * So the extra constant starting offset needs to be reduced by 1.
13094        */
13095       assert(variant->extra_const_start > 0);
13096       variant->extra_const_start--;
13097    }
13098 
13099    if (unit == PIPE_SHADER_FRAGMENT) {
13100       struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
13101 
13102       fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
13103       fs_variant->pstipple_sampler_state_index =
13104          emit->fs.pstipple_sampler_state_index;
13105 
13106       /* If there was exactly one write to a fragment shader output register
13107        * and it came from a constant buffer, we know all fragments will have
13108        * the same color (except for blending).
13109        */
13110       fs_variant->constant_color_output =
13111          emit->constant_color_output && emit->num_output_writes == 1;
13112 
13113       /** keep track in the variant if flat interpolation is used
13114        *  for any of the varyings.
13115        */
13116       fs_variant->uses_flat_interp = emit->uses_flat_interp;
13117 
13118       fs_variant->fs_shadow_compare_units = emit->shadow_compare_units;
13119    }
13120    else if (unit == PIPE_SHADER_TESS_EVAL) {
13121       struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
13122 
13123       /* Keep track in the tes variant some of the layout parameters.
13124        * These parameters will be referenced by the tcs to emit
13125        * the necessary declarations for the hull shader.
13126        */
13127       tes_variant->prim_mode = emit->tes.prim_mode;
13128       tes_variant->spacing = emit->tes.spacing;
13129       tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
13130       tes_variant->point_mode = emit->tes.point_mode;
13131    }
13132 
13133 
13134    if (tokens != shader->tokens) {
13135       tgsi_free_tokens(tokens);
13136    }
13137 
13138 cleanup:
13139    free_emitter(emit);
13140 
13141 done:
13142    SVGA_STATS_TIME_POP(svga_sws(svga));
13143    return variant;
13144 }
13145