• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**********************************************************
2  * Copyright 1998-2013 VMware, Inc.  All rights reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **********************************************************/
25 
26 /**
27  * @file svga_tgsi_vgpu10.c
28  *
29  * TGSI -> VGPU10 shader translation.
30  *
31  * \author Mingcheng Chen
32  * \author Brian Paul
33  */
34 
35 #include "pipe/p_compiler.h"
36 #include "pipe/p_shader_tokens.h"
37 #include "pipe/p_defines.h"
38 #include "tgsi/tgsi_build.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_parse.h"
42 #include "tgsi/tgsi_scan.h"
43 #include "tgsi/tgsi_strings.h"
44 #include "tgsi/tgsi_two_side.h"
45 #include "tgsi/tgsi_aa_point.h"
46 #include "tgsi/tgsi_util.h"
47 #include "util/u_math.h"
48 #include "util/u_memory.h"
49 #include "util/u_bitmask.h"
50 #include "util/u_debug.h"
51 #include "util/u_pstipple.h"
52 
53 #include "svga_context.h"
54 #include "svga_debug.h"
55 #include "svga_link.h"
56 #include "svga_shader.h"
57 #include "svga_tgsi.h"
58 
59 #include "VGPU10ShaderTokens.h"
60 
61 
/* Translator-wide limits and constants. */
/* NOTE(review): INVALID_INDEX looks like an "unset" sentinel for register
 * indexes -- confirm against its users. */
62 #define INVALID_INDEX 99999
63 #define MAX_INTERNAL_TEMPS 3
/* Size of svga_shader_emitter_v10::system_value_indexes[] */
64 #define MAX_SYSTEM_VALUES 4
/* Number of rows in svga_shader_emitter_v10::immediates[][4]; also the
 * limit applied to immediate constant buffer indexes in
 * check_register_index(). */
65 #define MAX_IMMEDIATE_COUNT \
66         (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
/* Size of svga_shader_emitter_v10::temp_arrays[] */
67 #define MAX_TEMP_ARRAYS 64  /* Enough? */
68 
69 
70 /**
71  * Clipping is complicated.  There are four different cases which we
72  * handle during VS/GS shader translation.  The mode is chosen by
73  * determine_clipping_mode() and stored in the emitter's clip_mode field.
73  */
74 enum clipping_mode
75 {
76    CLIP_NONE,     /**< No clipping enabled */
77    CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
78                    * one or more user-defined clip planes are enabled.  We
79                    * generate extra code to emit clip distances.
80                    */
81    CLIP_DISTANCE, /**< The shader already declares clip distance output
82                    * registers and has code to write to them.
83                    */
84    CLIP_VERTEX    /**< The shader declares a clip vertex output register and
85                   * has code that writes to the register.  We convert the
86                   * clipvertex position into one or more clip distances.
87                   */
88 };
89 
90 
91 /* Shader signature info: a header followed by fixed-size entry arrays for
 * the shader's inputs, outputs and patch constants.  Individual entries are
 * filled in with set_shader_signature_entry().  Note that the patch
 * constant array is sized with PIPE_MAX_SHADER_OUTPUTS, like the outputs.
 */
92 struct svga_shader_signature
93 {
94    SVGA3dDXShaderSignatureHeader header;
95    SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS];
96    SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS];
97    SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS];
98 };
99 
100 static inline void
set_shader_signature_entry(SVGA3dDXShaderSignatureEntry * e,unsigned index,SVGA3dDXSignatureSemanticName sgnName,unsigned mask,SVGA3dDXSignatureRegisterComponentType compType,SVGA3dDXSignatureMinPrecision minPrecision)101 set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e,
102                            unsigned index,
103                            SVGA3dDXSignatureSemanticName sgnName,
104                            unsigned mask,
105                            SVGA3dDXSignatureRegisterComponentType compType,
106                            SVGA3dDXSignatureMinPrecision minPrecision)
107 {
108    e->registerIndex = index;
109    e->semanticName = sgnName;
110    e->mask = mask;
111    e->componentType = compType;
112    e->minPrecision = minPrecision;
113 };
114 
/**
 * Lookup table from TGSI semantic name to SVGA signature semantic name.
 *
 * The table has TGSI_SEMANTIC_COUNT entries and is indexed directly by the
 * enum tgsi_semantic value (see map_tgsi_semantic_to_sgn_name()).  The
 * initializers are purely positional, so the order here must track the
 * order of the TGSI_SEMANTIC_x enum; map_tgsi_semantic_to_sgn_name() spot
 * checks the PRIMID, VIEWPORT_INDEX and INVOCATIONID slots with asserts.
 * Semantics with no signature equivalent map to
 * SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED.
 */
115 static const SVGA3dDXSignatureSemanticName
116 tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = {
117    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION,
118    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
119    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
120    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
121    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
122    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
123    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
124    SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE,
125    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
126    SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID,
127    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
128    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
129    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
130    SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE,
131    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
132    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
133    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
134    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
135    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
136    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
137    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
138    SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX,
139    SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX,
140    SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX,
141    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
142    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
143    SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID,
144    SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID,
145    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
146    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
147    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
148    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
149    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
150    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
151    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
152    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
153    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
154    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
155    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
156    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
157    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
158    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
159    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
160    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED,
161    SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED
162 };
163 
164 
165 /**
166  * Map tgsi semantic name to SVGA signature semantic name
167  */
168 static inline SVGA3dDXSignatureSemanticName
map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)169 map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name)
170 {
171    assert(name < TGSI_SEMANTIC_COUNT);
172 
173    /* Do a few asserts here to spot check the mapping */
174    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] ==
175           SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
176    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] ==
177           SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX);
178    assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] ==
179           SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID);
180 
181    return tgsi_semantic_to_sgn_name[name];
182 }
183 
184 
/**
 * State of one TGSI -> VGPU10 shader translation: the growable output
 * token buffer, the (read-only) compile key and TGSI shader info, and the
 * register/immediate bookkeeping for each shader stage.
 * Instances are created by alloc_emitter() and released by free_emitter().
 */
185 struct svga_shader_emitter_v10
186 {
187    /* The token output buffer.
    *  size: capacity of buf, in bytes
    *  buf:  start of the buffer (or err_buf after an allocation failure)
    *  ptr:  current write position inside buf
    */
188    unsigned size;
189    char *buf;
190    char *ptr;
191
192    /* Information about the shader and state (does not change) */
193    struct svga_compile_key key;
194    struct tgsi_shader_info info;
195    unsigned unit;
196    unsigned version; /**< Either 40 or 41 at this time */
197
198    unsigned cur_tgsi_token;     /**< current tgsi token position */
199    unsigned inst_start_token;
200    boolean discard_instruction; /**< throw away current instruction? */
201    boolean reemit_instruction;  /**< reemit current instruction */
202    boolean skip_instruction;    /**< skip current instruction */
203
204    union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
205    double (*immediates_dbl)[2];
206    unsigned num_immediates;      /**< Number of immediates emitted */
207    unsigned common_immediate_pos[10];  /**< literals for common immediates */
208    unsigned num_common_immediates;
209    boolean immediates_emitted;
210
211    unsigned num_outputs;      /**< include any extra outputs */
212                               /**  The first extra output is reserved for
213                                *   non-adjusted vertex position for
214                                *   stream output purpose
215                                */
216
217    /* Temporary Registers */
218    unsigned num_shader_temps; /**< num of temps used by original shader */
219    unsigned internal_temp_count;  /**< currently allocated internal temps */
220    struct {
221       unsigned start, size;
222    } temp_arrays[MAX_TEMP_ARRAYS];
223    unsigned num_temp_arrays;
224
225    /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
226    struct {
227       unsigned arrayId, index;
228       boolean initialized;
229    } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */
230
231    unsigned initialize_temp_index;
232
233    /** Number of constants used by original shader for each constant buffer.
234     * The size should probably always match with that of svga_state.constbufs.
235     */
236    unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];
237
238    /* Samplers */
239    unsigned num_samplers;
240    boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
241    ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
242    ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */
243
244    /* Index Range declaration */
245    struct {
246       unsigned start_index;
247       unsigned count;
248       boolean required;
249       unsigned operandType;
250       unsigned size;
251       unsigned dim;
252    } index_range;
253
254    /* Address regs (really implemented with temps) */
255    unsigned num_address_regs;
256    unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];
257
258    /* Output register usage masks */
259    ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];
260
261    /* To map TGSI system value index to VGPU shader input indexes */
262    ubyte system_value_indexes[MAX_SYSTEM_VALUES];
263
264    struct {
265       /* vertex position scale/translation */
266       unsigned out_index;  /**< the real position output reg */
267       unsigned tmp_index;  /**< the fake/temp position output reg */
268       unsigned so_index;   /**< the non-adjusted position output reg */
269       unsigned prescale_cbuf_index;  /* index to the const buf for prescale */
270       unsigned prescale_scale_index, prescale_trans_index;
271       unsigned num_prescale;      /* number of prescale factor in const buf */
272       unsigned viewport_index;
273       unsigned need_prescale:1;
274       unsigned have_prescale:1;
275    } vposition;
276
277    /* For vertex shaders only */
278    struct {
279       /* viewport constant */
280       unsigned viewport_index;
281
282       unsigned vertex_id_bias_index;
283       unsigned vertex_id_sys_index;
284       unsigned vertex_id_tmp_index;
285
286       /* temp index of adjusted vertex attributes */
287       unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
288    } vs;
289
290    /* For fragment shaders only */
291    struct {
292       unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
293       unsigned num_color_outputs;
294       unsigned color_tmp_index;  /**< fake/temp color output reg */
295       unsigned alpha_ref_index;  /**< immediate constant for alpha ref */
296
297       /* front-face */
298       unsigned face_input_index; /**< real fragment shader face reg (bool) */
299       unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */
300
301       unsigned pstipple_sampler_unit;
302
303       unsigned fragcoord_input_index;  /**< real fragment position input reg */
304       unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
305
306       unsigned sample_id_sys_index;  /**< TGSI index of sample id sys value */
307
308       unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */
309       unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */
310
311       /** TGSI index of sample mask input sys value */
312       unsigned sample_mask_in_sys_index;
313
314       /** Which texture units are doing shadow comparison in the FS code */
315       unsigned shadow_compare_units;
316
317       /* layer */
318       unsigned layer_input_index;    /**< TGSI index of layer */
319       unsigned layer_imm_index;      /**< immediate for default layer 0 */
320    } fs;
321
322    /* For geometry shaders only */
323    struct {
324       VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
325       VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
326       unsigned input_size;       /**< size of input arrays */
327       unsigned prim_id_index;    /**< primitive id register index */
328       unsigned max_out_vertices; /**< maximum number of output vertices */
329       unsigned invocations;
330       unsigned invocation_id_sys_index;
331
332       unsigned viewport_index_out_index;
333       unsigned viewport_index_tmp_index;
334    } gs;
335
336    /* For tessellation control shaders only */
337    struct {
338       unsigned vertices_per_patch_index;     /**< vertices_per_patch system value index */
339       unsigned imm_index;                    /**< immediate for tcs */
340       unsigned invocation_id_sys_index;      /**< invocation id */
341       unsigned invocation_id_tmp_index;
342       unsigned instruction_token_pos;        /* token pos for the first instruction */
343       unsigned control_point_input_index;    /* control point input register index */
344       unsigned control_point_addr_index;     /* control point input address register */
345       unsigned control_point_out_index;      /* control point output register index */
346       unsigned control_point_tmp_index;      /* control point temporary register */
347       unsigned control_point_out_count;      /* control point output count */
348       boolean  control_point_phase;          /* true if in control point phase */
349       boolean  fork_phase_add_signature;     /* true if needs to add signature in fork phase */
350       unsigned patch_generic_out_count;      /* per-patch generic output count */
351       unsigned patch_generic_out_index;      /* per-patch generic output register index*/
352       unsigned patch_generic_tmp_index;      /* per-patch generic temporary register index*/
353       unsigned prim_id_index;                /* primitive id */
354       struct {
355          unsigned out_index;      /* real tessinner output register */
356          unsigned temp_index;     /* tessinner temp register */
357          unsigned tgsi_index;     /* tgsi tessinner output register */
358       } inner;
359       struct {
360          unsigned out_index;      /* real tessouter output register */
361          unsigned temp_index;     /* tessouter temp register */
362          unsigned tgsi_index;     /* tgsi tessouter output register */
363       } outer;
364    } tcs;
365
366    /* For tessellation evaluation shaders only */
367    struct {
368       enum pipe_prim_type prim_mode;
369       enum pipe_tess_spacing spacing;
370       boolean vertices_order_cw;
371       boolean point_mode;
372       unsigned tesscoord_sys_index;
373       unsigned prim_id_index;                /* primitive id */
374       struct {
375          unsigned in_index;       /* real tessinner input register */
376          unsigned temp_index;     /* tessinner temp register */
377          unsigned tgsi_index;     /* tgsi tessinner input register */
378       } inner;
379       struct {
380          unsigned in_index;       /* real tessouter input register */
381          unsigned temp_index;     /* tessouter temp register */
382          unsigned tgsi_index;     /* tgsi tessouter input register */
383       } outer;
384    } tes;
385
386    /* For vertex or geometry shaders */
387    enum clipping_mode clip_mode;
388    unsigned clip_dist_out_index; /**< clip distance output register index */
389    unsigned clip_dist_tmp_index; /**< clip distance temporary register */
390    unsigned clip_dist_so_index;  /**< clip distance shadow copy */
391
392    /** Index of temporary holding the clipvertex coordinate */
393    unsigned clip_vertex_out_index; /**< clip vertex output register index */
394    unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */
395
396    /* user clip plane constant slot indexes */
397    unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];
398
399    unsigned num_output_writes;
400    boolean constant_color_output;
401
402    boolean uses_flat_interp;
403
404    unsigned reserved_token;        /* index to the reserved token */
405    boolean uses_precise_qualifier;
406
407    /* For all shaders: const reg index for RECT coord scaling */
408    unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];
409
410    /* For all shaders: const reg index for texture buffer size */
411    unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];
412
413    /* VS/TCS/TES/GS/FS Linkage info */
414    struct shader_linkage linkage;
415    struct tgsi_shader_info *prevShaderInfo;
416
417    /* Shader signature */
418    struct svga_shader_signature signature;
419
420    bool register_overflow;  /**< Set if we exceed a VGPU10 register limit */
421
422    /* For pipe_debug_message */
423    struct pipe_debug_callback svga_debug_callback;
424
425    /* current loop depth in shader */
426    unsigned current_loop_depth;
427 };
428 
429 
/*
 * Prototypes for file-local (static) helpers that are referenced before
 * their definitions appear.
 */
430 static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit);
431 static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit);
432 static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit);
433 static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit);
434 static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit);
435 static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit);
436 static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit);
437 static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit);
438 static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit);
439
440 static boolean
441 emit_post_helpers(struct svga_shader_emitter_v10 *emit);
442
443 static boolean
444 emit_vertex(struct svga_shader_emitter_v10 *emit,
445             const struct tgsi_full_instruction *inst);
446
447 static boolean
448 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
449                         unsigned inst_number,
450                         const struct tgsi_full_instruction *inst);
451
452 static void
453 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
454                        unsigned opcodeType, unsigned operandType,
455                        unsigned dim, unsigned index, unsigned size,
456                        unsigned name, unsigned numComp,
457                        unsigned selMode, unsigned usageMask,
458                        unsigned interpMode,
459                        boolean addSignature,
460                        SVGA3dDXSignatureSemanticName sgnName);
461
462 static void
463 create_temp_array(struct svga_shader_emitter_v10 *emit,
464                   unsigned arrayID, unsigned first, unsigned count,
465                   unsigned startIndex);
466 
467 static char err_buf[128];
468 
469 static boolean
expand(struct svga_shader_emitter_v10 * emit)470 expand(struct svga_shader_emitter_v10 *emit)
471 {
472    char *new_buf;
473    unsigned newsize = emit->size * 2;
474 
475    if (emit->buf != err_buf)
476       new_buf = REALLOC(emit->buf, emit->size, newsize);
477    else
478       new_buf = NULL;
479 
480    if (!new_buf) {
481       emit->ptr = err_buf;
482       emit->buf = err_buf;
483       emit->size = sizeof(err_buf);
484       return FALSE;
485    }
486 
487    emit->size = newsize;
488    emit->ptr = new_buf + (emit->ptr - emit->buf);
489    emit->buf = new_buf;
490    return TRUE;
491 }
492 
493 /**
494  * Create and initialize a new svga_shader_emitter_v10 object.
495  */
496 static struct svga_shader_emitter_v10 *
alloc_emitter(void)497 alloc_emitter(void)
498 {
499    struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));
500 
501    if (!emit)
502       return NULL;
503 
504    /* to initialize the output buffer */
505    emit->size = 512;
506    if (!expand(emit)) {
507       FREE(emit);
508       return NULL;
509    }
510    return emit;
511 }
512 
513 /**
514  * Free an svga_shader_emitter_v10 object.
515  */
516 static void
free_emitter(struct svga_shader_emitter_v10 * emit)517 free_emitter(struct svga_shader_emitter_v10 *emit)
518 {
519    assert(emit);
520    FREE(emit->buf);    /* will be NULL if translation succeeded */
521    FREE(emit);
522 }
523 
524 static inline boolean
reserve(struct svga_shader_emitter_v10 * emit,unsigned nr_dwords)525 reserve(struct svga_shader_emitter_v10 *emit,
526         unsigned nr_dwords)
527 {
528    while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
529       if (!expand(emit))
530          return FALSE;
531    }
532 
533    return TRUE;
534 }
535 
536 static boolean
emit_dword(struct svga_shader_emitter_v10 * emit,uint32 dword)537 emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
538 {
539    if (!reserve(emit, 1))
540       return FALSE;
541 
542    *(uint32 *)emit->ptr = dword;
543    emit->ptr += sizeof dword;
544    return TRUE;
545 }
546 
547 static boolean
emit_dwords(struct svga_shader_emitter_v10 * emit,const uint32 * dwords,unsigned nr)548 emit_dwords(struct svga_shader_emitter_v10 *emit,
549             const uint32 *dwords,
550             unsigned nr)
551 {
552    if (!reserve(emit, nr))
553       return FALSE;
554 
555    memcpy(emit->ptr, dwords, nr * sizeof *dwords);
556    emit->ptr += nr * sizeof *dwords;
557    return TRUE;
558 }
559 
560 /** Return the number of tokens in the emitter's buffer */
561 static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 * emit)562 emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
563 {
564    return (emit->ptr - emit->buf) / sizeof(unsigned);
565 }
566 
567 
568 /**
569  * Check for register overflow.  If we overflow we'll set an
570  * error flag.  This function can be called for register declarations
571  * or use as src/dst instruction operands.
572  * \param type  register type.  One of VGPU10_OPERAND_TYPE_x
573                 or VGPU10_OPCODE_DCL_x
574  * \param index  the register index
575  */
576 static void
check_register_index(struct svga_shader_emitter_v10 * emit,unsigned operandType,unsigned index)577 check_register_index(struct svga_shader_emitter_v10 *emit,
578                      unsigned operandType, unsigned index)
579 {
580    bool overflow_before = emit->register_overflow;
581 
582    switch (operandType) {
583    case VGPU10_OPERAND_TYPE_TEMP:
584    case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
585    case VGPU10_OPCODE_DCL_TEMPS:
586       if (index >= VGPU10_MAX_TEMPS) {
587          emit->register_overflow = TRUE;
588       }
589       break;
590    case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
591    case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
592       if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
593          emit->register_overflow = TRUE;
594       }
595       break;
596    case VGPU10_OPERAND_TYPE_INPUT:
597    case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
598    case VGPU10_OPCODE_DCL_INPUT:
599    case VGPU10_OPCODE_DCL_INPUT_SGV:
600    case VGPU10_OPCODE_DCL_INPUT_SIV:
601    case VGPU10_OPCODE_DCL_INPUT_PS:
602    case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
603    case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
604       if ((emit->unit == PIPE_SHADER_VERTEX &&
605            index >= VGPU10_MAX_VS_INPUTS) ||
606           (emit->unit == PIPE_SHADER_GEOMETRY &&
607            index >= VGPU10_MAX_GS_INPUTS) ||
608           (emit->unit == PIPE_SHADER_FRAGMENT &&
609            index >= VGPU10_MAX_FS_INPUTS) ||
610           (emit->unit == PIPE_SHADER_TESS_CTRL &&
611            index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) ||
612           (emit->unit == PIPE_SHADER_TESS_EVAL &&
613            index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) {
614          emit->register_overflow = TRUE;
615       }
616       break;
617    case VGPU10_OPERAND_TYPE_OUTPUT:
618    case VGPU10_OPCODE_DCL_OUTPUT:
619    case VGPU10_OPCODE_DCL_OUTPUT_SGV:
620    case VGPU10_OPCODE_DCL_OUTPUT_SIV:
621       /* Note: we are skipping two output indices in tcs for
622        * tessinner/outer levels. Implementation will not exceed
623        * number of output count but it allows index to go beyond
624        * VGPU11_MAX_HS_OUTPUTS.
625        * Index will never be >= index >= VGPU11_MAX_HS_OUTPUTS + 2
626        */
627       if ((emit->unit == PIPE_SHADER_VERTEX &&
628            index >= VGPU10_MAX_VS_OUTPUTS) ||
629           (emit->unit == PIPE_SHADER_GEOMETRY &&
630            index >= VGPU10_MAX_GS_OUTPUTS) ||
631           (emit->unit == PIPE_SHADER_FRAGMENT &&
632            index >= VGPU10_MAX_FS_OUTPUTS) ||
633           (emit->unit == PIPE_SHADER_TESS_CTRL &&
634            index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
635           (emit->unit == PIPE_SHADER_TESS_EVAL &&
636            index >= VGPU11_MAX_DS_OUTPUTS)) {
637          emit->register_overflow = TRUE;
638       }
639       break;
640    case VGPU10_OPERAND_TYPE_SAMPLER:
641    case VGPU10_OPCODE_DCL_SAMPLER:
642       if (index >= VGPU10_MAX_SAMPLERS) {
643          emit->register_overflow = TRUE;
644       }
645       break;
646    case VGPU10_OPERAND_TYPE_RESOURCE:
647    case VGPU10_OPCODE_DCL_RESOURCE:
648       if (index >= VGPU10_MAX_RESOURCES) {
649          emit->register_overflow = TRUE;
650       }
651       break;
652    case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
653       if (index >= MAX_IMMEDIATE_COUNT) {
654          emit->register_overflow = TRUE;
655       }
656       break;
657    case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
658       /* nothing */
659       break;
660    default:
661       assert(0);
662       ; /* nothing */
663    }
664 
665    if (emit->register_overflow && !overflow_before) {
666       debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
667                    operandType, index);
668    }
669 }
670 
671 
672 /**
673  * Examine misc state to determine the clipping mode.
674  */
675 static void
determine_clipping_mode(struct svga_shader_emitter_v10 * emit)676 determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
677 {
678    /* num_written_clipdistance in the shader info for tessellation
679     * control shader is always 0 because the TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
680     * is not defined for this shader. So we go through all the output declarations
681     * to set the num_written_clipdistance. This is just to determine the
682     * clipping mode.
683     */
684    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
685       unsigned i;
686       for (i = 0; i < emit->info.num_outputs; i++) {
687          if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
688             emit->info.num_written_clipdistance =
689                4 * (emit->info.output_semantic_index[i] + 1);
690          }
691       }
692    }
693 
694    if (emit->info.num_written_clipdistance > 0) {
695       emit->clip_mode = CLIP_DISTANCE;
696    }
697    else if (emit->info.writes_clipvertex) {
698       emit->clip_mode = CLIP_VERTEX;
699    }
700    else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
701       /*
702        * Only the last shader in the vertex processing stage needs to
703        * handle the legacy clip mode.
704        */
705       emit->clip_mode = CLIP_LEGACY;
706    }
707    else {
708       emit->clip_mode = CLIP_NONE;
709    }
710 }
711 
712 
713 /**
714  * For clip distance register declarations and clip distance register
715  * writes we need to mask the declaration usage or instruction writemask
716  * (respectively) against the set of the really-enabled clipping planes.
717  *
718  * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
719  * has a VS that writes to all 8 clip distance registers, but the plane enable
720  * flags are a subset of that.
721  *
722  * This function is used to apply the plane enable flags to the register
723  * declaration or instruction writemask.
724  *
725  * \param writemask  the declaration usage mask or instruction writemask
726  * \param clip_reg_index  which clip plane register is being declared/written.
727  *                        The legal values are 0 and 1 (two clip planes per
728  *                        register, for a total of 8 clip planes)
729  */
730 static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 * emit,unsigned writemask,unsigned clip_reg_index)731 apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
732                       unsigned writemask, unsigned clip_reg_index)
733 {
734    unsigned shift;
735 
736    assert(clip_reg_index < 2);
737 
738    /* four clip planes per clip register: */
739    shift = clip_reg_index * 4;
740    writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);
741 
742    return writemask;
743 }
744 
745 
746 /**
747  * Translate gallium shader type into VGPU10 type.
748  */
749 static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)750 translate_shader_type(unsigned type)
751 {
752    switch (type) {
753    case PIPE_SHADER_VERTEX:
754       return VGPU10_VERTEX_SHADER;
755    case PIPE_SHADER_GEOMETRY:
756       return VGPU10_GEOMETRY_SHADER;
757    case PIPE_SHADER_FRAGMENT:
758       return VGPU10_PIXEL_SHADER;
759    case PIPE_SHADER_TESS_CTRL:
760       return VGPU10_HULL_SHADER;
761    case PIPE_SHADER_TESS_EVAL:
762       return VGPU10_DOMAIN_SHADER;
763    case PIPE_SHADER_COMPUTE:
764       return VGPU10_COMPUTE_SHADER;
765    default:
766       assert(!"Unexpected shader type");
767       return VGPU10_VERTEX_SHADER;
768    }
769 }
770 
771 
772 /**
773  * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
774  * Note: we only need to translate the opcodes for "simple" instructions,
775  * as seen below.  All other opcodes are handled/translated specially.
776  */
777 static VGPU10_OPCODE_TYPE
translate_opcode(enum tgsi_opcode opcode)778 translate_opcode(enum tgsi_opcode opcode)
779 {
780    switch (opcode) {
781    case TGSI_OPCODE_MOV:
782       return VGPU10_OPCODE_MOV;
783    case TGSI_OPCODE_MUL:
784       return VGPU10_OPCODE_MUL;
785    case TGSI_OPCODE_ADD:
786       return VGPU10_OPCODE_ADD;
787    case TGSI_OPCODE_DP3:
788       return VGPU10_OPCODE_DP3;
789    case TGSI_OPCODE_DP4:
790       return VGPU10_OPCODE_DP4;
791    case TGSI_OPCODE_MIN:
792       return VGPU10_OPCODE_MIN;
793    case TGSI_OPCODE_MAX:
794       return VGPU10_OPCODE_MAX;
795    case TGSI_OPCODE_MAD:
796       return VGPU10_OPCODE_MAD;
797    case TGSI_OPCODE_SQRT:
798       return VGPU10_OPCODE_SQRT;
799    case TGSI_OPCODE_FRC:
800       return VGPU10_OPCODE_FRC;
801    case TGSI_OPCODE_FLR:
802       return VGPU10_OPCODE_ROUND_NI;
803    case TGSI_OPCODE_FSEQ:
804       return VGPU10_OPCODE_EQ;
805    case TGSI_OPCODE_FSGE:
806       return VGPU10_OPCODE_GE;
807    case TGSI_OPCODE_FSNE:
808       return VGPU10_OPCODE_NE;
809    case TGSI_OPCODE_DDX:
810       return VGPU10_OPCODE_DERIV_RTX;
811    case TGSI_OPCODE_DDY:
812       return VGPU10_OPCODE_DERIV_RTY;
813    case TGSI_OPCODE_RET:
814       return VGPU10_OPCODE_RET;
815    case TGSI_OPCODE_DIV:
816       return VGPU10_OPCODE_DIV;
817    case TGSI_OPCODE_IDIV:
818       return VGPU10_OPCODE_VMWARE;
819    case TGSI_OPCODE_DP2:
820       return VGPU10_OPCODE_DP2;
821    case TGSI_OPCODE_BRK:
822       return VGPU10_OPCODE_BREAK;
823    case TGSI_OPCODE_IF:
824       return VGPU10_OPCODE_IF;
825    case TGSI_OPCODE_ELSE:
826       return VGPU10_OPCODE_ELSE;
827    case TGSI_OPCODE_ENDIF:
828       return VGPU10_OPCODE_ENDIF;
829    case TGSI_OPCODE_CEIL:
830       return VGPU10_OPCODE_ROUND_PI;
831    case TGSI_OPCODE_I2F:
832       return VGPU10_OPCODE_ITOF;
833    case TGSI_OPCODE_NOT:
834       return VGPU10_OPCODE_NOT;
835    case TGSI_OPCODE_TRUNC:
836       return VGPU10_OPCODE_ROUND_Z;
837    case TGSI_OPCODE_SHL:
838       return VGPU10_OPCODE_ISHL;
839    case TGSI_OPCODE_AND:
840       return VGPU10_OPCODE_AND;
841    case TGSI_OPCODE_OR:
842       return VGPU10_OPCODE_OR;
843    case TGSI_OPCODE_XOR:
844       return VGPU10_OPCODE_XOR;
845    case TGSI_OPCODE_CONT:
846       return VGPU10_OPCODE_CONTINUE;
847    case TGSI_OPCODE_EMIT:
848       return VGPU10_OPCODE_EMIT;
849    case TGSI_OPCODE_ENDPRIM:
850       return VGPU10_OPCODE_CUT;
851    case TGSI_OPCODE_BGNLOOP:
852       return VGPU10_OPCODE_LOOP;
853    case TGSI_OPCODE_ENDLOOP:
854       return VGPU10_OPCODE_ENDLOOP;
855    case TGSI_OPCODE_ENDSUB:
856       return VGPU10_OPCODE_RET;
857    case TGSI_OPCODE_NOP:
858       return VGPU10_OPCODE_NOP;
859    case TGSI_OPCODE_END:
860       return VGPU10_OPCODE_RET;
861    case TGSI_OPCODE_F2I:
862       return VGPU10_OPCODE_FTOI;
863    case TGSI_OPCODE_IMAX:
864       return VGPU10_OPCODE_IMAX;
865    case TGSI_OPCODE_IMIN:
866       return VGPU10_OPCODE_IMIN;
867    case TGSI_OPCODE_UDIV:
868    case TGSI_OPCODE_UMOD:
869    case TGSI_OPCODE_MOD:
870       return VGPU10_OPCODE_UDIV;
871    case TGSI_OPCODE_IMUL_HI:
872       return VGPU10_OPCODE_IMUL;
873    case TGSI_OPCODE_INEG:
874       return VGPU10_OPCODE_INEG;
875    case TGSI_OPCODE_ISHR:
876       return VGPU10_OPCODE_ISHR;
877    case TGSI_OPCODE_ISGE:
878       return VGPU10_OPCODE_IGE;
879    case TGSI_OPCODE_ISLT:
880       return VGPU10_OPCODE_ILT;
881    case TGSI_OPCODE_F2U:
882       return VGPU10_OPCODE_FTOU;
883    case TGSI_OPCODE_UADD:
884       return VGPU10_OPCODE_IADD;
885    case TGSI_OPCODE_U2F:
886       return VGPU10_OPCODE_UTOF;
887    case TGSI_OPCODE_UCMP:
888       return VGPU10_OPCODE_MOVC;
889    case TGSI_OPCODE_UMAD:
890       return VGPU10_OPCODE_UMAD;
891    case TGSI_OPCODE_UMAX:
892       return VGPU10_OPCODE_UMAX;
893    case TGSI_OPCODE_UMIN:
894       return VGPU10_OPCODE_UMIN;
895    case TGSI_OPCODE_UMUL:
896    case TGSI_OPCODE_UMUL_HI:
897       return VGPU10_OPCODE_UMUL;
898    case TGSI_OPCODE_USEQ:
899       return VGPU10_OPCODE_IEQ;
900    case TGSI_OPCODE_USGE:
901       return VGPU10_OPCODE_UGE;
902    case TGSI_OPCODE_USHR:
903       return VGPU10_OPCODE_USHR;
904    case TGSI_OPCODE_USLT:
905       return VGPU10_OPCODE_ULT;
906    case TGSI_OPCODE_USNE:
907       return VGPU10_OPCODE_INE;
908    case TGSI_OPCODE_SWITCH:
909       return VGPU10_OPCODE_SWITCH;
910    case TGSI_OPCODE_CASE:
911       return VGPU10_OPCODE_CASE;
912    case TGSI_OPCODE_DEFAULT:
913       return VGPU10_OPCODE_DEFAULT;
914    case TGSI_OPCODE_ENDSWITCH:
915       return VGPU10_OPCODE_ENDSWITCH;
916    case TGSI_OPCODE_FSLT:
917       return VGPU10_OPCODE_LT;
918    case TGSI_OPCODE_ROUND:
919       return VGPU10_OPCODE_ROUND_NE;
920    /* Begin SM5 opcodes */
921    case TGSI_OPCODE_F2D:
922       return VGPU10_OPCODE_FTOD;
923    case TGSI_OPCODE_D2F:
924       return VGPU10_OPCODE_DTOF;
925    case TGSI_OPCODE_DMUL:
926       return VGPU10_OPCODE_DMUL;
927    case TGSI_OPCODE_DADD:
928       return VGPU10_OPCODE_DADD;
929    case TGSI_OPCODE_DMAX:
930       return VGPU10_OPCODE_DMAX;
931    case TGSI_OPCODE_DMIN:
932       return VGPU10_OPCODE_DMIN;
933    case TGSI_OPCODE_DSEQ:
934       return VGPU10_OPCODE_DEQ;
935    case TGSI_OPCODE_DSGE:
936       return VGPU10_OPCODE_DGE;
937    case TGSI_OPCODE_DSLT:
938       return VGPU10_OPCODE_DLT;
939    case TGSI_OPCODE_DSNE:
940       return VGPU10_OPCODE_DNE;
941    case TGSI_OPCODE_IBFE:
942       return VGPU10_OPCODE_IBFE;
943    case TGSI_OPCODE_UBFE:
944       return VGPU10_OPCODE_UBFE;
945    case TGSI_OPCODE_BFI:
946       return VGPU10_OPCODE_BFI;
947    case TGSI_OPCODE_BREV:
948       return VGPU10_OPCODE_BFREV;
949    case TGSI_OPCODE_POPC:
950       return VGPU10_OPCODE_COUNTBITS;
951    case TGSI_OPCODE_LSB:
952       return VGPU10_OPCODE_FIRSTBIT_LO;
953    case TGSI_OPCODE_IMSB:
954       return VGPU10_OPCODE_FIRSTBIT_SHI;
955    case TGSI_OPCODE_UMSB:
956       return VGPU10_OPCODE_FIRSTBIT_HI;
957    case TGSI_OPCODE_INTERP_CENTROID:
958       return VGPU10_OPCODE_EVAL_CENTROID;
959    case TGSI_OPCODE_INTERP_SAMPLE:
960       return VGPU10_OPCODE_EVAL_SAMPLE_INDEX;
961    case TGSI_OPCODE_BARRIER:
962       return VGPU10_OPCODE_SYNC;
963 
964    /* DX11.1 Opcodes */
965    case TGSI_OPCODE_DDIV:
966       return VGPU10_OPCODE_DDIV;
967    case TGSI_OPCODE_DRCP:
968       return VGPU10_OPCODE_DRCP;
969    case TGSI_OPCODE_D2I:
970       return VGPU10_OPCODE_DTOI;
971    case TGSI_OPCODE_D2U:
972       return VGPU10_OPCODE_DTOU;
973    case TGSI_OPCODE_I2D:
974       return VGPU10_OPCODE_ITOD;
975    case TGSI_OPCODE_U2D:
976       return VGPU10_OPCODE_UTOD;
977 
978    case TGSI_OPCODE_SAMPLE_POS:
979       /* Note: we never actually get this opcode because there's no GLSL
980        * function to query multisample resource sample positions.  There's
981        * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the
982        * position of the current sample in the render target.
983        */
984       /* FALL-THROUGH */
985    case TGSI_OPCODE_SAMPLE_INFO:
986       /* NOTE: we never actually get this opcode because the GLSL compiler
987        * implements the gl_NumSamples variable with a simple constant in the
988        * constant buffer.
989        */
990       /* FALL-THROUGH */
991    default:
992       assert(!"Unexpected TGSI opcode in translate_opcode()");
993       return VGPU10_OPCODE_NOP;
994    }
995 }
996 
997 
998 /**
999  * Translate a TGSI register file type into a VGPU10 operand type.
1000  * \param array  is the TGSI_FILE_TEMPORARY register an array?
1001  */
1002 static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file,boolean array)1003 translate_register_file(enum tgsi_file_type file, boolean array)
1004 {
1005    switch (file) {
1006    case TGSI_FILE_CONSTANT:
1007       return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
1008    case TGSI_FILE_INPUT:
1009       return VGPU10_OPERAND_TYPE_INPUT;
1010    case TGSI_FILE_OUTPUT:
1011       return VGPU10_OPERAND_TYPE_OUTPUT;
1012    case TGSI_FILE_TEMPORARY:
1013       return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
1014                    : VGPU10_OPERAND_TYPE_TEMP;
1015    case TGSI_FILE_IMMEDIATE:
1016       /* all immediates are 32-bit values at this time so
1017        * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
1018        */
1019       return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
1020    case TGSI_FILE_SAMPLER:
1021       return VGPU10_OPERAND_TYPE_SAMPLER;
1022    case TGSI_FILE_SYSTEM_VALUE:
1023       return VGPU10_OPERAND_TYPE_INPUT;
1024 
1025    /* XXX TODO more cases to finish */
1026 
1027    default:
1028       assert(!"Bad tgsi register file!");
1029       return VGPU10_OPERAND_TYPE_NULL;
1030    }
1031 }
1032 
1033 
1034 /**
1035  * Emit a null dst register
1036  */
1037 static void
emit_null_dst_register(struct svga_shader_emitter_v10 * emit)1038 emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
1039 {
1040    VGPU10OperandToken0 operand;
1041 
1042    operand.value = 0;
1043    operand.operandType = VGPU10_OPERAND_TYPE_NULL;
1044    operand.numComponents = VGPU10_OPERAND_0_COMPONENT;
1045 
1046    emit_dword(emit, operand.value);
1047 }
1048 
1049 
1050 /**
1051  * If the given register is a temporary, return the array ID.
1052  * Else return zero.
1053  */
1054 static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1055 get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
1056                   enum tgsi_file_type file, unsigned index)
1057 {
1058    if (file == TGSI_FILE_TEMPORARY) {
1059       return emit->temp_map[index].arrayId;
1060    }
1061    else {
1062       return 0;
1063    }
1064 }
1065 
1066 
1067 /**
1068  * If the given register is a temporary, convert the index from a TGSI
1069  * TEMPORARY index to a VGPU10 temp index.
1070  */
1071 static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 * emit,enum tgsi_file_type file,unsigned index)1072 remap_temp_index(const struct svga_shader_emitter_v10 *emit,
1073                  enum tgsi_file_type file, unsigned index)
1074 {
1075    if (file == TGSI_FILE_TEMPORARY) {
1076       return emit->temp_map[index].index;
1077    }
1078    else {
1079       return index;
1080    }
1081 }
1082 
1083 
1084 /**
1085  * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
1086  * Note: the operandType field must already be initialized.
1087  * \param file  the register file being accessed
1088  * \param indirect  using indirect addressing of the register file?
1089  * \param index2D  if true, 2-D indexing is being used (const or temp registers)
1090  * \param indirect2D  if true, 2-D indirect indexing being used (for const buf)
1091  */
1092 static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 * emit,VGPU10OperandToken0 operand0,enum tgsi_file_type file,boolean indirect,boolean index2D,bool indirect2D)1093 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
1094                         VGPU10OperandToken0 operand0,
1095                         enum tgsi_file_type file,
1096                         boolean indirect,
1097                         boolean index2D, bool indirect2D)
1098 {
1099    VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep;
1100    VGPU10_OPERAND_INDEX_DIMENSION indexDim;
1101 
1102    /*
1103     * Compute index dimensions
1104     */
1105    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
1106        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
1107        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
1108        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
1109        operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP ||
1110        operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) {
1111       /* there's no swizzle for in-line immediates */
1112       indexDim = VGPU10_OPERAND_INDEX_0D;
1113       assert(operand0.selectionMode == 0);
1114    }
1115    else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) {
1116       indexDim = VGPU10_OPERAND_INDEX_0D;
1117    }
1118    else {
1119       indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D;
1120    }
1121 
1122    /*
1123     * Compute index representation(s) (immediate vs relative).
1124     */
1125    if (indexDim == VGPU10_OPERAND_INDEX_2D) {
1126       index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1127          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1128 
1129       index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1130          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1131    }
1132    else if (indexDim == VGPU10_OPERAND_INDEX_1D) {
1133       index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE
1134          : VGPU10_OPERAND_INDEX_IMMEDIATE32;
1135 
1136       index1Rep = 0;
1137    }
1138    else {
1139       index0Rep = 0;
1140       index1Rep = 0;
1141    }
1142 
1143    operand0.indexDimension = indexDim;
1144    operand0.index0Representation = index0Rep;
1145    operand0.index1Representation = index1Rep;
1146 
1147    return operand0;
1148 }
1149 
1150 
1151 /**
1152  * Emit the operand for expressing an address register for indirect indexing.
1153  * Note that the address register is really just a temp register.
1154  * \param addr_reg_index  which address register to use
1155  */
1156 static void
emit_indirect_register(struct svga_shader_emitter_v10 * emit,unsigned addr_reg_index)1157 emit_indirect_register(struct svga_shader_emitter_v10 *emit,
1158                        unsigned addr_reg_index)
1159 {
1160    unsigned tmp_reg_index;
1161    VGPU10OperandToken0 operand0;
1162 
1163    assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);
1164 
1165    tmp_reg_index = emit->address_reg_index[addr_reg_index];
1166 
1167    /* operand0 is a simple temporary register, selecting one component */
1168    operand0.value = 0;
1169    operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
1170    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1171    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1172    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
1173    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1174    operand0.swizzleX = 0;
1175    operand0.swizzleY = 1;
1176    operand0.swizzleZ = 2;
1177    operand0.swizzleW = 3;
1178 
1179    emit_dword(emit, operand0.value);
1180    emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
1181 }
1182 
1183 
1184 /**
1185  * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
1186  * \param emit  the emitter context
1187  * \param reg  the TGSI dst register to translate
1188  */
1189 static void
emit_dst_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * reg)1190 emit_dst_register(struct svga_shader_emitter_v10 *emit,
1191                   const struct tgsi_full_dst_register *reg)
1192 {
1193    enum tgsi_file_type file = reg->Register.File;
1194    unsigned index = reg->Register.Index;
1195    const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
1196    const unsigned sem_index = emit->info.output_semantic_index[index];
1197    unsigned writemask = reg->Register.WriteMask;
1198    const boolean indirect = reg->Register.Indirect;
1199    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1200    boolean index2d = reg->Register.Dimension || tempArrayId > 0;
1201    VGPU10OperandToken0 operand0;
1202 
1203    if (file == TGSI_FILE_TEMPORARY) {
1204       emit->temp_map[index].initialized = TRUE;
1205    }
1206 
1207    if (file == TGSI_FILE_OUTPUT) {
1208       if (emit->unit == PIPE_SHADER_VERTEX ||
1209           emit->unit == PIPE_SHADER_GEOMETRY ||
1210           emit->unit == PIPE_SHADER_TESS_EVAL) {
1211          if (index == emit->vposition.out_index &&
1212              emit->vposition.tmp_index != INVALID_INDEX) {
1213             /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
1214              * vertex position result in a temporary so that we can modify
1215              * it in the post_helper() code.
1216              */
1217             file = TGSI_FILE_TEMPORARY;
1218             index = emit->vposition.tmp_index;
1219          }
1220          else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1221                   emit->clip_dist_tmp_index != INVALID_INDEX) {
1222             /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1223              * We store the clip distance in a temporary first, then
1224              * we'll copy it to the shadow copy and to CLIPDIST with the
1225              * enabled planes mask in emit_clip_distance_instructions().
1226              */
1227             file = TGSI_FILE_TEMPORARY;
1228             index = emit->clip_dist_tmp_index + sem_index;
1229          }
1230          else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1231                   emit->clip_vertex_tmp_index != INVALID_INDEX) {
1232             /* replace the CLIPVERTEX output register with a temporary */
1233             assert(emit->clip_mode == CLIP_VERTEX);
1234             assert(sem_index == 0);
1235             file = TGSI_FILE_TEMPORARY;
1236             index = emit->clip_vertex_tmp_index;
1237          }
1238          else if (sem_name == TGSI_SEMANTIC_COLOR &&
1239                   emit->key.clamp_vertex_color) {
1240 
1241             /* set the saturate modifier of the instruction
1242              * to clamp the vertex color.
1243              */
1244             VGPU10OpcodeToken0 *token =
1245                (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token;
1246             token->saturate = TRUE;
1247          }
1248          else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX &&
1249                   emit->gs.viewport_index_out_index != INVALID_INDEX) {
1250             file = TGSI_FILE_TEMPORARY;
1251             index = emit->gs.viewport_index_tmp_index;
1252          }
1253       }
1254       else if (emit->unit == PIPE_SHADER_FRAGMENT) {
1255          if (sem_name == TGSI_SEMANTIC_POSITION) {
1256             /* Fragment depth output register */
1257             operand0.value = 0;
1258             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
1259             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1260             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1261             emit_dword(emit, operand0.value);
1262             return;
1263          }
1264          else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) {
1265             /* Fragment sample mask output */
1266             operand0.value = 0;
1267             operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
1268             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1269             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1270             emit_dword(emit, operand0.value);
1271             return;
1272          }
1273          else if (index == emit->fs.color_out_index[0] &&
1274              emit->fs.color_tmp_index != INVALID_INDEX) {
1275             /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
1276              * fragment color result in a temporary so that we can read it
1277              * it in the post_helper() code.
1278              */
1279             file = TGSI_FILE_TEMPORARY;
1280             index = emit->fs.color_tmp_index;
1281          }
1282          else {
1283             /* Typically, for fragment shaders, the output register index
1284              * matches the color semantic index.  But not when we write to
1285              * the fragment depth register.  In that case, OUT[0] will be
1286              * fragdepth and OUT[1] will be the 0th color output.  We need
1287              * to use the semantic index for color outputs.
1288              */
1289             assert(sem_name == TGSI_SEMANTIC_COLOR);
1290             index = emit->info.output_semantic_index[index];
1291 
1292             emit->num_output_writes++;
1293          }
1294       }
1295       else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1296          if (index == emit->tcs.inner.tgsi_index) {
1297             /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1298              * in temporary for now so that will be store into appropriate
1299              * registers in post_helper() in patch constant phase.
1300              */
1301             if (emit->tcs.control_point_phase) {
1302                /* Discard writing into tessfactor in control point phase */
1303                emit->discard_instruction =  TRUE;
1304             }
1305             else {
1306                file = TGSI_FILE_TEMPORARY;
1307                index = emit->tcs.inner.temp_index;
1308             }
1309          }
1310          else if (index == emit->tcs.outer.tgsi_index) {
1311             /* replace OUTPUT[TESSLEVEL] with temp. We are storing it
1312              * in temporary for now so that will be store into appropriate
1313              * registers in post_helper().
1314              */
1315             if (emit->tcs.control_point_phase) {
1316                /* Discard writing into tessfactor in control point phase */
1317                emit->discard_instruction =  TRUE;
1318             }
1319             else {
1320                file = TGSI_FILE_TEMPORARY;
1321                index = emit->tcs.outer.temp_index;
1322             }
1323          }
1324          else if (index >= emit->tcs.patch_generic_out_index &&
1325                   index < (emit->tcs.patch_generic_out_index +
1326                           emit->tcs.patch_generic_out_count)) {
1327             if (emit->tcs.control_point_phase) {
1328                /* Discard writing into generic patch constant outputs in
1329                   control point phase */
1330                emit->discard_instruction =  TRUE;
1331             }
1332             else {
1333                if (emit->reemit_instruction) {
1334                   /* Store results of reemitted instruction in temporary register. */
1335                   file = TGSI_FILE_TEMPORARY;
1336                   index = emit->tcs.patch_generic_tmp_index +
1337                           (index - emit->tcs.patch_generic_out_index);
1338                   /**
1339                    * Temporaries for patch constant data can be done
1340                    * as indexable temporaries.
1341                    */
1342                   tempArrayId = get_temp_array_id(emit, file, index);
1343                   index2d = tempArrayId > 0;
1344 
1345                   emit->reemit_instruction = FALSE;
1346                }
1347                else {
1348                   /* If per-patch outputs is been read in shader, we
1349                    * reemit instruction and store results in temporaries in
1350                    * patch constant phase. */
1351                   if (emit->info.reads_perpatch_outputs) {
1352                      emit->reemit_instruction = TRUE;
1353                   }
1354                }
1355             }
1356          }
1357          else if (reg->Register.Dimension) {
1358             /* Only control point outputs are declared 2D in tgsi */
1359             if (emit->tcs.control_point_phase) {
1360                if (emit->reemit_instruction) {
1361                   /* Store results of reemitted instruction in temporary register. */
1362                   index2d = FALSE;
1363                   file = TGSI_FILE_TEMPORARY;
1364                   index = emit->tcs.control_point_tmp_index +
1365                           (index - emit->tcs.control_point_out_index);
1366                   emit->reemit_instruction = FALSE;
1367                }
1368                else {
1369                   /* The mapped control point outputs are 1-D */
1370                   index2d = FALSE;
1371                   if (emit->info.reads_pervertex_outputs) {
1372                      /* If per-vertex outputs is been read in shader, we
1373                       * reemit instruction and store results in temporaries
1374                       * control point phase. */
1375                      emit->reemit_instruction = TRUE;
1376                   }
1377                }
1378 
1379                if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
1380                    emit->clip_dist_tmp_index != INVALID_INDEX) {
1381                   /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
1382                    * We store the clip distance in a temporary first, then
1383                    * we'll copy it to the shadow copy and to CLIPDIST with the
1384                    * enabled planes mask in emit_clip_distance_instructions().
1385                    */
1386                   file = TGSI_FILE_TEMPORARY;
1387                   index = emit->clip_dist_tmp_index + sem_index;
1388                }
1389                else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
1390                         emit->clip_vertex_tmp_index != INVALID_INDEX) {
1391                   /* replace the CLIPVERTEX output register with a temporary */
1392                   assert(emit->clip_mode == CLIP_VERTEX);
1393                   assert(sem_index == 0);
1394                   file = TGSI_FILE_TEMPORARY;
1395                   index = emit->clip_vertex_tmp_index;
1396                }
1397             }
1398             else {
1399                /* Discard writing into control point outputs in
1400                   patch constant phase */
1401                emit->discard_instruction =  TRUE;
1402             }
1403          }
1404       }
1405    }
1406 
1407    /* init operand tokens to all zero */
1408    operand0.value = 0;
1409 
1410    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1411 
1412    /* the operand has a writemask */
1413    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1414 
1415    /* Which of the four dest components to write to. Note that we can use a
1416     * simple assignment here since TGSI writemasks match VGPU10 writemasks.
1417     */
1418    STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
1419    operand0.mask = writemask;
1420 
1421    /* translate TGSI register file type to VGPU10 operand type */
1422    operand0.operandType = translate_register_file(file, tempArrayId > 0);
1423 
1424    check_register_index(emit, operand0.operandType, index);
1425 
1426    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1427                                       index2d, FALSE);
1428 
1429    /* Emit tokens */
1430    emit_dword(emit, operand0.value);
1431    if (tempArrayId > 0) {
1432       emit_dword(emit, tempArrayId);
1433    }
1434 
1435    emit_dword(emit, remap_temp_index(emit, file, index));
1436 
1437    if (indirect) {
1438       emit_indirect_register(emit, reg->Indirect.Index);
1439    }
1440 }
1441 
1442 
1443 /**
1444  * Check if temporary register needs to be initialize when
1445  * shader is not using indirect addressing for temporary and uninitialized
1446  * temporary is not used in loop. In these two scenarios, we cannot
1447  * determine if temporary is initialized or not.
1448  */
1449 static boolean
need_temp_reg_initialization(struct svga_shader_emitter_v10 * emit,unsigned index)1450 need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit,
1451                              unsigned index)
1452 {
1453    if (!(emit->info.indirect_files && (1u << TGSI_FILE_TEMPORARY))
1454        && emit->current_loop_depth == 0) {
1455       if (!emit->temp_map[index].initialized &&
1456           emit->temp_map[index].index < emit->num_shader_temps) {
1457          return TRUE;
1458       }
1459    }
1460 
1461    return FALSE;
1462 }
1463 
1464 
1465 /**
1466  * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
1467  * In quite a few cases, we do register substitution.  For example, if
1468  * the TGSI register is the front/back-face register, we replace that with
1469  * a temp register containing a value we computed earlier.
1470  */
1471 static void
emit_src_register(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * reg)1472 emit_src_register(struct svga_shader_emitter_v10 *emit,
1473                   const struct tgsi_full_src_register *reg)
1474 {
1475    enum tgsi_file_type file = reg->Register.File;
1476    unsigned index = reg->Register.Index;
1477    const boolean indirect = reg->Register.Indirect;
1478    unsigned tempArrayId = get_temp_array_id(emit, file, index);
1479    boolean index2d = (reg->Register.Dimension ||
1480                             tempArrayId > 0 ||
1481                             file == TGSI_FILE_CONSTANT);
1482    unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1483    boolean indirect2d = reg->Dimension.Indirect;
1484    unsigned swizzleX = reg->Register.SwizzleX;
1485    unsigned swizzleY = reg->Register.SwizzleY;
1486    unsigned swizzleZ = reg->Register.SwizzleZ;
1487    unsigned swizzleW = reg->Register.SwizzleW;
1488    const boolean absolute = reg->Register.Absolute;
1489    const boolean negate = reg->Register.Negate;
1490    VGPU10OperandToken0 operand0;
1491    VGPU10OperandToken1 operand1;
1492 
1493    operand0.value = operand1.value = 0;
1494 
1495    if (emit->unit == PIPE_SHADER_FRAGMENT){
1496       if (file == TGSI_FILE_INPUT) {
1497          if (index == emit->fs.face_input_index) {
1498             /* Replace INPUT[FACE] with TEMP[FACE] */
1499             file = TGSI_FILE_TEMPORARY;
1500             index = emit->fs.face_tmp_index;
1501          }
1502          else if (index == emit->fs.fragcoord_input_index) {
1503             /* Replace INPUT[POSITION] with TEMP[POSITION] */
1504             file = TGSI_FILE_TEMPORARY;
1505             index = emit->fs.fragcoord_tmp_index;
1506          }
1507          else if (index == emit->fs.layer_input_index) {
1508             /* Replace INPUT[LAYER] with zero.x */
1509             file = TGSI_FILE_IMMEDIATE;
1510             index = emit->fs.layer_imm_index;
1511             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1512          }
1513          else {
1514             /* We remap fragment shader inputs to that FS input indexes
1515              * match up with VS/GS output indexes.
1516              */
1517             index = emit->linkage.input_map[index];
1518          }
1519       }
1520       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1521          if (index == emit->fs.sample_pos_sys_index) {
1522             assert(emit->version >= 41);
1523             /* Current sample position is in a temp register */
1524             file = TGSI_FILE_TEMPORARY;
1525             index = emit->fs.sample_pos_tmp_index;
1526          }
1527          else if (index == emit->fs.sample_mask_in_sys_index) {
1528             /* Emitted as vCoverage0.x */
1529             /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32)
1530              * elements where s is the maximum number of color samples supported
1531              * by the implementation. With current implementation, we should not
1532              * have more than one element. So assert if Index != 0
1533              */
1534             assert((!reg->Register.Indirect && reg->Register.Index == 0) ||
1535                    reg->Register.Indirect);
1536             operand0.value = 0;
1537             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK;
1538             operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1539             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1540             operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1541             emit_dword(emit, operand0.value);
1542             return;
1543          }
1544          else {
1545             /* Map the TGSI system value to a VGPU10 input register */
1546             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1547             file = TGSI_FILE_INPUT;
1548             index = emit->system_value_indexes[index];
1549          }
1550       }
1551    }
1552    else if (emit->unit == PIPE_SHADER_GEOMETRY) {
1553       if (file == TGSI_FILE_INPUT) {
1554          if (index == emit->gs.prim_id_index) {
1555             operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1556             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1557          }
1558          index = emit->linkage.input_map[index];
1559       }
1560       else if (file == TGSI_FILE_SYSTEM_VALUE &&
1561                index == emit->gs.invocation_id_sys_index) {
1562          /* Emitted as vGSInstanceID0.x */
1563          operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1564          operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID;
1565          index = 0;
1566       }
1567    }
1568    else if (emit->unit == PIPE_SHADER_VERTEX) {
1569       if (file == TGSI_FILE_INPUT) {
1570          /* if input is adjusted... */
1571          if ((emit->key.vs.adjust_attrib_w_1 |
1572               emit->key.vs.adjust_attrib_itof |
1573               emit->key.vs.adjust_attrib_utof |
1574               emit->key.vs.attrib_is_bgra |
1575               emit->key.vs.attrib_puint_to_snorm |
1576               emit->key.vs.attrib_puint_to_uscaled |
1577               emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
1578             file = TGSI_FILE_TEMPORARY;
1579             index = emit->vs.adjusted_input[index];
1580          }
1581       }
1582       else if (file == TGSI_FILE_SYSTEM_VALUE) {
1583          if (index == emit->vs.vertex_id_sys_index &&
1584              emit->vs.vertex_id_tmp_index != INVALID_INDEX) {
1585             file = TGSI_FILE_TEMPORARY;
1586             index = emit->vs.vertex_id_tmp_index;
1587             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1588          }
1589          else {
1590             /* Map the TGSI system value to a VGPU10 input register */
1591             assert(index < ARRAY_SIZE(emit->system_value_indexes));
1592             file = TGSI_FILE_INPUT;
1593             index = emit->system_value_indexes[index];
1594          }
1595       }
1596    }
1597    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
1598 
1599       if (file == TGSI_FILE_SYSTEM_VALUE) {
1600          if (index == emit->tcs.vertices_per_patch_index) {
1601             /**
1602              * if source register is the system value for vertices_per_patch,
1603              * replace it with the immediate.
1604              */
1605             file = TGSI_FILE_IMMEDIATE;
1606             index = emit->tcs.imm_index;
1607             swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X;
1608          }
1609          else if (index == emit->tcs.invocation_id_sys_index) {
1610             if (emit->tcs.control_point_phase) {
1611                /**
1612                 * Emitted as vOutputControlPointID.x
1613                 */
1614                operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1615                operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID;
1616                operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
1617                operand0.mask = 0;
1618                emit_dword(emit, operand0.value);
1619                return;
1620             }
1621             else {
1622                /* There is no control point ID input declaration in
1623                 * the patch constant phase in hull shader.
1624                 * Since for now we are emitting all instructions in
1625                 * the patch constant phase, we are replacing the
1626                 * control point ID reference with the immediate 0.
1627                 */
1628                file = TGSI_FILE_IMMEDIATE;
1629                index = emit->tcs.imm_index;
1630                swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W;
1631             }
1632          }
1633          else if (index == emit->tcs.prim_id_index) {
1634             /**
1635              * Emitted as vPrim.x
1636              */
1637             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1638             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1639             index = 0;
1640          }
1641       }
1642       else if (file == TGSI_FILE_INPUT) {
1643          index = emit->linkage.input_map[index];
1644          if (!emit->tcs.control_point_phase) {
1645             /* Emitted as vicp */
1646             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1647             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1648             assert(reg->Register.Dimension);
1649          }
1650       }
1651       else if (file == TGSI_FILE_OUTPUT) {
1652          if ((index >= emit->tcs.patch_generic_out_index &&
1653              index < (emit->tcs.patch_generic_out_index +
1654                       emit->tcs.patch_generic_out_count)) ||
1655              index == emit->tcs.inner.tgsi_index ||
1656              index == emit->tcs.outer.tgsi_index) {
1657             if (emit->tcs.control_point_phase) {
1658                emit->discard_instruction = TRUE;
1659             }
1660             else {
1661                /* Device doesn't allow reading from output so
1662                 * use corresponding temporary register as source */
1663                file = TGSI_FILE_TEMPORARY;
1664                if (index == emit->tcs.inner.tgsi_index) {
1665                   index = emit->tcs.inner.temp_index;
1666                }
1667                else if (index == emit->tcs.outer.tgsi_index) {
1668                   index = emit->tcs.outer.temp_index;
1669                }
1670                else {
1671                   index = emit->tcs.patch_generic_tmp_index +
1672                           (index - emit->tcs.patch_generic_out_index);
1673                }
1674 
1675                /**
1676                 * Temporaries for patch constant data can be done
1677                 * as indexable temporaries.
1678                 */
1679                tempArrayId = get_temp_array_id(emit, file, index);
1680                index2d = tempArrayId > 0;
1681                index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index;
1682             }
1683          }
1684          else if (index2d) {
1685             if (emit->tcs.control_point_phase) {
1686                /* Device doesn't allow reading from output so
1687                 * use corresponding temporary register as source */
1688                file = TGSI_FILE_TEMPORARY;
1689                index2d = FALSE;
1690                index = emit->tcs.control_point_tmp_index +
1691                        (index - emit->tcs.control_point_out_index);
1692             }
1693             else {
1694                emit->discard_instruction = TRUE;
1695             }
1696          }
1697       }
1698    }
1699    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
1700       if (file == TGSI_FILE_SYSTEM_VALUE) {
1701          if (index == emit->tes.tesscoord_sys_index) {
1702             /**
1703              * Emitted as vDomain
1704              */
1705             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1706             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT;
1707             index = 0;
1708          }
1709          else if (index == emit->tes.inner.tgsi_index) {
1710             file = TGSI_FILE_TEMPORARY;
1711             index = emit->tes.inner.temp_index;
1712          }
1713          else if (index == emit->tes.outer.tgsi_index) {
1714             file = TGSI_FILE_TEMPORARY;
1715             index = emit->tes.outer.temp_index;
1716          }
1717          else if (index == emit->tes.prim_id_index) {
1718             /**
1719              * Emitted as vPrim.x
1720              */
1721             operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
1722             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
1723             index = 0;
1724          }
1725 
1726       }
1727       else if (file == TGSI_FILE_INPUT) {
1728          if (index2d) {
1729             /* 2D input is emitted as vcp (input control point). */
1730             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
1731             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1732 
1733             /* index specifies the element index and is remapped
1734              * to align with the tcs output index.
1735              */
1736             index = emit->linkage.input_map[index];
1737 
1738             assert(index2 < emit->key.tes.vertices_per_patch);
1739          }
1740          else {
1741             if (index < emit->key.tes.tessfactor_index)
1742                /* index specifies the generic patch index.
1743                 * Remapped to match up with the tcs output index.
1744                 */
1745                index = emit->linkage.input_map[index];
1746 
1747             operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
1748             operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1749          }
1750       }
1751    }
1752 
1753    if (file == TGSI_FILE_ADDRESS) {
1754       index = emit->address_reg_index[index];
1755       file = TGSI_FILE_TEMPORARY;
1756    }
1757 
1758    if (file == TGSI_FILE_TEMPORARY) {
1759       if (need_temp_reg_initialization(emit, index)) {
1760          emit->initialize_temp_index = index;
1761          emit->discard_instruction = TRUE;
1762       }
1763    }
1764 
1765    if (operand0.value == 0) {
1766       /* if operand0 was not set above for a special case, do the general
1767        * case now.
1768        */
1769       operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1770       operand0.operandType = translate_register_file(file, tempArrayId > 0);
1771    }
1772    operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
1773                                       index2d, indirect2d);
1774 
1775    if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
1776        operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
1777       /* there's no swizzle for in-line immediates */
1778       if (swizzleX == swizzleY &&
1779           swizzleX == swizzleZ &&
1780           swizzleX == swizzleW) {
1781          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1782       }
1783       else {
1784          operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1785       }
1786 
1787       operand0.swizzleX = swizzleX;
1788       operand0.swizzleY = swizzleY;
1789       operand0.swizzleZ = swizzleZ;
1790       operand0.swizzleW = swizzleW;
1791 
1792       if (absolute || negate) {
1793          operand0.extended = 1;
1794          operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
1795          if (absolute && !negate)
1796             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
1797          if (!absolute && negate)
1798             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
1799          if (absolute && negate)
1800             operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
1801       }
1802    }
1803 
1804    /* Emit the operand tokens */
1805    emit_dword(emit, operand0.value);
1806    if (operand0.extended)
1807       emit_dword(emit, operand1.value);
1808 
1809    if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
1810       /* Emit the four float/int in-line immediate values */
1811       unsigned *c;
1812       assert(index < ARRAY_SIZE(emit->immediates));
1813       assert(file == TGSI_FILE_IMMEDIATE);
1814       assert(swizzleX < 4);
1815       assert(swizzleY < 4);
1816       assert(swizzleZ < 4);
1817       assert(swizzleW < 4);
1818       c = (unsigned *) emit->immediates[index];
1819       emit_dword(emit, c[swizzleX]);
1820       emit_dword(emit, c[swizzleY]);
1821       emit_dword(emit, c[swizzleZ]);
1822       emit_dword(emit, c[swizzleW]);
1823    }
1824    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
1825       /* Emit the register index(es) */
1826       if (index2d) {
1827          emit_dword(emit, index2);
1828 
1829          if (indirect2d) {
1830             emit_indirect_register(emit, reg->DimIndirect.Index);
1831          }
1832       }
1833 
1834       emit_dword(emit, remap_temp_index(emit, file, index));
1835 
1836       if (indirect) {
1837          emit_indirect_register(emit, reg->Indirect.Index);
1838       }
1839    }
1840 }
1841 
1842 
1843 /**
1844  * Emit a resource operand (for use with a SAMPLE instruction).
1845  */
1846 static void
emit_resource_register(struct svga_shader_emitter_v10 * emit,unsigned resource_number)1847 emit_resource_register(struct svga_shader_emitter_v10 *emit,
1848                        unsigned resource_number)
1849 {
1850    VGPU10OperandToken0 operand0;
1851 
1852    check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);
1853 
1854    /* init */
1855    operand0.value = 0;
1856 
1857    operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
1858    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1859    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1860    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1861    operand0.swizzleX = VGPU10_COMPONENT_X;
1862    operand0.swizzleY = VGPU10_COMPONENT_Y;
1863    operand0.swizzleZ = VGPU10_COMPONENT_Z;
1864    operand0.swizzleW = VGPU10_COMPONENT_W;
1865 
1866    emit_dword(emit, operand0.value);
1867    emit_dword(emit, resource_number);
1868 }
1869 
1870 
1871 /**
1872  * Emit a sampler operand (for use with a SAMPLE instruction).
1873  */
1874 static void
emit_sampler_register(struct svga_shader_emitter_v10 * emit,unsigned sampler_number)1875 emit_sampler_register(struct svga_shader_emitter_v10 *emit,
1876                       unsigned sampler_number)
1877 {
1878    VGPU10OperandToken0 operand0;
1879 
1880    check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);
1881 
1882    /* init */
1883    operand0.value = 0;
1884 
1885    operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
1886    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1887 
1888    emit_dword(emit, operand0.value);
1889    emit_dword(emit, sampler_number);
1890 }
1891 
1892 
1893 /**
1894  * Emit an operand which reads the IS_FRONT_FACING register.
1895  */
1896 static void
emit_face_register(struct svga_shader_emitter_v10 * emit)1897 emit_face_register(struct svga_shader_emitter_v10 *emit)
1898 {
1899    VGPU10OperandToken0 operand0;
1900    unsigned index = emit->linkage.input_map[emit->fs.face_input_index];
1901 
1902    /* init */
1903    operand0.value = 0;
1904 
1905    operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
1906    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1907    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
1908    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1909 
1910    operand0.swizzleX = VGPU10_COMPONENT_X;
1911    operand0.swizzleY = VGPU10_COMPONENT_X;
1912    operand0.swizzleZ = VGPU10_COMPONENT_X;
1913    operand0.swizzleW = VGPU10_COMPONENT_X;
1914 
1915    emit_dword(emit, operand0.value);
1916    emit_dword(emit, index);
1917 }
1918 
1919 
1920 /**
1921  * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
1922  * instruction.
1923  */
1924 static void
emit_rasterizer_register(struct svga_shader_emitter_v10 * emit)1925 emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
1926 {
1927    VGPU10OperandToken0 operand0;
1928 
1929    /* init */
1930    operand0.value = 0;
1931 
1932    /* No register index for rasterizer index (there's only one) */
1933    operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
1934    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
1935    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
1936    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
1937    operand0.swizzleX = VGPU10_COMPONENT_X;
1938    operand0.swizzleY = VGPU10_COMPONENT_Y;
1939    operand0.swizzleZ = VGPU10_COMPONENT_Z;
1940    operand0.swizzleW = VGPU10_COMPONENT_W;
1941 
1942    emit_dword(emit, operand0.value);
1943 }
1944 
1945 
1946 /**
1947  * Emit tokens for the "stream" register used by the
1948  * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
1949  */
1950 static void
emit_stream_register(struct svga_shader_emitter_v10 * emit,unsigned index)1951 emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
1952 {
1953    VGPU10OperandToken0 operand0;
1954 
1955    /* init */
1956    operand0.value = 0;
1957 
1958    /* No register index for rasterizer index (there's only one) */
1959    operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
1960    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
1961    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
1962 
1963    emit_dword(emit, operand0.value);
1964    emit_dword(emit, index);
1965 }
1966 
1967 
1968 /**
1969  * Emit the token for a VGPU10 opcode, with precise parameter.
1970  * \param saturate   clamp result to [0,1]?
1971  */
1972 static void
emit_opcode_precise(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,boolean precise)1973 emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
1974                     unsigned vgpu10_opcode, boolean saturate, boolean precise)
1975 {
1976    VGPU10OpcodeToken0 token0;
1977 
1978    token0.value = 0;  /* init all fields to zero */
1979    token0.opcodeType = vgpu10_opcode;
1980    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
1981    token0.saturate = saturate;
1982 
1983    /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
1984     * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
1985     */
1986    token0.preciseValues = precise && emit->version >= 50;
1987 
1988    emit_dword(emit, token0.value);
1989 
1990    emit->uses_precise_qualifier |= token0.preciseValues;
1991 }
1992 
1993 
1994 /**
1995  * Emit the token for a VGPU10 opcode.
1996  * \param saturate   clamp result to [0,1]?
1997  */
1998 static void
emit_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate)1999 emit_opcode(struct svga_shader_emitter_v10 *emit,
2000             unsigned vgpu10_opcode, boolean saturate)
2001 {
2002    emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE);
2003 }
2004 
2005 
2006 /**
2007  * Emit the token for a VGPU10 resinfo instruction.
2008  * \param modifier   return type modifier, _uint or _rcpFloat.
2009  *                   TODO: We may want to remove this parameter if it will
2010  *                   only ever be used as _uint.
2011  */
2012 static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 * emit,VGPU10_RESINFO_RETURN_TYPE modifier)2013 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
2014                     VGPU10_RESINFO_RETURN_TYPE modifier)
2015 {
2016    VGPU10OpcodeToken0 token0;
2017 
2018    token0.value = 0;  /* init all fields to zero */
2019    token0.opcodeType = VGPU10_OPCODE_RESINFO;
2020    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2021    token0.resinfoReturnType = modifier;
2022 
2023    emit_dword(emit, token0.value);
2024 }
2025 
2026 
2027 /**
2028  * Emit opcode tokens for a texture sample instruction.  Texture instructions
2029  * can be rather complicated (texel offsets, etc) so we have this specialized
2030  * function.
2031  */
2032 static void
emit_sample_opcode(struct svga_shader_emitter_v10 * emit,unsigned vgpu10_opcode,boolean saturate,const int offsets[3])2033 emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
2034                    unsigned vgpu10_opcode, boolean saturate,
2035                    const int offsets[3])
2036 {
2037    VGPU10OpcodeToken0 token0;
2038    VGPU10OpcodeToken1 token1;
2039 
2040    token0.value = 0;  /* init all fields to zero */
2041    token0.opcodeType = vgpu10_opcode;
2042    token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
2043    token0.saturate = saturate;
2044 
2045    if (offsets[0] || offsets[1] || offsets[2]) {
2046       assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2047       assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2048       assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
2049       assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2050       assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2051       assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
2052 
2053       token0.extended = 1;
2054       token1.value = 0;
2055       token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
2056       token1.offsetU = offsets[0];
2057       token1.offsetV = offsets[1];
2058       token1.offsetW = offsets[2];
2059    }
2060 
2061    emit_dword(emit, token0.value);
2062    if (token0.extended) {
2063       emit_dword(emit, token1.value);
2064    }
2065 }
2066 
2067 
2068 /**
2069  * Emit a DISCARD opcode token.
2070  * If nonzero is set, we'll discard the fragment if the X component is not 0.
2071  * Otherwise, we'll discard the fragment if the X component is 0.
2072  */
2073 static void
emit_discard_opcode(struct svga_shader_emitter_v10 * emit,boolean nonzero)2074 emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
2075 {
2076    VGPU10OpcodeToken0 opcode0;
2077 
2078    opcode0.value = 0;
2079    opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
2080    if (nonzero)
2081       opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
2082 
2083    emit_dword(emit, opcode0.value);
2084 }
2085 
2086 
2087 /**
2088  * We need to call this before we begin emitting a VGPU10 instruction.
2089  */
2090 static void
begin_emit_instruction(struct svga_shader_emitter_v10 * emit)2091 begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
2092 {
2093    assert(emit->inst_start_token == 0);
2094    /* Save location of the instruction's VGPU10OpcodeToken0 token.
2095     * Note, we can't save a pointer because it would become invalid if
2096     * we have to realloc the output buffer.
2097     */
2098    emit->inst_start_token = emit_get_num_tokens(emit);
2099 }
2100 
2101 
2102 /**
2103  * We need to call this after we emit the last token of a VGPU10 instruction.
2104  * This function patches in the opcode token's instructionLength field.
2105  */
2106 static void
end_emit_instruction(struct svga_shader_emitter_v10 * emit)2107 end_emit_instruction(struct svga_shader_emitter_v10 *emit)
2108 {
2109    VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
2110    unsigned inst_length;
2111 
2112    assert(emit->inst_start_token > 0);
2113 
2114    if (emit->discard_instruction) {
2115       /* Back up the emit->ptr to where this instruction started so
2116        * that we discard the current instruction.
2117        */
2118       emit->ptr = (char *) (tokens + emit->inst_start_token);
2119    }
2120    else {
2121       /* Compute instruction length and patch that into the start of
2122        * the instruction.
2123        */
2124       inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;
2125 
2126       assert(inst_length > 0);
2127 
2128       tokens[emit->inst_start_token].instructionLength = inst_length;
2129    }
2130 
2131    emit->inst_start_token = 0; /* reset to zero for error checking */
2132    emit->discard_instruction = FALSE;
2133 }
2134 
2135 
2136 /**
2137  * Return index for a free temporary register.
2138  */
2139 static unsigned
get_temp_index(struct svga_shader_emitter_v10 * emit)2140 get_temp_index(struct svga_shader_emitter_v10 *emit)
2141 {
2142    assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
2143    return emit->num_shader_temps + emit->internal_temp_count++;
2144 }
2145 
2146 
2147 /**
2148  * Release the temporaries which were generated by get_temp_index().
2149  */
2150 static void
free_temp_indexes(struct svga_shader_emitter_v10 * emit)2151 free_temp_indexes(struct svga_shader_emitter_v10 *emit)
2152 {
2153    emit->internal_temp_count = 0;
2154 }
2155 
2156 
2157 /**
2158  * Create a tgsi_full_src_register.
2159  */
2160 static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file,unsigned index)2161 make_src_reg(enum tgsi_file_type file, unsigned index)
2162 {
2163    struct tgsi_full_src_register reg;
2164 
2165    memset(&reg, 0, sizeof(reg));
2166    reg.Register.File = file;
2167    reg.Register.Index = index;
2168    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2169    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2170    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2171    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2172    return reg;
2173 }
2174 
2175 
2176 /**
2177  * Create a tgsi_full_src_register with a swizzle such that all four
2178  * vector components have the same scalar value.
2179  */
2180 static struct tgsi_full_src_register
make_src_scalar_reg(enum tgsi_file_type file,unsigned index,unsigned component)2181 make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component)
2182 {
2183    struct tgsi_full_src_register reg;
2184 
2185    assert(component >= TGSI_SWIZZLE_X);
2186    assert(component <= TGSI_SWIZZLE_W);
2187 
2188    memset(&reg, 0, sizeof(reg));
2189    reg.Register.File = file;
2190    reg.Register.Index = index;
2191    reg.Register.SwizzleX =
2192    reg.Register.SwizzleY =
2193    reg.Register.SwizzleZ =
2194    reg.Register.SwizzleW = component;
2195    return reg;
2196 }
2197 
2198 
2199 /**
2200  * Create a tgsi_full_src_register for a temporary.
2201  */
2202 static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)2203 make_src_temp_reg(unsigned index)
2204 {
2205    return make_src_reg(TGSI_FILE_TEMPORARY, index);
2206 }
2207 
2208 
2209 /**
2210  * Create a tgsi_full_src_register for a constant.
2211  */
2212 static struct tgsi_full_src_register
make_src_const_reg(unsigned index)2213 make_src_const_reg(unsigned index)
2214 {
2215    return make_src_reg(TGSI_FILE_CONSTANT, index);
2216 }
2217 
2218 
2219 /**
2220  * Create a tgsi_full_src_register for an immediate constant.
2221  */
2222 static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)2223 make_src_immediate_reg(unsigned index)
2224 {
2225    return make_src_reg(TGSI_FILE_IMMEDIATE, index);
2226 }
2227 
2228 
2229 /**
2230  * Create a tgsi_full_dst_register.
2231  */
2232 static struct tgsi_full_dst_register
make_dst_reg(enum tgsi_file_type file,unsigned index)2233 make_dst_reg(enum tgsi_file_type file, unsigned index)
2234 {
2235    struct tgsi_full_dst_register reg;
2236 
2237    memset(&reg, 0, sizeof(reg));
2238    reg.Register.File = file;
2239    reg.Register.Index = index;
2240    reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
2241    return reg;
2242 }
2243 
2244 
2245 /**
2246  * Create a tgsi_full_dst_register for a temporary.
2247  */
2248 static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)2249 make_dst_temp_reg(unsigned index)
2250 {
2251    return make_dst_reg(TGSI_FILE_TEMPORARY, index);
2252 }
2253 
2254 
2255 /**
2256  * Create a tgsi_full_dst_register for an output.
2257  */
2258 static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)2259 make_dst_output_reg(unsigned index)
2260 {
2261    return make_dst_reg(TGSI_FILE_OUTPUT, index);
2262 }
2263 
2264 
2265 /**
2266  * Create negated tgsi_full_src_register.
2267  */
2268 static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register * reg)2269 negate_src(const struct tgsi_full_src_register *reg)
2270 {
2271    struct tgsi_full_src_register neg = *reg;
2272    neg.Register.Negate = !reg->Register.Negate;
2273    return neg;
2274 }
2275 
2276 /**
2277  * Create absolute value of a tgsi_full_src_register.
2278  */
2279 static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register * reg)2280 absolute_src(const struct tgsi_full_src_register *reg)
2281 {
2282    struct tgsi_full_src_register absolute = *reg;
2283    absolute.Register.Absolute = 1;
2284    return absolute;
2285 }
2286 
2287 
2288 /** Return the named swizzle term from the src register */
2289 static inline unsigned
get_swizzle(const struct tgsi_full_src_register * reg,enum tgsi_swizzle term)2290 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term)
2291 {
2292    switch (term) {
2293    case TGSI_SWIZZLE_X:
2294       return reg->Register.SwizzleX;
2295    case TGSI_SWIZZLE_Y:
2296       return reg->Register.SwizzleY;
2297    case TGSI_SWIZZLE_Z:
2298       return reg->Register.SwizzleZ;
2299    case TGSI_SWIZZLE_W:
2300       return reg->Register.SwizzleW;
2301    default:
2302       assert(!"Bad swizzle");
2303       return TGSI_SWIZZLE_X;
2304    }
2305 }
2306 
2307 
2308 /**
2309  * Create swizzled tgsi_full_src_register.
2310  */
2311 static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzleX,enum tgsi_swizzle swizzleY,enum tgsi_swizzle swizzleZ,enum tgsi_swizzle swizzleW)2312 swizzle_src(const struct tgsi_full_src_register *reg,
2313             enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY,
2314             enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW)
2315 {
2316    struct tgsi_full_src_register swizzled = *reg;
2317    /* Note: we swizzle the current swizzle */
2318    swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
2319    swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
2320    swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
2321    swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
2322    return swizzled;
2323 }
2324 
2325 
2326 /**
2327  * Create swizzled tgsi_full_src_register where all the swizzle
2328  * terms are the same.
2329  */
2330 static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register * reg,enum tgsi_swizzle swizzle)2331 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle)
2332 {
2333    struct tgsi_full_src_register swizzled = *reg;
2334    /* Note: we swizzle the current swizzle */
2335    swizzled.Register.SwizzleX =
2336    swizzled.Register.SwizzleY =
2337    swizzled.Register.SwizzleZ =
2338    swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
2339    return swizzled;
2340 }
2341 
2342 
2343 /**
2344  * Create new tgsi_full_dst_register with writemask.
2345  * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
2346  */
2347 static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register * reg,unsigned mask)2348 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
2349 {
2350    struct tgsi_full_dst_register masked = *reg;
2351    masked.Register.WriteMask = mask;
2352    return masked;
2353 }
2354 
2355 
2356 /**
2357  * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
2358  */
2359 static boolean
same_swizzle_terms(const struct tgsi_full_src_register * reg)2360 same_swizzle_terms(const struct tgsi_full_src_register *reg)
2361 {
2362    return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
2363            reg->Register.SwizzleY == reg->Register.SwizzleZ &&
2364            reg->Register.SwizzleZ == reg->Register.SwizzleW);
2365 }
2366 
2367 
2368 /**
2369  * Search the vector for the value 'x' and return its position.
2370  */
2371 static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],union tgsi_immediate_data x)2372 find_imm_in_vec4(const union tgsi_immediate_data vec[4],
2373                  union tgsi_immediate_data x)
2374 {
2375    unsigned i;
2376    for (i = 0; i < 4; i++) {
2377       if (vec[i].Int == x.Int)
2378          return i;
2379    }
2380    return -1;
2381 }
2382 
2383 
2384 /**
2385  * Helper used by make_immediate_reg(), make_immediate_reg_4().
2386  */
2387 static int
find_immediate(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data x,unsigned startIndex)2388 find_immediate(struct svga_shader_emitter_v10 *emit,
2389                union tgsi_immediate_data x, unsigned startIndex)
2390 {
2391    const unsigned endIndex = emit->num_immediates;
2392    unsigned i;
2393 
2394    assert(emit->immediates_emitted);
2395 
2396    /* Search immediates for x, y, z, w */
2397    for (i = startIndex; i < endIndex; i++) {
2398       if (x.Int == emit->immediates[i][0].Int ||
2399           x.Int == emit->immediates[i][1].Int ||
2400           x.Int == emit->immediates[i][2].Int ||
2401           x.Int == emit->immediates[i][3].Int) {
2402          return i;
2403       }
2404    }
2405    /* Should never try to use an immediate value that wasn't pre-declared */
2406    assert(!"find_immediate() failed!");
2407    return -1;
2408 }
2409 
2410 
2411 /**
2412  * As above, but search for a double[2] pair.
2413  */
2414 static int
find_immediate_dbl(struct svga_shader_emitter_v10 * emit,double x,double y)2415 find_immediate_dbl(struct svga_shader_emitter_v10 *emit,
2416                    double x, double y)
2417 {
2418    const unsigned endIndex = emit->num_immediates;
2419    unsigned i;
2420 
2421    assert(emit->immediates_emitted);
2422 
2423    /* Search immediates for x, y, z, w */
2424    for (i = 0; i < endIndex; i++) {
2425       if (x == emit->immediates_dbl[i][0] &&
2426           y == emit->immediates_dbl[i][1]) {
2427          return i;
2428       }
2429    }
2430    /* Should never try to use an immediate value that wasn't pre-declared */
2431    assert(!"find_immediate_dbl() failed!");
2432    return -1;
2433 }
2434 
2435 
2436 
2437 /**
2438  * Return a tgsi_full_src_register for an immediate/literal
2439  * union tgsi_immediate_data[4] value.
2440  * Note: the values must have been previously declared/allocated in
2441  * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
2442  * vec4 immediate.
2443  */
2444 static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2445 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
2446                      const union tgsi_immediate_data imm[4])
2447 {
2448    struct tgsi_full_src_register reg;
2449    unsigned i;
2450 
2451    for (i = 0; i < emit->num_common_immediates; i++) {
2452       /* search for first component value */
2453       int immpos = find_immediate(emit, imm[0], i);
2454       int x, y, z, w;
2455 
2456       assert(immpos >= 0);
2457 
2458       /* find remaining components within the immediate vector */
2459       x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
2460       y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
2461       z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
2462       w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);
2463 
2464       if (x >=0 &&  y >= 0 && z >= 0 && w >= 0) {
2465          /* found them all */
2466          memset(&reg, 0, sizeof(reg));
2467          reg.Register.File = TGSI_FILE_IMMEDIATE;
2468          reg.Register.Index = immpos;
2469          reg.Register.SwizzleX = x;
2470          reg.Register.SwizzleY = y;
2471          reg.Register.SwizzleZ = z;
2472          reg.Register.SwizzleW = w;
2473          return reg;
2474       }
2475       /* else, keep searching */
2476    }
2477 
2478    assert(!"Failed to find immediate register!");
2479 
2480    /* Just return IMM[0].xxxx */
2481    memset(&reg, 0, sizeof(reg));
2482    reg.Register.File = TGSI_FILE_IMMEDIATE;
2483    return reg;
2484 }
2485 
2486 
2487 /**
2488  * Return a tgsi_full_src_register for an immediate/literal
2489  * union tgsi_immediate_data value of the form {value, value, value, value}.
2490  * \sa make_immediate_reg_4() regarding allowed values.
2491  */
2492 static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 * emit,union tgsi_immediate_data value)2493 make_immediate_reg(struct svga_shader_emitter_v10 *emit,
2494                    union tgsi_immediate_data value)
2495 {
2496    struct tgsi_full_src_register reg;
2497    int immpos = find_immediate(emit, value, 0);
2498 
2499    assert(immpos >= 0);
2500 
2501    memset(&reg, 0, sizeof(reg));
2502    reg.Register.File = TGSI_FILE_IMMEDIATE;
2503    reg.Register.Index = immpos;
2504    reg.Register.SwizzleX =
2505    reg.Register.SwizzleY =
2506    reg.Register.SwizzleZ =
2507    reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);
2508 
2509    return reg;
2510 }
2511 
2512 
2513 /**
2514  * Return a tgsi_full_src_register for an immediate/literal float[4] value.
2515  * \sa make_immediate_reg_4() regarding allowed values.
2516  */
2517 static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2518 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
2519                           float x, float y, float z, float w)
2520 {
2521    union tgsi_immediate_data imm[4];
2522    imm[0].Float = x;
2523    imm[1].Float = y;
2524    imm[2].Float = z;
2525    imm[3].Float = w;
2526    return make_immediate_reg_4(emit, imm);
2527 }
2528 
2529 
2530 /**
2531  * Return a tgsi_full_src_register for an immediate/literal float value
2532  * of the form {value, value, value, value}.
2533  * \sa make_immediate_reg_4() regarding allowed values.
2534  */
2535 static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 * emit,float value)2536 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
2537 {
2538    union tgsi_immediate_data imm;
2539    imm.Float = value;
2540    return make_immediate_reg(emit, imm);
2541 }
2542 
2543 
2544 /**
2545  * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
2546  */
2547 static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2548 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
2549                         int x, int y, int z, int w)
2550 {
2551    union tgsi_immediate_data imm[4];
2552    imm[0].Int = x;
2553    imm[1].Int = y;
2554    imm[2].Int = z;
2555    imm[3].Int = w;
2556    return make_immediate_reg_4(emit, imm);
2557 }
2558 
2559 
2560 /**
2561  * Return a tgsi_full_src_register for an immediate/literal int value
2562  * of the form {value, value, value, value}.
2563  * \sa make_immediate_reg_4() regarding allowed values.
2564  */
2565 static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 * emit,int value)2566 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
2567 {
2568    union tgsi_immediate_data imm;
2569    imm.Int = value;
2570    return make_immediate_reg(emit, imm);
2571 }
2572 
2573 
2574 static struct tgsi_full_src_register
make_immediate_reg_double(struct svga_shader_emitter_v10 * emit,double value)2575 make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value)
2576 {
2577    struct tgsi_full_src_register reg;
2578    int immpos = find_immediate_dbl(emit, value, value);
2579 
2580    assert(immpos >= 0);
2581 
2582    memset(&reg, 0, sizeof(reg));
2583    reg.Register.File = TGSI_FILE_IMMEDIATE;
2584    reg.Register.Index = immpos;
2585    reg.Register.SwizzleX = TGSI_SWIZZLE_X;
2586    reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
2587    reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
2588    reg.Register.SwizzleW = TGSI_SWIZZLE_W;
2589 
2590    return reg;
2591 }
2592 
2593 
2594 /**
2595  * Allocate space for a union tgsi_immediate_data[4] immediate.
2596  * \return  the index/position of the immediate.
2597  */
2598 static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 * emit,const union tgsi_immediate_data imm[4])2599 alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
2600                   const union tgsi_immediate_data imm[4])
2601 {
2602    unsigned n = emit->num_immediates++;
2603    assert(!emit->immediates_emitted);
2604    assert(n < ARRAY_SIZE(emit->immediates));
2605    emit->immediates[n][0] = imm[0];
2606    emit->immediates[n][1] = imm[1];
2607    emit->immediates[n][2] = imm[2];
2608    emit->immediates[n][3] = imm[3];
2609    return n;
2610 }
2611 
2612 
2613 /**
2614  * Allocate space for a float[4] immediate.
2615  * \return  the index/position of the immediate.
2616  */
2617 static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 * emit,float x,float y,float z,float w)2618 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
2619                        float x, float y, float z, float w)
2620 {
2621    union tgsi_immediate_data imm[4];
2622    imm[0].Float = x;
2623    imm[1].Float = y;
2624    imm[2].Float = z;
2625    imm[3].Float = w;
2626    return alloc_immediate_4(emit, imm);
2627 }
2628 
2629 
2630 /**
2631  * Allocate space for an int[4] immediate.
2632  * \return  the index/position of the immediate.
2633  */
2634 static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 * emit,int x,int y,int z,int w)2635 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
2636                        int x, int y, int z, int w)
2637 {
2638    union tgsi_immediate_data imm[4];
2639    imm[0].Int = x;
2640    imm[1].Int = y;
2641    imm[2].Int = z;
2642    imm[3].Int = w;
2643    return alloc_immediate_4(emit, imm);
2644 }
2645 
2646 
2647 static unsigned
alloc_immediate_double2(struct svga_shader_emitter_v10 * emit,double x,double y)2648 alloc_immediate_double2(struct svga_shader_emitter_v10 *emit,
2649                         double x, double y)
2650 {
2651    unsigned n = emit->num_immediates++;
2652    assert(!emit->immediates_emitted);
2653    assert(n < ARRAY_SIZE(emit->immediates));
2654    emit->immediates_dbl[n][0] = x;
2655    emit->immediates_dbl[n][1] = y;
2656    return n;
2657 
2658 }
2659 
2660 
2661 /**
2662  * Allocate a shader input to store a system value.
2663  */
2664 static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 * emit,unsigned index)2665 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
2666 {
2667    const unsigned n = emit->linkage.input_map_max + 1 + index;
2668    assert(index < ARRAY_SIZE(emit->system_value_indexes));
2669    emit->system_value_indexes[index] = n;
2670    return n;
2671 }
2672 
2673 
2674 /**
2675  * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
2676  */
2677 static boolean
emit_vgpu10_immediate(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_immediate * imm)2678 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
2679                       const struct tgsi_full_immediate *imm)
2680 {
2681    /* We don't actually emit any code here.  We just save the
2682     * immediate values and emit them later.
2683     */
2684    alloc_immediate_4(emit, imm->u);
2685    return TRUE;
2686 }
2687 
2688 
2689 /**
2690  * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
2691  * containing all the immediate values previously allocated
2692  * with alloc_immediate_4().
2693  */
2694 static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 * emit)2695 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
2696 {
2697    VGPU10OpcodeToken0 token;
2698 
2699    assert(!emit->immediates_emitted);
2700 
2701    token.value = 0;
2702    token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
2703    token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;
2704 
2705    /* Note: no begin/end_emit_instruction() calls */
2706    emit_dword(emit, token.value);
2707    emit_dword(emit, 2 + 4 * emit->num_immediates);
2708    emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);
2709 
2710    emit->immediates_emitted = TRUE;
2711 
2712    return TRUE;
2713 }
2714 
2715 
2716 /**
2717  * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
2718  * interpolation mode.
2719  * \return a VGPU10_INTERPOLATION_x value
2720  */
2721 static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 * emit,enum tgsi_interpolate_mode interp,enum tgsi_interpolate_loc interpolate_loc)2722 translate_interpolation(const struct svga_shader_emitter_v10 *emit,
2723                         enum tgsi_interpolate_mode interp,
2724                         enum tgsi_interpolate_loc interpolate_loc)
2725 {
2726    if (interp == TGSI_INTERPOLATE_COLOR) {
2727       interp = emit->key.fs.flatshade ?
2728          TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
2729    }
2730 
2731    switch (interp) {
2732    case TGSI_INTERPOLATE_CONSTANT:
2733       return VGPU10_INTERPOLATION_CONSTANT;
2734    case TGSI_INTERPOLATE_LINEAR:
2735       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2736          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID;
2737       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2738                  emit->version >= 41) {
2739          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE;
2740       } else {
2741          return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
2742       }
2743       break;
2744    case TGSI_INTERPOLATE_PERSPECTIVE:
2745       if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) {
2746          return VGPU10_INTERPOLATION_LINEAR_CENTROID;
2747       } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE &&
2748                  emit->version >= 41) {
2749          return VGPU10_INTERPOLATION_LINEAR_SAMPLE;
2750       } else {
2751          return VGPU10_INTERPOLATION_LINEAR;
2752       }
2753       break;
2754    default:
2755       assert(!"Unexpected interpolation mode");
2756       return VGPU10_INTERPOLATION_CONSTANT;
2757    }
2758 }
2759 
2760 
2761 /**
2762  * Translate a TGSI property to VGPU10.
2763  * Don't emit any instructions yet, only need to gather the primitive property
2764  * information.  The output primitive topology might be changed later. The
2765  * final property instructions will be emitted as part of the pre-helper code.
2766  */
2767 static boolean
emit_vgpu10_property(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_property * prop)2768 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit,
2769                      const struct tgsi_full_property *prop)
2770 {
2771    static const VGPU10_PRIMITIVE primType[] = {
2772       VGPU10_PRIMITIVE_POINT,           /* PIPE_PRIM_POINTS */
2773       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINES */
2774       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_LOOP */
2775       VGPU10_PRIMITIVE_LINE,            /* PIPE_PRIM_LINE_STRIP */
2776       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLES */
2777       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_STRIP */
2778       VGPU10_PRIMITIVE_TRIANGLE,        /* PIPE_PRIM_TRIANGLE_FAN */
2779       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUADS */
2780       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_QUAD_STRIP */
2781       VGPU10_PRIMITIVE_UNDEFINED,       /* PIPE_PRIM_POLYGON */
2782       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINES_ADJACENCY */
2783       VGPU10_PRIMITIVE_LINE_ADJ,        /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2784       VGPU10_PRIMITIVE_TRIANGLE_ADJ,    /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2785       VGPU10_PRIMITIVE_TRIANGLE_ADJ     /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2786    };
2787 
2788    static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = {
2789       VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST,     /* PIPE_PRIM_POINTS */
2790       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINES */
2791       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST,      /* PIPE_PRIM_LINE_LOOP */
2792       VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP,     /* PIPE_PRIM_LINE_STRIP */
2793       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST,  /* PIPE_PRIM_TRIANGLES */
2794       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */
2795       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */
2796       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUADS */
2797       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_QUAD_STRIP */
2798       VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED,     /* PIPE_PRIM_POLYGON */
2799       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINES_ADJACENCY */
2800       VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ,  /* PIPE_PRIM_LINE_STRIP_ADJACENCY */
2801       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */
2802       VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */
2803    };
2804 
2805    static const unsigned inputArraySize[] = {
2806       0,       /* VGPU10_PRIMITIVE_UNDEFINED */
2807       1,       /* VGPU10_PRIMITIVE_POINT */
2808       2,       /* VGPU10_PRIMITIVE_LINE */
2809       3,       /* VGPU10_PRIMITIVE_TRIANGLE */
2810       0,
2811       0,
2812       4,       /* VGPU10_PRIMITIVE_LINE_ADJ */
2813       6        /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */
2814    };
2815 
2816    switch (prop->Property.PropertyName) {
2817    case TGSI_PROPERTY_GS_INPUT_PRIM:
2818       assert(prop->u[0].Data < ARRAY_SIZE(primType));
2819       emit->gs.prim_type = primType[prop->u[0].Data];
2820       assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED);
2821       emit->gs.input_size = inputArraySize[emit->gs.prim_type];
2822       break;
2823 
2824    case TGSI_PROPERTY_GS_OUTPUT_PRIM:
2825       assert(prop->u[0].Data < ARRAY_SIZE(primTopology));
2826       emit->gs.prim_topology = primTopology[prop->u[0].Data];
2827       assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED);
2828       break;
2829 
2830    case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
2831       emit->gs.max_out_vertices = prop->u[0].Data;
2832       break;
2833 
2834    case TGSI_PROPERTY_GS_INVOCATIONS:
2835       emit->gs.invocations = prop->u[0].Data;
2836       break;
2837 
2838    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
2839    case TGSI_PROPERTY_NEXT_SHADER:
2840    case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
2841       /* no-op */
2842       break;
2843 
2844    case TGSI_PROPERTY_TCS_VERTICES_OUT:
2845       /* This info is already captured in the shader key */
2846       break;
2847 
2848    case TGSI_PROPERTY_TES_PRIM_MODE:
2849       emit->tes.prim_mode = prop->u[0].Data;
2850       break;
2851 
2852    case TGSI_PROPERTY_TES_SPACING:
2853       emit->tes.spacing = prop->u[0].Data;
2854       break;
2855 
2856    case TGSI_PROPERTY_TES_VERTEX_ORDER_CW:
2857       emit->tes.vertices_order_cw = prop->u[0].Data;
2858       break;
2859 
2860    case TGSI_PROPERTY_TES_POINT_MODE:
2861       emit->tes.point_mode = prop->u[0].Data;
2862       break;
2863 
2864    default:
2865       debug_printf("Unexpected TGSI property %s\n",
2866                    tgsi_property_names[prop->Property.PropertyName]);
2867    }
2868 
2869    return TRUE;
2870 }
2871 
2872 
2873 static void
emit_property_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,unsigned nData,unsigned data)2874 emit_property_instruction(struct svga_shader_emitter_v10 *emit,
2875                           VGPU10OpcodeToken0 opcode0, unsigned nData,
2876                           unsigned data)
2877 {
2878    begin_emit_instruction(emit);
2879    emit_dword(emit, opcode0.value);
2880    if (nData)
2881       emit_dword(emit, data);
2882    end_emit_instruction(emit);
2883 }
2884 
2885 
2886 /**
2887  * Emit property instructions
2888  */
2889 static void
emit_property_instructions(struct svga_shader_emitter_v10 * emit)2890 emit_property_instructions(struct svga_shader_emitter_v10 *emit)
2891 {
2892    VGPU10OpcodeToken0 opcode0;
2893 
2894    assert(emit->unit == PIPE_SHADER_GEOMETRY);
2895 
2896    /* emit input primitive type declaration */
2897    opcode0.value = 0;
2898    opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
2899    opcode0.primitive = emit->gs.prim_type;
2900    emit_property_instruction(emit, opcode0, 0, 0);
2901 
2902    /* emit max output vertices */
2903    opcode0.value = 0;
2904    opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
2905    emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
2906 
2907    if (emit->version >= 50 && emit->gs.invocations > 0) {
2908       opcode0.value = 0;
2909       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT;
2910       emit_property_instruction(emit, opcode0, 1, emit->gs.invocations);
2911    }
2912 }
2913 
2914 
2915 /**
2916  * A helper function to declare tessellator domain in a hull shader or
2917  * in the domain shader.
2918  */
2919 static void
emit_tessellator_domain(struct svga_shader_emitter_v10 * emit,enum pipe_prim_type prim_mode)2920 emit_tessellator_domain(struct svga_shader_emitter_v10 *emit,
2921                         enum pipe_prim_type prim_mode)
2922 {
2923    VGPU10OpcodeToken0 opcode0;
2924 
2925    opcode0.value = 0;
2926    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN;
2927    switch (prim_mode) {
2928    case PIPE_PRIM_QUADS:
2929    case PIPE_PRIM_LINES:
2930       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD;
2931       break;
2932    case PIPE_PRIM_TRIANGLES:
2933       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI;
2934       break;
2935    default:
2936       debug_printf("Invalid tessellator prim mode %d\n", prim_mode);
2937       opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED;
2938    }
2939    begin_emit_instruction(emit);
2940    emit_dword(emit, opcode0.value);
2941    end_emit_instruction(emit);
2942 }
2943 
2944 
2945 /**
2946  * Emit domain shader declarations.
2947  */
2948 static void
emit_domain_shader_declarations(struct svga_shader_emitter_v10 * emit)2949 emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit)
2950 {
2951    VGPU10OpcodeToken0 opcode0;
2952 
2953    assert(emit->unit == PIPE_SHADER_TESS_EVAL);
2954 
2955    /* Emit the input control point count */
2956    assert(emit->key.tes.vertices_per_patch >= 0 &&
2957           emit->key.tes.vertices_per_patch <= 32);
2958 
2959    opcode0.value = 0;
2960    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
2961    opcode0.controlPointCount = emit->key.tes.vertices_per_patch;
2962    begin_emit_instruction(emit);
2963    emit_dword(emit, opcode0.value);
2964    end_emit_instruction(emit);
2965 
2966    emit_tessellator_domain(emit, emit->tes.prim_mode);
2967 }
2968 
2969 
2970 /**
2971  * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
2972  * to implement some instructions.  We pre-allocate those values here
2973  * in the immediate constant buffer.
2974  */
2975 static void
alloc_common_immediates(struct svga_shader_emitter_v10 * emit)2976 alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
2977 {
2978    unsigned n = 0;
2979 
2980    emit->common_immediate_pos[n++] =
2981       alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);
2982 
2983    if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
2984       emit->common_immediate_pos[n++] =
2985          alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
2986    }
2987 
2988    emit->common_immediate_pos[n++] =
2989       alloc_immediate_int4(emit, 0, 1, 0, -1);
2990 
2991    if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 ||
2992        emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) {
2993       emit->common_immediate_pos[n++] =
2994          alloc_immediate_int4(emit, 31, 0, 0, 0);
2995    }
2996 
2997    if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 ||
2998        emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 ||
2999        emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) {
3000       emit->common_immediate_pos[n++] =
3001          alloc_immediate_int4(emit, 32, 0, 0, 0);
3002    }
3003 
3004    if (emit->key.vs.attrib_puint_to_snorm) {
3005       emit->common_immediate_pos[n++] =
3006          alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
3007    }
3008 
3009    if (emit->key.vs.attrib_puint_to_uscaled) {
3010       emit->common_immediate_pos[n++] =
3011          alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
3012    }
3013 
3014    if (emit->key.vs.attrib_puint_to_sscaled) {
3015       emit->common_immediate_pos[n++] =
3016          alloc_immediate_int4(emit, 22, 12, 2, 0);
3017 
3018       emit->common_immediate_pos[n++] =
3019          alloc_immediate_int4(emit, 22, 30, 0, 0);
3020    }
3021 
3022    if (emit->vposition.num_prescale > 1) {
3023       unsigned i;
3024       for (i = 0; i < emit->vposition.num_prescale; i+=4) {
3025          emit->common_immediate_pos[n++] =
3026             alloc_immediate_int4(emit, i, i+1, i+2, i+3);
3027       }
3028    }
3029 
3030    emit->immediates_dbl = (double (*)[2]) emit->immediates;
3031 
3032    if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) {
3033       emit->common_immediate_pos[n++] =
3034          alloc_immediate_double2(emit, -1.0, -1.0);
3035    }
3036 
3037    if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
3038       emit->common_immediate_pos[n++] =
3039          alloc_immediate_double2(emit, 0.0, 0.0);
3040       emit->common_immediate_pos[n++] =
3041          alloc_immediate_double2(emit, 1.0, 1.0);
3042    }
3043 
3044    if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) {
3045       emit->common_immediate_pos[n++] =
3046          alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0);
3047    }
3048 
3049    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3050 
3051    unsigned i;
3052 
3053    for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
3054       if (emit->key.tex[i].texel_bias) {
3055          /* Replace 0.0f if more immediate float value is needed */
3056          emit->common_immediate_pos[n++] =
3057             alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
3058          break;
3059       }
3060    }
3061 
3062    assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
3063    emit->num_common_immediates = n;
3064 }
3065 
3066 
3067 /**
3068  * Emit hull shader declarations.
3069 */
3070 static void
emit_hull_shader_declarations(struct svga_shader_emitter_v10 * emit)3071 emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit)
3072 {
3073    VGPU10OpcodeToken0 opcode0;
3074 
3075    /* Emit the input control point count */
3076    assert(emit->key.tcs.vertices_per_patch > 0 &&
3077           emit->key.tcs.vertices_per_patch <= 32);
3078 
3079    opcode0.value = 0;
3080    opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT;
3081    opcode0.controlPointCount = emit->key.tcs.vertices_per_patch;
3082    begin_emit_instruction(emit);
3083    emit_dword(emit, opcode0.value);
3084    end_emit_instruction(emit);
3085 
3086    /* Emit the output control point count */
3087    assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32);
3088 
3089    opcode0.value = 0;
3090    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT;
3091    opcode0.controlPointCount = emit->key.tcs.vertices_out;
3092    begin_emit_instruction(emit);
3093    emit_dword(emit, opcode0.value);
3094    end_emit_instruction(emit);
3095 
3096    /* Emit tessellator domain */
3097    emit_tessellator_domain(emit, emit->key.tcs.prim_mode);
3098 
3099    /* Emit tessellator output primitive */
3100    opcode0.value = 0;
3101    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE;
3102    if (emit->key.tcs.point_mode) {
3103       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT;
3104    }
3105    else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) {
3106       opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE;
3107    }
3108    else {
3109       assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS ||
3110              emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES);
3111 
3112       if (emit->key.tcs.vertices_order_cw)
3113          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
3114       else
3115          opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW;
3116    }
3117    begin_emit_instruction(emit);
3118    emit_dword(emit, opcode0.value);
3119    end_emit_instruction(emit);
3120 
3121    /* Emit tessellator partitioning */
3122    opcode0.value = 0;
3123    opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING;
3124    switch (emit->key.tcs.spacing) {
3125    case PIPE_TESS_SPACING_FRACTIONAL_ODD:
3126       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
3127       break;
3128    case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
3129       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
3130       break;
3131    case PIPE_TESS_SPACING_EQUAL:
3132       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER;
3133       break;
3134    default:
3135       debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing);
3136       opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED;
3137    }
3138    begin_emit_instruction(emit);
3139    emit_dword(emit, opcode0.value);
3140    end_emit_instruction(emit);
3141 
3142    /* Declare constant registers */
3143    emit_constant_declaration(emit);
3144 
3145    /* Declare samplers and resources */
3146    emit_sampler_declarations(emit);
3147    emit_resource_declarations(emit);
3148 
3149    alloc_common_immediates(emit);
3150 
3151    int nVertices = emit->key.tcs.vertices_per_patch;
3152    emit->tcs.imm_index =
3153       alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0);
3154 
3155    /* Now, emit the constant block containing all the immediates
3156     * declared by shader, as well as the extra ones seen above.
3157     */
3158    emit_vgpu10_immediates_block(emit);
3159 
3160 }
3161 
3162 
3163 /**
3164  * A helper function to determine if control point phase is needed.
3165  * Returns TRUE if there is control point output.
3166  */
3167 static boolean
needs_control_point_phase(struct svga_shader_emitter_v10 * emit)3168 needs_control_point_phase(struct svga_shader_emitter_v10 *emit)
3169 {
3170    unsigned i;
3171 
3172    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3173 
3174    /* If output control point count does not match the input count,
3175     * we need a control point phase to explicitly set the output control
3176     * points.
3177     */
3178    if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) &&
3179        emit->key.tcs.vertices_out)
3180       return TRUE;
3181 
3182    for (i = 0; i < emit->info.num_outputs; i++) {
3183       switch (emit->info.output_semantic_name[i]) {
3184       case TGSI_SEMANTIC_PATCH:
3185       case TGSI_SEMANTIC_TESSOUTER:
3186       case TGSI_SEMANTIC_TESSINNER:
3187          break;
3188       default:
3189          return TRUE;
3190       }
3191    }
3192    return FALSE;
3193 }
3194 
3195 
3196 /**
3197  * A helper function to add shader signature for passthrough control point
3198  * phase. This signature is also generated for passthrough control point
3199  * phase from HLSL compiler and is needed by Metal Renderer.
3200  */
3201 static void
emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 * emit)3202 emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit)
3203 {
3204    struct svga_shader_signature *sgn = &emit->signature;
3205    SVGA3dDXShaderSignatureEntry *sgnEntry;
3206    unsigned i;
3207 
3208    for (i = 0; i < emit->info.num_inputs; i++) {
3209       unsigned index = emit->linkage.input_map[i];
3210       enum tgsi_semantic sem_name = emit->info.input_semantic_name[i];
3211 
3212       sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++];
3213 
3214       set_shader_signature_entry(sgnEntry, index,
3215                                  tgsi_semantic_to_sgn_name[sem_name],
3216                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3217                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3218                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3219 
3220       sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
3221 
3222       set_shader_signature_entry(sgnEntry, i,
3223                                  tgsi_semantic_to_sgn_name[sem_name],
3224                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3225                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3226                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3227    }
3228 }
3229 
3230 
3231 /**
3232  * A helper function to emit an instruction to start the control point phase
3233  * in the hull shader.
3234  */
3235 static void
emit_control_point_phase_instruction(struct svga_shader_emitter_v10 * emit)3236 emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit)
3237 {
3238    VGPU10OpcodeToken0 opcode0;
3239 
3240    opcode0.value = 0;
3241    opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE;
3242    begin_emit_instruction(emit);
3243    emit_dword(emit, opcode0.value);
3244    end_emit_instruction(emit);
3245 }
3246 
3247 
3248 /**
3249  * Start the hull shader control point phase
3250  */
3251 static boolean
emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 * emit)3252 emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit)
3253 {
3254    /* If there is no control point output, skip the control point phase. */
3255    if (!needs_control_point_phase(emit)) {
3256       if (!emit->key.tcs.vertices_out) {
3257          /**
3258           * If the tcs does not explicitly generate any control point output
3259           * and the tes does not use any input control point, then
3260           * emit an empty control point phase with zero output control
3261           * point count.
3262           */
3263          emit_control_point_phase_instruction(emit);
3264 
3265          /**
3266           * Since this is an empty control point phase, we will need to
3267           * add input signatures when we parse the tcs again in the
3268           * patch constant phase.
3269           */
3270          emit->tcs.fork_phase_add_signature = TRUE;
3271       }
3272       else {
3273          /**
3274           * Before skipping the control point phase, add the signature for
3275           * the passthrough control point.
3276           */
3277          emit_passthrough_control_point_signature(emit);
3278       }
3279       return FALSE;
3280    }
3281 
3282    /* Start the control point phase in the hull shader */
3283    emit_control_point_phase_instruction(emit);
3284 
3285    /* Declare the output control point ID */
3286    if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) {
3287       /* Add invocation id declaration if it does not exist */
3288       emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1;
3289    }
3290 
3291    emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3292                           VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID,
3293                           VGPU10_OPERAND_INDEX_0D,
3294                           0, 1,
3295                           VGPU10_NAME_UNDEFINED,
3296                           VGPU10_OPERAND_0_COMPONENT, 0,
3297                           0,
3298                           VGPU10_INTERPOLATION_CONSTANT, TRUE,
3299                           SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3300 
3301    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3302       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3303                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3304                              VGPU10_OPERAND_INDEX_0D,
3305                              0, 1,
3306                              VGPU10_NAME_UNDEFINED,
3307                              VGPU10_OPERAND_0_COMPONENT,
3308                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3309                              0,
3310                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3311                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3312    }
3313 
3314    return TRUE;
3315 }
3316 
3317 
3318 /**
3319  * Start the hull shader patch constant phase and
3320  * do the second pass of the tcs translation and emit
3321  * the relevant declarations and instructions for this phase.
3322  */
3323 static boolean
emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 * emit,struct tgsi_parse_context * parse)3324 emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit,
3325                                       struct tgsi_parse_context *parse)
3326 {
3327    unsigned inst_number = 0;
3328    boolean ret = TRUE;
3329    VGPU10OpcodeToken0 opcode0;
3330 
3331    emit->skip_instruction = FALSE;
3332 
3333    /* Start the patch constant phase */
3334    opcode0.value = 0;
3335    opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE;
3336    begin_emit_instruction(emit);
3337    emit_dword(emit, opcode0.value);
3338    end_emit_instruction(emit);
3339 
3340    /* Set the current phase to patch constant phase */
3341    emit->tcs.control_point_phase = FALSE;
3342 
3343    if (emit->tcs.prim_id_index != INVALID_INDEX) {
3344       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
3345                              VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
3346                              VGPU10_OPERAND_INDEX_0D,
3347                              0, 1,
3348                              VGPU10_NAME_UNDEFINED,
3349                              VGPU10_OPERAND_0_COMPONENT,
3350                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
3351                              0,
3352                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
3353                              SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID);
3354    }
3355 
3356    /* Emit declarations for this phase */
3357    emit->index_range.required =
3358       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
3359    emit_tcs_input_declarations(emit);
3360 
3361    if (emit->index_range.start_index != INVALID_INDEX) {
3362       emit_index_range_declaration(emit);
3363    }
3364 
3365    emit->index_range.required =
3366       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
3367    emit_tcs_output_declarations(emit);
3368 
3369    if (emit->index_range.start_index != INVALID_INDEX) {
3370       emit_index_range_declaration(emit);
3371    }
3372    emit->index_range.required = FALSE;
3373 
3374    emit_temporaries_declaration(emit);
3375 
3376    /* Reset the token position to the first instruction token
3377     * in preparation for the second pass of the shader
3378     */
3379    parse->Position = emit->tcs.instruction_token_pos;
3380 
3381    while (!tgsi_parse_end_of_tokens(parse)) {
3382       tgsi_parse_token(parse);
3383 
3384       assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
3385       ret = emit_vgpu10_instruction(emit, inst_number++,
3386                                     &parse->FullToken.FullInstruction);
3387 
3388       /* Usually this applies to TCS only. If shader is reading output of
3389        * patch constant in fork phase, we should reemit all instructions
3390        * which are writting into ouput of patch constant in fork phase
3391        * to store results into temporaries.
3392        */
3393       if (emit->reemit_instruction) {
3394          assert(emit->unit == PIPE_SHADER_TESS_CTRL);
3395          ret = emit_vgpu10_instruction(emit, inst_number,
3396                                        &parse->FullToken.FullInstruction);
3397       }
3398 
3399       if (!ret)
3400          return FALSE;
3401    }
3402 
3403    return TRUE;
3404 }
3405 
3406 
3407 /**
3408  * Emit index range declaration.
3409  */
3410 static boolean
emit_index_range_declaration(struct svga_shader_emitter_v10 * emit)3411 emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
3412 {
3413    if (emit->version < 50)
3414       return TRUE;
3415 
3416    assert(emit->index_range.start_index != INVALID_INDEX);
3417    assert(emit->index_range.count != 0);
3418    assert(emit->index_range.required);
3419    assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
3420    assert(emit->index_range.dim != 0);
3421    assert(emit->index_range.size != 0);
3422 
3423    VGPU10OpcodeToken0 opcode0;
3424    VGPU10OperandToken0 operand0;
3425 
3426    opcode0.value = 0;
3427    opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;
3428 
3429    operand0.value = 0;
3430    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3431    operand0.indexDimension = emit->index_range.dim;
3432    operand0.operandType = emit->index_range.operandType;
3433    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3434    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3435 
3436    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
3437       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3438 
3439    begin_emit_instruction(emit);
3440    emit_dword(emit, opcode0.value);
3441    emit_dword(emit, operand0.value);
3442 
3443    if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
3444       emit_dword(emit, emit->index_range.size);
3445       emit_dword(emit, emit->index_range.start_index);
3446       emit_dword(emit, emit->index_range.count);
3447    }
3448    else {
3449       emit_dword(emit, emit->index_range.start_index);
3450       emit_dword(emit, emit->index_range.count);
3451    }
3452 
3453    end_emit_instruction(emit);
3454 
3455    /* Reset fields in emit->index_range struct except
3456     * emit->index_range.required which will be reset afterwards
3457     */
3458    emit->index_range.count = 0;
3459    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
3460    emit->index_range.start_index = INVALID_INDEX;
3461    emit->index_range.size = 0;
3462    emit->index_range.dim = 0;
3463 
3464    return TRUE;
3465 }
3466 
3467 
3468 /**
3469  * Emit a vgpu10 declaration "instruction".
3470  * \param index  the register index
3471  * \param size   array size of the operand. In most cases, it is 1,
3472  *               but for inputs to geometry shader, the array size varies
3473  *               depending on the primitive type.
3474  */
3475 static void
emit_decl_instruction(struct svga_shader_emitter_v10 * emit,VGPU10OpcodeToken0 opcode0,VGPU10OperandToken0 operand0,VGPU10NameToken name_token,unsigned index,unsigned size)3476 emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
3477                       VGPU10OpcodeToken0 opcode0,
3478                       VGPU10OperandToken0 operand0,
3479                       VGPU10NameToken name_token,
3480                       unsigned index, unsigned size)
3481 {
3482    assert(opcode0.opcodeType);
3483    assert(operand0.mask ||
3484           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) ||
3485           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) ||
3486           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) ||
3487           (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) ||
3488           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) ||
3489           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) ||
3490           (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) ||
3491           (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM));
3492 
3493    begin_emit_instruction(emit);
3494    emit_dword(emit, opcode0.value);
3495 
3496    emit_dword(emit, operand0.value);
3497 
3498    if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
3499       /* Next token is the index of the register to declare */
3500       emit_dword(emit, index);
3501    }
3502    else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
3503       /* Next token is the size of the register */
3504       emit_dword(emit, size);
3505 
3506       /* Followed by the index of the register */
3507       emit_dword(emit, index);
3508    }
3509 
3510    if (name_token.value) {
3511       emit_dword(emit, name_token.value);
3512    }
3513 
3514    end_emit_instruction(emit);
3515 }
3516 
3517 
3518 /**
3519  * Emit the declaration for a shader input.
3520  * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
3521  * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
3522  * \param dim         index dimension
3523  * \param index       the input register index
3524  * \param size        array size of the operand. In most cases, it is 1,
3525  *                    but for inputs to geometry shader, the array size varies
3526  *                    depending on the primitive type. For tessellation control
3527  *                    shader, the array size is the vertex count per patch.
3528  * \param name        one of VGPU10_NAME_x
3529  * \parma numComp     number of components
3530  * \param selMode     component selection mode
3531  * \param usageMask   bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3532  * \param interpMode  interpolation mode
3533  */
3534 static void
emit_input_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcodeType,VGPU10_OPERAND_TYPE operandType,VGPU10_OPERAND_INDEX_DIMENSION dim,unsigned index,unsigned size,VGPU10_SYSTEM_NAME name,VGPU10_OPERAND_NUM_COMPONENTS numComp,VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,unsigned usageMask,VGPU10_INTERPOLATION_MODE interpMode,boolean addSignature,SVGA3dDXSignatureSemanticName sgnName)3535 emit_input_declaration(struct svga_shader_emitter_v10 *emit,
3536                        VGPU10_OPCODE_TYPE opcodeType,
3537                        VGPU10_OPERAND_TYPE operandType,
3538                        VGPU10_OPERAND_INDEX_DIMENSION dim,
3539                        unsigned index, unsigned size,
3540                        VGPU10_SYSTEM_NAME name,
3541                        VGPU10_OPERAND_NUM_COMPONENTS numComp,
3542                        VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
3543                        unsigned usageMask,
3544                        VGPU10_INTERPOLATION_MODE interpMode,
3545                        boolean addSignature,
3546                        SVGA3dDXSignatureSemanticName sgnName)
3547 {
3548    VGPU10OpcodeToken0 opcode0;
3549    VGPU10OperandToken0 operand0;
3550    VGPU10NameToken name_token;
3551 
3552    assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3553    assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
3554           opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
3555           opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
3556           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
3557           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
3558           opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
3559    assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
3560           operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
3561           operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
3562           operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
3563           operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
3564           operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
3565           operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
3566           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
3567           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
3568           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
3569           operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);
3570 
3571    assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
3572    assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
3573    assert(dim <= VGPU10_OPERAND_INDEX_3D);
3574    assert(name == VGPU10_NAME_UNDEFINED ||
3575           name == VGPU10_NAME_POSITION ||
3576           name == VGPU10_NAME_INSTANCE_ID ||
3577           name == VGPU10_NAME_VERTEX_ID ||
3578           name == VGPU10_NAME_PRIMITIVE_ID ||
3579           name == VGPU10_NAME_IS_FRONT_FACE ||
3580           name == VGPU10_NAME_SAMPLE_INDEX ||
3581           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3582           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);
3583 
3584    assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
3585           interpMode == VGPU10_INTERPOLATION_CONSTANT ||
3586           interpMode == VGPU10_INTERPOLATION_LINEAR ||
3587           interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
3588           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
3589           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
3590           interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
3591           interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);
3592 
3593    check_register_index(emit, opcodeType, index);
3594 
3595    opcode0.value = operand0.value = name_token.value = 0;
3596 
3597    opcode0.opcodeType = opcodeType;
3598    opcode0.interpolationMode = interpMode;
3599 
3600    operand0.operandType = operandType;
3601    operand0.numComponents = numComp;
3602    operand0.selectionMode = selMode;
3603    operand0.mask = usageMask;
3604    operand0.indexDimension = dim;
3605    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3606    if (dim == VGPU10_OPERAND_INDEX_2D)
3607       operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3608 
3609    name_token.name = name;
3610 
3611    emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
3612 
3613    if (addSignature) {
3614       struct svga_shader_signature *sgn = &emit->signature;
3615       if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) {
3616          /* Set patch constant signature */
3617          SVGA3dDXShaderSignatureEntry *sgnEntry =
3618             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
3619          set_shader_signature_entry(sgnEntry, index,
3620                                     sgnName, usageMask,
3621                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3622                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3623 
3624       } else if (operandType == VGPU10_OPERAND_TYPE_INPUT ||
3625                  operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) {
3626          /* Set input signature */
3627          SVGA3dDXShaderSignatureEntry *sgnEntry =
3628             &sgn->inputs[sgn->header.numInputSignatures++];
3629          set_shader_signature_entry(sgnEntry, index,
3630                                     sgnName, usageMask,
3631                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3632                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3633       }
3634    }
3635 
3636    if (emit->index_range.required) {
3637       /* Here, index_range declaration is only applicable for opcodeType
3638        * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and
3639        * for operandType VGPU10_OPERAND_TYPE_INPUT,
3640        * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and
3641        * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT.
3642        */
3643       if ((opcodeType != VGPU10_OPCODE_DCL_INPUT &&
3644            opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) ||
3645           (operandType != VGPU10_OPERAND_TYPE_INPUT &&
3646            operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT &&
3647            operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) {
3648          if (emit->index_range.start_index != INVALID_INDEX) {
3649             emit_index_range_declaration(emit);
3650          }
3651          return;
3652       }
3653 
3654       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3655          /* Need record new index_range */
3656          emit->index_range.count = 1;
3657          emit->index_range.operandType = operandType;
3658          emit->index_range.start_index = index;
3659          emit->index_range.size = size;
3660          emit->index_range.dim = dim;
3661       }
3662       else if (index !=
3663                (emit->index_range.start_index + emit->index_range.count) ||
3664                emit->index_range.operandType != operandType) {
3665          /* Input index is not contiguous with index range or operandType is
3666           * different from index range's operandType. We need to emit current
3667           * index_range first and then start recording next index range.
3668           */
3669          emit_index_range_declaration(emit);
3670 
3671          emit->index_range.count = 1;
3672          emit->index_range.operandType = operandType;
3673          emit->index_range.start_index = index;
3674          emit->index_range.size = size;
3675          emit->index_range.dim = dim;
3676       }
3677       else if (emit->index_range.operandType == operandType) {
3678          /* Since input index is contiguous with index range and operandType
3679           * is same as index range's operandType, increment index range count.
3680           */
3681          emit->index_range.count++;
3682       }
3683    }
3684 }
3685 
3686 
3687 /**
3688  * Emit the declaration for a shader output.
3689  * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
3690  * \param index  the output register index
3691  * \param name  one of VGPU10_NAME_x
3692  * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
3693  */
3694 static void
emit_output_declaration(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE type,unsigned index,VGPU10_SYSTEM_NAME name,unsigned writemask,boolean addSignature,SVGA3dDXSignatureSemanticName sgnName)3695 emit_output_declaration(struct svga_shader_emitter_v10 *emit,
3696                         VGPU10_OPCODE_TYPE type, unsigned index,
3697                         VGPU10_SYSTEM_NAME name,
3698                         unsigned writemask,
3699                         boolean addSignature,
3700                         SVGA3dDXSignatureSemanticName sgnName)
3701 {
3702    VGPU10OpcodeToken0 opcode0;
3703    VGPU10OperandToken0 operand0;
3704    VGPU10NameToken name_token;
3705 
3706    assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
3707    assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
3708           type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
3709           type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
3710    assert(name == VGPU10_NAME_UNDEFINED ||
3711           name == VGPU10_NAME_POSITION ||
3712           name == VGPU10_NAME_PRIMITIVE_ID ||
3713           name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
3714           name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX ||
3715           name == VGPU10_NAME_CLIP_DISTANCE);
3716 
3717    check_register_index(emit, type, index);
3718 
3719    opcode0.value = operand0.value = name_token.value = 0;
3720 
3721    opcode0.opcodeType = type;
3722    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3723    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
3724    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
3725    operand0.mask = writemask;
3726    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
3727    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
3728 
3729    name_token.name = name;
3730 
3731    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
3732 
3733    /* Capture output signature */
3734    if (addSignature) {
3735       struct svga_shader_signature *sgn = &emit->signature;
3736       SVGA3dDXShaderSignatureEntry *sgnEntry =
3737          &sgn->outputs[sgn->header.numOutputSignatures++];
3738       set_shader_signature_entry(sgnEntry, index,
3739                                  sgnName, writemask,
3740                                  SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
3741                                  SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
3742    }
3743 
3744    if (emit->index_range.required) {
3745       /* Here, index_range declaration is only applicable for opcodeType
3746        * VGPU10_OPCODE_DCL_OUTPUT and for operandType
3747        * VGPU10_OPERAND_TYPE_OUTPUT.
3748        */
3749       if (type != VGPU10_OPCODE_DCL_OUTPUT) {
3750          if (emit->index_range.start_index != INVALID_INDEX) {
3751             emit_index_range_declaration(emit);
3752          }
3753          return;
3754       }
3755 
3756       if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) {
3757          /* Need record new index_range */
3758          emit->index_range.count = 1;
3759          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3760          emit->index_range.start_index = index;
3761          emit->index_range.size = 1;
3762          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3763       }
3764       else if (index !=
3765                (emit->index_range.start_index + emit->index_range.count)) {
3766          /* Output index is not contiguous with index range. We need to
3767           * emit current index_range first and then start recording next
3768           * index range.
3769           */
3770          emit_index_range_declaration(emit);
3771 
3772          emit->index_range.count = 1;
3773          emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
3774          emit->index_range.start_index = index;
3775          emit->index_range.size = 1;
3776          emit->index_range.dim = VGPU10_OPERAND_INDEX_1D;
3777       }
3778       else {
3779          /* Since output index is contiguous with index range, increment
3780           * index range count.
3781           */
3782          emit->index_range.count++;
3783       }
3784    }
3785 }
3786 
3787 
3788 /**
3789  * Emit the declaration for the fragment depth output.
3790  */
3791 static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 * emit)3792 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
3793 {
3794    VGPU10OpcodeToken0 opcode0;
3795    VGPU10OperandToken0 operand0;
3796    VGPU10NameToken name_token;
3797 
3798    assert(emit->unit == PIPE_SHADER_FRAGMENT);
3799 
3800    opcode0.value = operand0.value = name_token.value = 0;
3801 
3802    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3803    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
3804    operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
3805    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3806    operand0.mask = 0;
3807 
3808    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3809 }
3810 
3811 
3812 /**
3813  * Emit the declaration for the fragment sample mask/coverage output.
3814  */
3815 static void
emit_samplemask_output_declaration(struct svga_shader_emitter_v10 * emit)3816 emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit)
3817 {
3818    VGPU10OpcodeToken0 opcode0;
3819    VGPU10OperandToken0 operand0;
3820    VGPU10NameToken name_token;
3821 
3822    assert(emit->unit == PIPE_SHADER_FRAGMENT);
3823    assert(emit->version >= 41);
3824 
3825    opcode0.value = operand0.value = name_token.value = 0;
3826 
3827    opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
3828    operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK;
3829    operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
3830    operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
3831    operand0.mask = 0;
3832 
3833    emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
3834 }
3835 
3836 
3837 /**
3838  * Emit output declarations for fragment shader.
3839  */
3840 static void
emit_fs_output_declarations(struct svga_shader_emitter_v10 * emit)3841 emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit)
3842 {
3843    unsigned int i;
3844 
3845    for (i = 0; i < emit->info.num_outputs; i++) {
3846       /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
3847       const enum tgsi_semantic semantic_name =
3848          emit->info.output_semantic_name[i];
3849       const unsigned semantic_index = emit->info.output_semantic_index[i];
3850       unsigned index = i;
3851 
3852       if (semantic_name == TGSI_SEMANTIC_COLOR) {
3853          assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));
3854 
3855          emit->fs.color_out_index[semantic_index] = index;
3856 
3857          emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
3858                                               index + 1);
3859 
3860          /* The semantic index is the shader's color output/buffer index */
3861          emit_output_declaration(emit,
3862                                  VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
3863                                  VGPU10_NAME_UNDEFINED,
3864                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3865                                  TRUE,
3866                                  map_tgsi_semantic_to_sgn_name(semantic_name));
3867 
3868          if (semantic_index == 0) {
3869             if (emit->key.fs.write_color0_to_n_cbufs > 1) {
3870                /* Emit declarations for the additional color outputs
3871                 * for broadcasting.
3872                 */
3873                unsigned j;
3874                for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
3875                   /* Allocate a new output index */
3876                   unsigned idx = emit->info.num_outputs + j - 1;
3877                   emit->fs.color_out_index[j] = idx;
3878                   emit_output_declaration(emit,
3879                                         VGPU10_OPCODE_DCL_OUTPUT, idx,
3880                                         VGPU10_NAME_UNDEFINED,
3881                                         VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
3882                                         TRUE,
3883                                         map_tgsi_semantic_to_sgn_name(semantic_name));
3884                   emit->info.output_semantic_index[idx] = j;
3885                }
3886 
3887                emit->fs.num_color_outputs =
3888                      emit->key.fs.write_color0_to_n_cbufs;
3889             }
3890          }
3891       }
3892       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
3893          /* Fragment depth output */
3894          emit_fragdepth_output_declaration(emit);
3895       }
3896       else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) {
3897          /* Sample mask output */
3898          emit_samplemask_output_declaration(emit);
3899       }
3900       else {
3901          assert(!"Bad output semantic name");
3902       }
3903    }
3904 }
3905 
3906 
3907 /**
3908  * Emit common output declaration for vertex processing.
3909  */
3910 static void
emit_vertex_output_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned writemask,boolean addSignature)3911 emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit,
3912                                unsigned index, unsigned writemask,
3913                                boolean addSignature)
3914 {
3915    const enum tgsi_semantic semantic_name =
3916          emit->info.output_semantic_name[index];
3917    const unsigned semantic_index = emit->info.output_semantic_index[index];
3918    unsigned name, type;
3919    unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
3920 
3921    assert(emit->unit != PIPE_SHADER_FRAGMENT &&
3922           emit->unit != PIPE_SHADER_COMPUTE);
3923 
3924    switch (semantic_name) {
3925    case TGSI_SEMANTIC_POSITION:
3926       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
3927          /* position will be declared in control point only */
3928          assert(emit->tcs.control_point_phase);
3929          type = VGPU10_OPCODE_DCL_OUTPUT;
3930          name = VGPU10_NAME_UNDEFINED;
3931          emit_output_declaration(emit, type, index, name, final_mask, TRUE,
3932                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
3933          return;
3934       }
3935       else {
3936          type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3937          name = VGPU10_NAME_POSITION;
3938       }
3939       /* Save the index of the vertex position output register */
3940       emit->vposition.out_index = index;
3941       break;
3942    case TGSI_SEMANTIC_CLIPDIST:
3943       type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
3944       name = VGPU10_NAME_CLIP_DISTANCE;
3945       /* save the starting index of the clip distance output register */
3946       if (semantic_index == 0)
3947          emit->clip_dist_out_index = index;
3948       final_mask = apply_clip_plane_mask(emit, writemask, semantic_index);
3949       if (final_mask == 0x0)
3950          return; /* discard this do-nothing declaration */
3951       break;
3952    case TGSI_SEMANTIC_CLIPVERTEX:
3953       type = VGPU10_OPCODE_DCL_OUTPUT;
3954       name = VGPU10_NAME_UNDEFINED;
3955       emit->clip_vertex_out_index = index;
3956       break;
3957    default:
3958       /* generic output */
3959       type = VGPU10_OPCODE_DCL_OUTPUT;
3960       name = VGPU10_NAME_UNDEFINED;
3961    }
3962 
3963    emit_output_declaration(emit, type, index, name, final_mask, addSignature,
3964                            map_tgsi_semantic_to_sgn_name(semantic_name));
3965 }
3966 
3967 
3968 /**
3969  * Emit declaration for outputs in vertex shader.
3970  */
3971 static void
emit_vs_output_declarations(struct svga_shader_emitter_v10 * emit)3972 emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit)
3973 {
3974    unsigned i;
3975    for (i = 0; i < emit->info.num_outputs; i++) {
3976       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
3977    }
3978 }
3979 
3980 
3981 /**
3982  * A helper function to determine the writemask for an output
3983  * for the specified stream.
3984  */
3985 static unsigned
output_writemask_for_stream(unsigned stream,ubyte output_streams,ubyte output_usagemask)3986 output_writemask_for_stream(unsigned stream, ubyte output_streams,
3987                                  ubyte output_usagemask)
3988 {
3989    unsigned i;
3990    unsigned writemask = 0;
3991 
3992    for (i = 0; i < 4; i++) {
3993       if ((output_streams & 0x3) == stream)
3994          writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i);
3995       output_streams >>= 2;
3996    }
3997    return writemask & output_usagemask;
3998 }
3999 
4000 
4001 /**
4002  * Emit declaration for outputs in geometry shader.
4003  */
4004 static void
emit_gs_output_declarations(struct svga_shader_emitter_v10 * emit)4005 emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit)
4006 {
4007    unsigned i;
4008    VGPU10OpcodeToken0 opcode0;
4009    unsigned numStreamsSupported = 1;
4010    int s;
4011 
4012    if (emit->version >= 50) {
4013       numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components);
4014    }
4015 
4016    /**
4017     * Start emitting from the last stream first, so we end with
4018     * stream 0, so any of the auxiliary output declarations will
4019     * go to stream 0.
4020     */
4021    for (s = numStreamsSupported-1; s >= 0; s--) {
4022 
4023       if (emit->info.num_stream_output_components[s] == 0)
4024          continue;
4025 
4026       if (emit->version >= 50) {
4027          /* DCL_STREAM stream */
4028          begin_emit_instruction(emit);
4029          emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE);
4030          emit_stream_register(emit, s);
4031          end_emit_instruction(emit);
4032       }
4033 
4034       /* emit output primitive topology declaration */
4035       opcode0.value = 0;
4036       opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
4037       opcode0.primitiveTopology = emit->gs.prim_topology;
4038       emit_property_instruction(emit, opcode0, 0, 0);
4039 
4040       for (i = 0; i < emit->info.num_outputs; i++) {
4041          unsigned writemask;
4042 
4043          /* find out the writemask for this stream */
4044          writemask = output_writemask_for_stream(s, emit->info.output_streams[i],
4045                                                  emit->output_usage_mask[i]);
4046 
4047          if (writemask) {
4048             enum tgsi_semantic semantic_name =
4049                emit->info.output_semantic_name[i];
4050 
4051             /* TODO: Still need to take care of a special case where a
4052              *       single varying spans across multiple output registers.
4053              */
4054             switch(semantic_name) {
4055             case TGSI_SEMANTIC_PRIMID:
4056                emit_output_declaration(emit,
4057                                        VGPU10_OPCODE_DCL_OUTPUT_SGV, i,
4058                                        VGPU10_NAME_PRIMITIVE_ID,
4059                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4060                                        FALSE,
4061                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4062                break;
4063             case TGSI_SEMANTIC_LAYER:
4064                emit_output_declaration(emit,
4065                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4066                                        VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX,
4067                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4068                                        FALSE,
4069                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4070                break;
4071             case TGSI_SEMANTIC_VIEWPORT_INDEX:
4072                emit_output_declaration(emit,
4073                                        VGPU10_OPCODE_DCL_OUTPUT_SIV, i,
4074                                        VGPU10_NAME_VIEWPORT_ARRAY_INDEX,
4075                                        VGPU10_OPERAND_4_COMPONENT_MASK_X,
4076                                        FALSE,
4077                                        map_tgsi_semantic_to_sgn_name(semantic_name));
4078                emit->gs.viewport_index_out_index = i;
4079                break;
4080             default:
4081                emit_vertex_output_declaration(emit, i, writemask, FALSE);
4082             }
4083          }
4084       }
4085    }
4086 
4087    /* For geometry shader outputs, it is possible the same register is
4088     * declared multiple times for different streams. So to avoid
4089     * redundant signature entries, geometry shader output signature is done
4090     * outside of the declaration.
4091     */
4092    struct svga_shader_signature *sgn = &emit->signature;
4093    SVGA3dDXShaderSignatureEntry *sgnEntry;
4094 
4095    for (i = 0; i < emit->info.num_outputs; i++) {
4096       if (emit->output_usage_mask[i]) {
4097          enum tgsi_semantic sem_name = emit->info.output_semantic_name[i];
4098 
4099          sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++];
4100          set_shader_signature_entry(sgnEntry, i,
4101                                     map_tgsi_semantic_to_sgn_name(sem_name),
4102                                     emit->output_usage_mask[i],
4103                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4104                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4105       }
4106    }
4107 }
4108 
4109 
4110 /**
4111  * Emit the declaration for the tess inner/outer output.
4112  * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV
4113  * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT
4114  * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value
4115  */
4116 static void
emit_tesslevel_declaration(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned opcodeType,unsigned operandType,VGPU10_SYSTEM_NAME name,SVGA3dDXSignatureSemanticName sgnName)4117 emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit,
4118                            unsigned index, unsigned opcodeType,
4119                            unsigned operandType, VGPU10_SYSTEM_NAME name,
4120                            SVGA3dDXSignatureSemanticName sgnName)
4121 {
4122    VGPU10OpcodeToken0 opcode0;
4123    VGPU10OperandToken0 operand0;
4124    VGPU10NameToken name_token;
4125 
4126    assert(emit->version >= 50);
4127    assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR ||
4128           (emit->key.tcs.prim_mode == PIPE_PRIM_LINES &&
4129            name == VGPU10_NAME_UNDEFINED));
4130    assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
4131 
4132    assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT ||
4133           operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT);
4134 
4135    opcode0.value = operand0.value = name_token.value = 0;
4136 
4137    opcode0.opcodeType = opcodeType;
4138    operand0.operandType = operandType;
4139    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
4140    operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
4141    operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4142    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4143    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
4144 
4145    name_token.name = name;
4146    emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
4147 
4148    /* Capture patch constant signature */
4149    struct svga_shader_signature *sgn = &emit->signature;
4150    SVGA3dDXShaderSignatureEntry *sgnEntry =
4151       &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4152    set_shader_signature_entry(sgnEntry, index,
4153                               sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X,
4154                               SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4155                               SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4156 }
4157 
4158 
4159 /**
4160  * Emit output declarations for tessellation control shader.
4161  */
4162 static void
emit_tcs_output_declarations(struct svga_shader_emitter_v10 * emit)4163 emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit)
4164 {
4165    unsigned int i;
4166    unsigned outputIndex = emit->num_outputs;
4167    struct svga_shader_signature *sgn = &emit->signature;
4168 
4169    /**
4170     * Initialize patch_generic_out_count so it won't be counted twice
4171     * since this function is called twice, one for control point phase
4172     * and another time for patch constant phase.
4173     */
4174    emit->tcs.patch_generic_out_count = 0;
4175 
4176    for (i = 0; i < emit->info.num_outputs; i++) {
4177       unsigned index = i;
4178       const enum tgsi_semantic semantic_name =
4179          emit->info.output_semantic_name[i];
4180 
4181       switch (semantic_name) {
4182       case TGSI_SEMANTIC_TESSINNER:
4183          emit->tcs.inner.tgsi_index = i;
4184 
4185          /* skip per-patch output declarations in control point phase */
4186          if (emit->tcs.control_point_phase)
4187             break;
4188 
4189          emit->tcs.inner.out_index = outputIndex;
4190          switch (emit->key.tcs.prim_mode) {
4191          case PIPE_PRIM_QUADS:
4192             emit_tesslevel_declaration(emit, outputIndex++,
4193                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4194                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4195                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4196 
4197             emit_tesslevel_declaration(emit, outputIndex++,
4198                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4199                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4200                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4201             break;
4202          case PIPE_PRIM_TRIANGLES:
4203             emit_tesslevel_declaration(emit, outputIndex++,
4204                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4205                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4206                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4207             break;
4208          case PIPE_PRIM_LINES:
4209             break;
4210          default:
4211             debug_printf("Unsupported primitive type");
4212          }
4213          break;
4214 
4215       case TGSI_SEMANTIC_TESSOUTER:
4216          emit->tcs.outer.tgsi_index = i;
4217 
4218          /* skip per-patch output declarations in control point phase */
4219          if (emit->tcs.control_point_phase)
4220             break;
4221 
4222          emit->tcs.outer.out_index = outputIndex;
4223          switch (emit->key.tcs.prim_mode) {
4224          case PIPE_PRIM_QUADS:
4225             for (int j = 0; j < 4; j++) {
4226                emit_tesslevel_declaration(emit, outputIndex++,
4227                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4228                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j,
4229                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j);
4230             }
4231             break;
4232          case PIPE_PRIM_TRIANGLES:
4233             for (int j = 0; j < 3; j++) {
4234                emit_tesslevel_declaration(emit, outputIndex++,
4235                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4236                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j,
4237                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j);
4238             }
4239             break;
4240          case PIPE_PRIM_LINES:
4241             for (int j = 0; j < 2; j++) {
4242                emit_tesslevel_declaration(emit, outputIndex++,
4243                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4244                   VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j,
4245                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j);
4246             }
4247             break;
4248          default:
4249             debug_printf("Unsupported primitive type");
4250          }
4251          break;
4252 
4253       case TGSI_SEMANTIC_PATCH:
4254          if (emit->tcs.patch_generic_out_index == INVALID_INDEX)
4255             emit->tcs.patch_generic_out_index= i;
4256          emit->tcs.patch_generic_out_count++;
4257 
4258          /* skip per-patch output declarations in control point phase */
4259          if (emit->tcs.control_point_phase)
4260             break;
4261 
4262          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index,
4263                                  VGPU10_NAME_UNDEFINED,
4264                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4265                                  FALSE,
4266                                  map_tgsi_semantic_to_sgn_name(semantic_name));
4267 
4268          SVGA3dDXShaderSignatureEntry *sgnEntry =
4269             &sgn->patchConstants[sgn->header.numPatchConstantSignatures++];
4270          set_shader_signature_entry(sgnEntry, index,
4271                                     map_tgsi_semantic_to_sgn_name(semantic_name),
4272                                     VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4273                                     SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN,
4274                                     SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT);
4275 
4276          break;
4277 
4278       default:
4279          /* save the starting index of control point outputs */
4280          if (emit->tcs.control_point_out_index == INVALID_INDEX)
4281             emit->tcs.control_point_out_index = i;
4282          emit->tcs.control_point_out_count++;
4283 
4284          /* skip control point output declarations in patch constant phase */
4285          if (!emit->tcs.control_point_phase)
4286             break;
4287 
4288          emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i],
4289                                         TRUE);
4290 
4291       }
4292    }
4293 
4294    if (emit->tcs.control_point_phase) {
4295       /**
4296        * Add missing control point output in control point phase.
4297        */
4298       if (emit->tcs.control_point_out_index == INVALID_INDEX) {
4299          /* use register index after tessellation factors */
4300          switch (emit->key.tcs.prim_mode) {
4301          case PIPE_PRIM_QUADS:
4302             emit->tcs.control_point_out_index = outputIndex + 6;
4303             break;
4304          case PIPE_PRIM_TRIANGLES:
4305             emit->tcs.control_point_out_index = outputIndex + 4;
4306             break;
4307          default:
4308             emit->tcs.control_point_out_index = outputIndex + 2;
4309             break;
4310          }
4311          emit->tcs.control_point_out_count++;
4312          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV,
4313                                  emit->tcs.control_point_out_index,
4314                                  VGPU10_NAME_POSITION,
4315                                  VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4316                                  TRUE,
4317                                  SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4318 
4319          /* If tcs does not output any control point output,
4320           * we can end the hull shader control point phase here
4321           * after emitting the default control point output.
4322           */
4323          emit->skip_instruction = TRUE;
4324       }
4325    }
4326    else {
4327       if (emit->tcs.outer.out_index == INVALID_INDEX) {
4328          /* since the TCS did not declare out outer tess level output register,
4329           * we declare it here for patch constant phase only.
4330           */
4331          emit->tcs.outer.out_index = outputIndex;
4332          if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4333             for (int i = 0; i < 4; i++) {
4334                emit_tesslevel_declaration(emit, outputIndex++,
4335                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4336                   VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4337                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4338             }
4339          }
4340          else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4341             for (int i = 0; i < 3; i++) {
4342                emit_tesslevel_declaration(emit, outputIndex++,
4343                   VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4344                   VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
4345                   SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
4346             }
4347          }
4348       }
4349 
4350       if (emit->tcs.inner.out_index == INVALID_INDEX) {
4351          /* since the TCS did not declare out inner tess level output register,
4352           * we declare it here
4353           */
4354          emit->tcs.inner.out_index = outputIndex;
4355          if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
4356             emit_tesslevel_declaration(emit, outputIndex++,
4357                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4358                VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4359                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4360             emit_tesslevel_declaration(emit, outputIndex++,
4361                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4362                VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4363                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4364          }
4365          else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
4366             emit_tesslevel_declaration(emit, outputIndex++,
4367                VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT,
4368                VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
4369                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
4370          }
4371       }
4372    }
4373    emit->num_outputs = outputIndex;
4374 }
4375 
4376 
4377 /**
4378  * Emit output declarations for tessellation evaluation shader.
4379  */
4380 static void
emit_tes_output_declarations(struct svga_shader_emitter_v10 * emit)4381 emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit)
4382 {
4383    unsigned int i;
4384 
4385    for (i = 0; i < emit->info.num_outputs; i++) {
4386       emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE);
4387    }
4388 }
4389 
4390 
4391 /**
4392  * Emit the declaration for a system value input/output.
4393  */
4394 static void
emit_system_value_declaration(struct svga_shader_emitter_v10 * emit,enum tgsi_semantic semantic_name,unsigned index)4395 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
4396                               enum tgsi_semantic semantic_name, unsigned index)
4397 {
4398    switch (semantic_name) {
4399    case TGSI_SEMANTIC_INSTANCEID:
4400       index = alloc_system_value_index(emit, index);
4401       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4402                              VGPU10_OPERAND_TYPE_INPUT,
4403                              VGPU10_OPERAND_INDEX_1D,
4404                              index, 1,
4405                              VGPU10_NAME_INSTANCE_ID,
4406                              VGPU10_OPERAND_4_COMPONENT,
4407                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4408                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4409                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4410                              map_tgsi_semantic_to_sgn_name(semantic_name));
4411       break;
4412    case TGSI_SEMANTIC_VERTEXID:
4413       emit->vs.vertex_id_sys_index = index;
4414       index = alloc_system_value_index(emit, index);
4415       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
4416                              VGPU10_OPERAND_TYPE_INPUT,
4417                              VGPU10_OPERAND_INDEX_1D,
4418                              index, 1,
4419                              VGPU10_NAME_VERTEX_ID,
4420                              VGPU10_OPERAND_4_COMPONENT,
4421                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4422                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4423                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4424                              map_tgsi_semantic_to_sgn_name(semantic_name));
4425       break;
4426    case TGSI_SEMANTIC_SAMPLEID:
4427       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4428       emit->fs.sample_id_sys_index = index;
4429       index = alloc_system_value_index(emit, index);
4430       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV,
4431                              VGPU10_OPERAND_TYPE_INPUT,
4432                              VGPU10_OPERAND_INDEX_1D,
4433                              index, 1,
4434                              VGPU10_NAME_SAMPLE_INDEX,
4435                              VGPU10_OPERAND_4_COMPONENT,
4436                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4437                              VGPU10_OPERAND_4_COMPONENT_MASK_X,
4438                              VGPU10_INTERPOLATION_CONSTANT, TRUE,
4439                              map_tgsi_semantic_to_sgn_name(semantic_name));
4440       break;
4441    case TGSI_SEMANTIC_SAMPLEPOS:
4442       /* This system value contains the position of the current sample
4443        * when using per-sample shading.  We implement this by calling
4444        * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample
4445        * index as the argument.  See emit_sample_position_instructions().
4446        */
4447       assert(emit->version >= 41);
4448       emit->fs.sample_pos_sys_index = index;
4449       index = alloc_system_value_index(emit, index);
4450       break;
4451    case TGSI_SEMANTIC_INVOCATIONID:
4452       /* Note: invocation id input is mapped to different register depending
4453        * on the shader type. In GS, it will be mapped to vGSInstanceID#.
4454        * In TCS, it will be mapped to vOutputControlPointID#.
4455        * Since in both cases, the mapped name is unique rather than
4456        * just a generic input name ("v#"), so there is no need to remap
4457        * the index value.
4458        */
4459       assert(emit->unit == PIPE_SHADER_GEOMETRY ||
4460              emit->unit == PIPE_SHADER_TESS_CTRL);
4461       assert(emit->version >= 50);
4462 
4463       if (emit->unit == PIPE_SHADER_GEOMETRY) {
4464          emit->gs.invocation_id_sys_index = index;
4465          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4466                                 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID,
4467                                 VGPU10_OPERAND_INDEX_0D,
4468                                 index, 1,
4469                                 VGPU10_NAME_UNDEFINED,
4470                                 VGPU10_OPERAND_0_COMPONENT,
4471                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4472                                 0,
4473                                 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4474                                 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4475       } else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4476          /* The emission of the control point id will be done
4477           * in the control point phase in emit_hull_shader_control_point_phase().
4478           */
4479          emit->tcs.invocation_id_sys_index = index;
4480       }
4481       break;
4482    case TGSI_SEMANTIC_SAMPLEMASK:
4483       /* Note: the PS sample mask input has a unique name ("vCoverage#")
4484        * rather than just a generic input name ("v#") so no need to remap the
4485        * index value.
4486        */
4487       assert(emit->unit == PIPE_SHADER_FRAGMENT);
4488       assert(emit->version >= 50);
4489       emit->fs.sample_mask_in_sys_index = index;
4490       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4491                              VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK,
4492                              VGPU10_OPERAND_INDEX_0D,
4493                              index, 1,
4494                              VGPU10_NAME_UNDEFINED,
4495                              VGPU10_OPERAND_1_COMPONENT,
4496                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4497                              0,
4498                              VGPU10_INTERPOLATION_CONSTANT, TRUE,
4499                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4500       break;
4501    case TGSI_SEMANTIC_TESSCOORD:
4502       assert(emit->version >= 50);
4503 
4504       unsigned usageMask = 0;
4505 
4506       if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4507          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ;
4508       }
4509       else if (emit->tes.prim_mode == PIPE_PRIM_LINES ||
4510                emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4511          usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY;
4512       }
4513 
4514       emit->tes.tesscoord_sys_index = index;
4515       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4516                              VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT,
4517                              VGPU10_OPERAND_INDEX_0D,
4518                              index, 1,
4519                              VGPU10_NAME_UNDEFINED,
4520                              VGPU10_OPERAND_4_COMPONENT,
4521                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4522                              usageMask,
4523                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4524                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4525       break;
4526    case TGSI_SEMANTIC_TESSINNER:
4527       assert(emit->version >= 50);
4528       emit->tes.inner.tgsi_index = index;
4529       break;
4530    case TGSI_SEMANTIC_TESSOUTER:
4531       assert(emit->version >= 50);
4532       emit->tes.outer.tgsi_index = index;
4533       break;
4534    case TGSI_SEMANTIC_VERTICESIN:
4535       assert(emit->unit == PIPE_SHADER_TESS_CTRL);
4536       assert(emit->version >= 50);
4537 
4538       /* save the system value index */
4539       emit->tcs.vertices_per_patch_index = index;
4540       break;
4541    case TGSI_SEMANTIC_PRIMID:
4542       assert(emit->version >= 50);
4543       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
4544          emit->tcs.prim_id_index = index;
4545       }
4546       else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
4547          emit->tes.prim_id_index = index;
4548          emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4549                                 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID,
4550                                 VGPU10_OPERAND_INDEX_0D,
4551                                 index, 1,
4552                                 VGPU10_NAME_UNDEFINED,
4553                                 VGPU10_OPERAND_0_COMPONENT,
4554                                 VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4555                                 0,
4556                                 VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4557                                 map_tgsi_semantic_to_sgn_name(semantic_name));
4558       }
4559       break;
4560    default:
4561       debug_printf("unexpected system value semantic index %u / %s\n",
4562                    semantic_name, tgsi_semantic_names[semantic_name]);
4563    }
4564 }
4565 
4566 /**
4567  * Translate a TGSI declaration to VGPU10.
4568  */
4569 static boolean
emit_vgpu10_declaration(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_declaration * decl)4570 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
4571                         const struct tgsi_full_declaration *decl)
4572 {
4573    switch (decl->Declaration.File) {
4574    case TGSI_FILE_INPUT:
4575       /* do nothing - see emit_input_declarations() */
4576       return TRUE;
4577 
4578    case TGSI_FILE_OUTPUT:
4579       assert(decl->Range.First == decl->Range.Last);
4580       emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
4581       return TRUE;
4582 
4583    case TGSI_FILE_TEMPORARY:
4584       /* Don't declare the temps here.  Just keep track of how many
4585        * and emit the declaration later.
4586        */
4587       if (decl->Declaration.Array) {
4588          /* Indexed temporary array.  Save the start index of the array
4589           * and the size of the array.
4590           */
4591          const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
4592          assert(arrayID < ARRAY_SIZE(emit->temp_arrays));
4593 
4594          /* Save this array so we can emit the declaration for it later */
4595          create_temp_array(emit, arrayID, decl->Range.First,
4596                            decl->Range.Last - decl->Range.First + 1,
4597                            decl->Range.First);
4598       }
4599 
4600       /* for all temps, indexed or not, keep track of highest index */
4601       emit->num_shader_temps = MAX2(emit->num_shader_temps,
4602                                     decl->Range.Last + 1);
4603       return TRUE;
4604 
4605    case TGSI_FILE_CONSTANT:
4606       /* Don't declare constants here.  Just keep track and emit later. */
4607       {
4608          unsigned constbuf = 0, num_consts;
4609          if (decl->Declaration.Dimension) {
4610             constbuf = decl->Dim.Index2D;
4611          }
4612          /* We throw an assertion here when, in fact, the shader should never
4613           * have linked due to constbuf index out of bounds, so we shouldn't
4614           * have reached here.
4615           */
4616          assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));
4617 
4618          num_consts = MAX2(emit->num_shader_consts[constbuf],
4619                            decl->Range.Last + 1);
4620 
4621          if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
4622             debug_printf("Warning: constant buffer is declared to size [%u]"
4623                          " but [%u] is the limit.\n",
4624                          num_consts,
4625                          VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4626          }
4627          /* The linker doesn't enforce the max UBO size so we clamp here */
4628          emit->num_shader_consts[constbuf] =
4629             MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
4630       }
4631       return TRUE;
4632 
4633    case TGSI_FILE_IMMEDIATE:
4634       assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
4635       return FALSE;
4636 
4637    case TGSI_FILE_SYSTEM_VALUE:
4638       emit_system_value_declaration(emit, decl->Semantic.Name,
4639                                     decl->Range.First);
4640       return TRUE;
4641 
4642    case TGSI_FILE_SAMPLER:
4643       /* Don't declare samplers here.  Just keep track and emit later. */
4644       emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
4645       return TRUE;
4646 
4647 #if 0
4648    case TGSI_FILE_RESOURCE:
4649       /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
4650       /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
4651       assert(!"TGSI_FILE_RESOURCE not handled yet");
4652       return FALSE;
4653 #endif
4654 
4655    case TGSI_FILE_ADDRESS:
4656       emit->num_address_regs = MAX2(emit->num_address_regs,
4657                                     decl->Range.Last + 1);
4658       return TRUE;
4659 
4660    case TGSI_FILE_SAMPLER_VIEW:
4661       {
4662          unsigned unit = decl->Range.First;
4663          assert(decl->Range.First == decl->Range.Last);
4664          emit->sampler_target[unit] = decl->SamplerView.Resource;
4665          /* Note: we can ignore YZW return types for now */
4666          emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
4667          emit->sampler_view[unit] = TRUE;
4668       }
4669       return TRUE;
4670 
4671    default:
4672       assert(!"Unexpected type of declaration");
4673       return FALSE;
4674    }
4675 }
4676 
4677 
4678 
4679 /**
4680  * Emit input declarations for fragment shader.
4681  */
4682 static void
emit_fs_input_declarations(struct svga_shader_emitter_v10 * emit)4683 emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit)
4684 {
4685    unsigned i;
4686 
4687    for (i = 0; i < emit->linkage.num_inputs; i++) {
4688       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4689       unsigned usage_mask = emit->info.input_usage_mask[i];
4690       unsigned index = emit->linkage.input_map[i];
4691       unsigned type, interpolationMode, name;
4692       unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
4693 
4694       if (usage_mask == 0)
4695          continue;  /* register is not actually used */
4696 
4697       if (semantic_name == TGSI_SEMANTIC_POSITION) {
4698          /* fragment position input */
4699          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4700          interpolationMode = VGPU10_INTERPOLATION_LINEAR;
4701          name = VGPU10_NAME_POSITION;
4702          if (usage_mask & TGSI_WRITEMASK_W) {
4703             /* we need to replace use of 'w' with '1/w' */
4704             emit->fs.fragcoord_input_index = i;
4705          }
4706       }
4707       else if (semantic_name == TGSI_SEMANTIC_FACE) {
4708          /* fragment front-facing input */
4709          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4710          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4711          name = VGPU10_NAME_IS_FRONT_FACE;
4712          emit->fs.face_input_index = i;
4713       }
4714       else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4715          /* primitive ID */
4716          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4717          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4718          name = VGPU10_NAME_PRIMITIVE_ID;
4719       }
4720       else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) {
4721          /* sample index / ID */
4722          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4723          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4724          name = VGPU10_NAME_SAMPLE_INDEX;
4725       }
4726       else if (semantic_name == TGSI_SEMANTIC_LAYER) {
4727          /* render target array index */
4728          if (emit->key.fs.layer_to_zero) {
4729             /**
4730              * The shader from the previous stage does not write to layer,
4731              * so reading the layer index in fragment shader should return 0.
4732              */
4733             emit->fs.layer_input_index = i;
4734             continue;
4735          } else {
4736             type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4737             interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4738             name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
4739             mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4740          }
4741       }
4742       else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) {
4743          /* viewport index */
4744          type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
4745          interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
4746          name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX;
4747          mask = VGPU10_OPERAND_4_COMPONENT_MASK_X;
4748       }
4749       else {
4750          /* general fragment input */
4751          type = VGPU10_OPCODE_DCL_INPUT_PS;
4752          interpolationMode =
4753                translate_interpolation(emit,
4754                                        emit->info.input_interpolate[i],
4755                                        emit->info.input_interpolate_loc[i]);
4756 
4757          /* keeps track if flat interpolation mode is being used */
4758          emit->uses_flat_interp = emit->uses_flat_interp ||
4759                (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);
4760 
4761          name = VGPU10_NAME_UNDEFINED;
4762       }
4763 
4764       emit_input_declaration(emit, type,
4765                              VGPU10_OPERAND_TYPE_INPUT,
4766                              VGPU10_OPERAND_INDEX_1D, index, 1,
4767                              name,
4768                              VGPU10_OPERAND_4_COMPONENT,
4769                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4770                              mask,
4771                              interpolationMode, TRUE,
4772                              map_tgsi_semantic_to_sgn_name(semantic_name));
4773    }
4774 }
4775 
4776 
4777 /**
4778  * Emit input declarations for vertex shader.
4779  */
4780 static void
emit_vs_input_declarations(struct svga_shader_emitter_v10 * emit)4781 emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit)
4782 {
4783    unsigned i;
4784 
4785    for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
4786       unsigned usage_mask = emit->info.input_usage_mask[i];
4787       unsigned index = i;
4788 
4789       if (usage_mask == 0)
4790          continue;  /* register is not actually used */
4791 
4792       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4793                              VGPU10_OPERAND_TYPE_INPUT,
4794                              VGPU10_OPERAND_INDEX_1D, index, 1,
4795                              VGPU10_NAME_UNDEFINED,
4796                              VGPU10_OPERAND_4_COMPONENT,
4797                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4798                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4799                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4800                              SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
4801    }
4802 }
4803 
4804 
4805 /**
4806  * Emit input declarations for geometry shader.
4807  */
4808 static void
emit_gs_input_declarations(struct svga_shader_emitter_v10 * emit)4809 emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit)
4810 {
4811    unsigned i;
4812 
4813    for (i = 0; i < emit->info.num_inputs; i++) {
4814       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4815       unsigned usage_mask = emit->info.input_usage_mask[i];
4816       unsigned index = emit->linkage.input_map[i];
4817       unsigned opcodeType, operandType;
4818       unsigned numComp, selMode;
4819       unsigned name;
4820       unsigned dim;
4821 
4822       if (usage_mask == 0)
4823          continue;  /* register is not actually used */
4824 
4825       opcodeType = VGPU10_OPCODE_DCL_INPUT;
4826       operandType = VGPU10_OPERAND_TYPE_INPUT;
4827       numComp = VGPU10_OPERAND_4_COMPONENT;
4828       selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
4829       name = VGPU10_NAME_UNDEFINED;
4830 
4831       /* all geometry shader inputs are two dimensional except
4832        * gl_PrimitiveID
4833        */
4834       dim = VGPU10_OPERAND_INDEX_2D;
4835 
4836       if (semantic_name == TGSI_SEMANTIC_PRIMID) {
4837          /* Primitive ID */
4838          operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
4839          dim = VGPU10_OPERAND_INDEX_0D;
4840          numComp = VGPU10_OPERAND_0_COMPONENT;
4841          selMode = 0;
4842 
4843          /* also save the register index so we can check for
4844           * primitive id when emit src register. We need to modify the
4845           * operand type, index dimension when emit primitive id src reg.
4846           */
4847           emit->gs.prim_id_index = i;
4848       }
4849       else if (semantic_name == TGSI_SEMANTIC_POSITION) {
4850          /* vertex position input */
4851          opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
4852          name = VGPU10_NAME_POSITION;
4853       }
4854 
4855       emit_input_declaration(emit, opcodeType, operandType,
4856                              dim, index,
4857                              emit->gs.input_size,
4858                              name,
4859                              numComp, selMode,
4860                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4861                              VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4862                              map_tgsi_semantic_to_sgn_name(semantic_name));
4863    }
4864 }
4865 
4866 
4867 /**
4868  * Emit input declarations for tessellation control shader.
4869  */
4870 static void
emit_tcs_input_declarations(struct svga_shader_emitter_v10 * emit)4871 emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit)
4872 {
4873    unsigned i;
4874    unsigned size = emit->key.tcs.vertices_per_patch;
4875    unsigned indicesMask = 0;
4876    boolean addSignature = TRUE;
4877 
4878    if (!emit->tcs.control_point_phase)
4879       addSignature = emit->tcs.fork_phase_add_signature;
4880 
4881    for (i = 0; i < emit->info.num_inputs; i++) {
4882       unsigned usage_mask = emit->info.input_usage_mask[i];
4883       unsigned index = emit->linkage.input_map[i];
4884       enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
4885       VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED;
4886       VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT;
4887       SVGA3dDXSignatureSemanticName sgn_name =
4888          map_tgsi_semantic_to_sgn_name(semantic_name);
4889 
4890       /* indices that are declared */
4891       indicesMask |= 1 << index;
4892 
4893       if (semantic_name == TGSI_SEMANTIC_POSITION ||
4894           index == emit->linkage.position_index) {
4895          /* save the input control point index for later use */
4896          emit->tcs.control_point_input_index = i;
4897       }
4898       else if (usage_mask == 0) {
4899          continue;  /* register is not actually used */
4900       }
4901       else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) {
4902          /* The shadow copy is being used here. So set the signature name
4903           * to UNDEFINED.
4904           */
4905          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
4906       }
4907 
4908       /* input control points in the patch constant phase are emitted in the
4909        * vicp register rather than the v register.
4910        */
4911       if (!emit->tcs.control_point_phase) {
4912          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
4913       }
4914 
4915       /* Tessellation control shader inputs are two dimensional.
4916        * The array size is determined by the patch vertex count.
4917        */
4918       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4919                              operandType,
4920                              VGPU10_OPERAND_INDEX_2D,
4921                              index, size, name,
4922                              VGPU10_OPERAND_4_COMPONENT,
4923                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4924                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4925                              VGPU10_INTERPOLATION_UNDEFINED,
4926                              addSignature, sgn_name);
4927    }
4928 
4929    if (emit->tcs.control_point_phase) {
4930       if (emit->tcs.control_point_input_index == INVALID_INDEX) {
4931 
4932          /* Add input control point declaration if it does not exist */
4933          if ((indicesMask & (1 << emit->linkage.position_index)) == 0) {
4934             emit->linkage.input_map[emit->linkage.num_inputs] =
4935                emit->linkage.position_index;
4936             emit->tcs.control_point_input_index = emit->linkage.num_inputs++;
4937 
4938             emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
4939                                    VGPU10_OPERAND_TYPE_INPUT,
4940                                    VGPU10_OPERAND_INDEX_2D,
4941                                    emit->linkage.position_index,
4942                                    emit->key.tcs.vertices_per_patch,
4943                                    VGPU10_NAME_UNDEFINED,
4944                                    VGPU10_OPERAND_4_COMPONENT,
4945                                    VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
4946                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
4947                                    VGPU10_INTERPOLATION_UNDEFINED, TRUE,
4948                                    SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
4949          }
4950       }
4951 
4952       /* Also add an address register for the indirection to the
4953        * input control points
4954        */
4955       emit->tcs.control_point_addr_index = emit->num_address_regs++;
4956    }
4957 }
4958 
4959 
4960 static void
emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 * emit)4961 emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit)
4962 {
4963 
4964    /* In tcs, tess factors are emitted as extra outputs.
4965     * The starting register index for the tess factors is captured
4966     * in the compile key.
4967     */
4968    unsigned inputIndex = emit->key.tes.tessfactor_index;
4969 
4970    if (emit->tes.prim_mode == PIPE_PRIM_QUADS) {
4971       if (emit->key.tes.need_tessouter) {
4972          emit->tes.outer.in_index = inputIndex;
4973          for (int i = 0; i < 4; i++) {
4974             emit_tesslevel_declaration(emit, inputIndex++,
4975                VGPU10_OPCODE_DCL_INPUT_SIV,
4976                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4977                VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i,
4978                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i);
4979          }
4980       }
4981 
4982       if (emit->key.tes.need_tessinner) {
4983          emit->tes.inner.in_index = inputIndex;
4984          emit_tesslevel_declaration(emit, inputIndex++,
4985             VGPU10_OPCODE_DCL_INPUT_SIV,
4986             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4987             VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR,
4988             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR);
4989 
4990          emit_tesslevel_declaration(emit, inputIndex++,
4991             VGPU10_OPCODE_DCL_INPUT_SIV,
4992             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
4993             VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR,
4994             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR);
4995       }
4996    }
4997    else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) {
4998       if (emit->key.tes.need_tessouter) {
4999          emit->tes.outer.in_index = inputIndex;
5000          for (int i = 0; i < 3; i++) {
5001             emit_tesslevel_declaration(emit, inputIndex++,
5002                VGPU10_OPCODE_DCL_INPUT_SIV,
5003                VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5004                VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i,
5005                SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i);
5006          }
5007       }
5008 
5009       if (emit->key.tes.need_tessinner) {
5010          emit->tes.inner.in_index = inputIndex;
5011          emit_tesslevel_declaration(emit, inputIndex++,
5012             VGPU10_OPCODE_DCL_INPUT_SIV,
5013             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5014             VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR,
5015             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR);
5016       }
5017    }
5018    else if (emit->tes.prim_mode == PIPE_PRIM_LINES) {
5019       if (emit->key.tes.need_tessouter) {
5020          emit->tes.outer.in_index = inputIndex;
5021          emit_tesslevel_declaration(emit, inputIndex++,
5022             VGPU10_OPCODE_DCL_INPUT_SIV,
5023             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5024             VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR,
5025             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR);
5026 
5027          emit_tesslevel_declaration(emit, inputIndex++,
5028             VGPU10_OPCODE_DCL_INPUT_SIV,
5029             VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5030             VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR,
5031             SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR);
5032       }
5033    }
5034 }
5035 
5036 
5037 /**
5038  * Emit input declarations for tessellation evaluation shader.
5039  */
5040 static void
emit_tes_input_declarations(struct svga_shader_emitter_v10 * emit)5041 emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit)
5042 {
5043    unsigned i;
5044 
5045    for (i = 0; i < emit->info.num_inputs; i++) {
5046       unsigned usage_mask = emit->info.input_usage_mask[i];
5047       unsigned index = emit->linkage.input_map[i];
5048       unsigned size;
5049       const enum tgsi_semantic semantic_name =
5050          emit->info.input_semantic_name[i];
5051       SVGA3dDXSignatureSemanticName sgn_name;
5052       VGPU10_OPERAND_TYPE operandType;
5053       VGPU10_OPERAND_INDEX_DIMENSION dim;
5054 
5055       if (usage_mask == 0)
5056          usage_mask = 1;  /* at least set usage mask to one */
5057 
5058       if (semantic_name == TGSI_SEMANTIC_PATCH) {
5059          operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT;
5060          dim = VGPU10_OPERAND_INDEX_1D;
5061          size = 1;
5062          sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name);
5063       }
5064       else {
5065          operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT;
5066          dim = VGPU10_OPERAND_INDEX_2D;
5067          size = emit->key.tes.vertices_per_patch;
5068          sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED;
5069       }
5070 
5071       emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType,
5072                              dim, index, size, VGPU10_NAME_UNDEFINED,
5073                              VGPU10_OPERAND_4_COMPONENT,
5074                              VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5075                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5076                              VGPU10_INTERPOLATION_UNDEFINED,
5077                              TRUE, sgn_name);
5078    }
5079 
5080    emit_tessfactor_input_declarations(emit);
5081 
5082    /* DX spec requires DS input controlpoint/patch-constant signatures to match
5083     * the HS output controlpoint/patch-constant signatures exactly.
5084     * Add missing input declarations even if they are not used in the shader.
5085     */
5086    if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) {
5087       struct tgsi_shader_info *prevInfo = emit->prevShaderInfo;
5088       for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) {
5089 
5090           /* If a tcs output does not have a corresponding input register in
5091            * tes, add one.
5092            */
5093           if (emit->linkage.prevShader.output_map[i] >
5094               emit->linkage.input_map_max) {
5095              const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i];
5096 
5097              if (sem_name == TGSI_SEMANTIC_PATCH) {
5098                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5099                                        VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT,
5100                                        VGPU10_OPERAND_INDEX_1D,
5101                                        i, 1, VGPU10_NAME_UNDEFINED,
5102                                        VGPU10_OPERAND_4_COMPONENT,
5103                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5104                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5105                                        VGPU10_INTERPOLATION_UNDEFINED,
5106                                        TRUE,
5107                                        map_tgsi_semantic_to_sgn_name(sem_name));
5108 
5109              } else if (sem_name != TGSI_SEMANTIC_TESSINNER &&
5110                         sem_name != TGSI_SEMANTIC_TESSOUTER) {
5111                 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
5112                                        VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT,
5113                                        VGPU10_OPERAND_INDEX_2D,
5114                                        i, emit->key.tes.vertices_per_patch,
5115                                        VGPU10_NAME_UNDEFINED,
5116                                        VGPU10_OPERAND_4_COMPONENT,
5117                                        VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
5118                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5119                                        VGPU10_INTERPOLATION_UNDEFINED,
5120                                        TRUE,
5121                                        map_tgsi_semantic_to_sgn_name(sem_name));
5122              }
5123              /* tessellation factors are taken care of in
5124               * emit_tessfactor_input_declarations().
5125               */
5126          }
5127       }
5128    }
5129 }
5130 
5131 
5132 /**
5133  * Emit all input declarations.
5134  */
5135 static boolean
emit_input_declarations(struct svga_shader_emitter_v10 * emit)5136 emit_input_declarations(struct svga_shader_emitter_v10 *emit)
5137 {
5138    emit->index_range.required =
5139       emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE;
5140 
5141    switch (emit->unit) {
5142    case PIPE_SHADER_FRAGMENT:
5143       emit_fs_input_declarations(emit);
5144       break;
5145    case PIPE_SHADER_GEOMETRY:
5146       emit_gs_input_declarations(emit);
5147       break;
5148    case PIPE_SHADER_VERTEX:
5149       emit_vs_input_declarations(emit);
5150       break;
5151    case PIPE_SHADER_TESS_CTRL:
5152       emit_tcs_input_declarations(emit);
5153       break;
5154    case PIPE_SHADER_TESS_EVAL:
5155       emit_tes_input_declarations(emit);
5156       break;
5157    case PIPE_SHADER_COMPUTE:
5158       //XXX emit_cs_input_declarations(emit);
5159       break;
5160    default:
5161       assert(0);
5162    }
5163 
5164    if (emit->index_range.start_index != INVALID_INDEX) {
5165       emit_index_range_declaration(emit);
5166    }
5167    emit->index_range.required = FALSE;
5168    return TRUE;
5169 }
5170 
5171 
5172 /**
5173  * Emit all output declarations.
5174  */
5175 static boolean
emit_output_declarations(struct svga_shader_emitter_v10 * emit)5176 emit_output_declarations(struct svga_shader_emitter_v10 *emit)
5177 {
5178    emit->index_range.required =
5179       emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE;
5180 
5181    switch (emit->unit) {
5182    case PIPE_SHADER_FRAGMENT:
5183       emit_fs_output_declarations(emit);
5184       break;
5185    case PIPE_SHADER_GEOMETRY:
5186       emit_gs_output_declarations(emit);
5187       break;
5188    case PIPE_SHADER_VERTEX:
5189       emit_vs_output_declarations(emit);
5190       break;
5191    case PIPE_SHADER_TESS_CTRL:
5192       emit_tcs_output_declarations(emit);
5193       break;
5194    case PIPE_SHADER_TESS_EVAL:
5195       emit_tes_output_declarations(emit);
5196       break;
5197    case PIPE_SHADER_COMPUTE:
5198       //XXX emit_cs_output_declarations(emit);
5199       break;
5200    default:
5201       assert(0);
5202    }
5203 
5204    if (emit->vposition.so_index != INVALID_INDEX &&
5205        emit->vposition.out_index != INVALID_INDEX) {
5206 
5207       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5208 
5209       /* Emit the declaration for the non-adjusted vertex position
5210        * for stream output purpose
5211        */
5212       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5213                               emit->vposition.so_index,
5214                               VGPU10_NAME_UNDEFINED,
5215                               VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
5216                               TRUE,
5217                               SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION);
5218    }
5219 
5220    if (emit->clip_dist_so_index != INVALID_INDEX &&
5221        emit->clip_dist_out_index != INVALID_INDEX) {
5222 
5223       assert(emit->unit != PIPE_SHADER_FRAGMENT);
5224 
5225       /* Emit the declaration for the clip distance shadow copy which
5226        * will be used for stream output purpose and for clip distance
5227        * varying variable
5228        */
5229       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5230                               emit->clip_dist_so_index,
5231                               VGPU10_NAME_UNDEFINED,
5232                               emit->output_usage_mask[emit->clip_dist_out_index],
5233                               TRUE,
5234                               SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5235 
5236       if (emit->info.num_written_clipdistance > 4) {
5237          /* for the second clip distance register, each handles 4 planes */
5238          emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
5239                                  emit->clip_dist_so_index + 1,
5240                                  VGPU10_NAME_UNDEFINED,
5241                                  emit->output_usage_mask[emit->clip_dist_out_index+1],
5242                                  TRUE,
5243                                  SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED);
5244       }
5245    }
5246 
5247    if (emit->index_range.start_index != INVALID_INDEX) {
5248       emit_index_range_declaration(emit);
5249    }
5250    emit->index_range.required = FALSE;
5251    return TRUE;
5252 }
5253 
5254 
5255 /**
5256  * A helper function to create a temporary indexable array
5257  * and initialize the corresponding entries in the temp_map array.
5258  */
5259 static void
create_temp_array(struct svga_shader_emitter_v10 * emit,unsigned arrayID,unsigned first,unsigned count,unsigned startIndex)5260 create_temp_array(struct svga_shader_emitter_v10 *emit,
5261                   unsigned arrayID, unsigned first, unsigned count,
5262                   unsigned startIndex)
5263 {
5264    unsigned i, tempIndex = startIndex;
5265 
5266    emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
5267    assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
5268    emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);
5269 
5270    emit->temp_arrays[arrayID].start = first;
5271    emit->temp_arrays[arrayID].size = count;
5272 
5273    /* Fill in the temp_map entries for this temp array */
5274    for (i = 0; i < count; i++, tempIndex++) {
5275       emit->temp_map[tempIndex].arrayId = arrayID;
5276       emit->temp_map[tempIndex].index = i;
5277    }
5278 }
5279 
5280 
5281 /**
5282  * Emit the declaration for the temporary registers.
5283  */
5284 static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 * emit)5285 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
5286 {
5287    unsigned total_temps, reg, i;
5288 
5289    total_temps = emit->num_shader_temps;
5290 
5291    /* If there is indirect access to non-indexable temps in the shader,
5292     * convert those temps to indexable temps. This works around a bug
5293     * in the GLSL->TGSI translator exposed in piglit test
5294     * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
5295     * Internal temps added by the driver remain as non-indexable temps.
5296     */
5297    if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
5298        emit->num_temp_arrays == 0) {
5299       create_temp_array(emit, 1, 0, total_temps, 0);
5300    }
5301 
5302    /* Allocate extra temps for specially-implemented instructions,
5303     * such as LIT.
5304     */
5305    total_temps += MAX_INTERNAL_TEMPS;
5306 
5307    /* Allocate extra temps for clip distance or clip vertex.
5308     */
5309    if (emit->clip_mode == CLIP_DISTANCE) {
5310       /* We need to write the clip distance to a temporary register
5311        * first. Then it will be copied to the shadow copy for
5312        * the clip distance varying variable and stream output purpose.
5313        * It will also be copied to the actual CLIPDIST register
5314        * according to the enabled clip planes
5315        */
5316       emit->clip_dist_tmp_index = total_temps++;
5317       if (emit->info.num_written_clipdistance > 4)
5318          total_temps++; /* second clip register */
5319    }
5320    else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) {
5321       /* If the current shader is in the last vertex processing stage,
5322        * We need to convert the TGSI CLIPVERTEX output to one or more
5323        * clip distances.  Allocate a temp reg for the clipvertex here.
5324        */
5325       assert(emit->info.writes_clipvertex > 0);
5326       emit->clip_vertex_tmp_index = total_temps;
5327       total_temps++;
5328    }
5329 
5330    if (emit->info.uses_vertexid) {
5331       assert(emit->unit == PIPE_SHADER_VERTEX);
5332       emit->vs.vertex_id_tmp_index = total_temps++;
5333    }
5334 
5335    if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
5336       if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
5337           emit->key.clip_plane_enable ||
5338           emit->vposition.so_index != INVALID_INDEX) {
5339          emit->vposition.tmp_index = total_temps;
5340          total_temps += 1;
5341       }
5342 
5343       if (emit->vposition.need_prescale) {
5344          emit->vposition.prescale_scale_index = total_temps++;
5345          emit->vposition.prescale_trans_index = total_temps++;
5346       }
5347 
5348       if (emit->unit == PIPE_SHADER_VERTEX) {
5349          unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
5350                                  emit->key.vs.adjust_attrib_itof |
5351                                  emit->key.vs.adjust_attrib_utof |
5352                                  emit->key.vs.attrib_is_bgra |
5353                                  emit->key.vs.attrib_puint_to_snorm |
5354                                  emit->key.vs.attrib_puint_to_uscaled |
5355                                  emit->key.vs.attrib_puint_to_sscaled);
5356          while (attrib_mask) {
5357             unsigned index = u_bit_scan(&attrib_mask);
5358             emit->vs.adjusted_input[index] = total_temps++;
5359          }
5360       }
5361       else if (emit->unit == PIPE_SHADER_GEOMETRY) {
5362          if (emit->key.gs.writes_viewport_index)
5363             emit->gs.viewport_index_tmp_index = total_temps++;
5364       }
5365    }
5366    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
5367       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
5368           emit->key.fs.write_color0_to_n_cbufs > 1) {
5369          /* Allocate a temp to hold the output color */
5370          emit->fs.color_tmp_index = total_temps;
5371          total_temps += 1;
5372       }
5373 
5374       if (emit->fs.face_input_index != INVALID_INDEX) {
5375          /* Allocate a temp for the +/-1 face register */
5376          emit->fs.face_tmp_index = total_temps;
5377          total_temps += 1;
5378       }
5379 
5380       if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
5381          /* Allocate a temp for modified fragment position register */
5382          emit->fs.fragcoord_tmp_index = total_temps;
5383          total_temps += 1;
5384       }
5385 
5386       if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
5387          /* Allocate a temp for the sample position */
5388          emit->fs.sample_pos_tmp_index = total_temps++;
5389       }
5390    }
5391    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
5392       if (emit->vposition.need_prescale) {
5393          emit->vposition.tmp_index = total_temps++;
5394          emit->vposition.prescale_scale_index = total_temps++;
5395          emit->vposition.prescale_trans_index = total_temps++;
5396       }
5397 
5398       if (emit->tes.inner.tgsi_index) {
5399          emit->tes.inner.temp_index = total_temps;
5400          total_temps += 1;
5401       }
5402 
5403       if (emit->tes.outer.tgsi_index) {
5404          emit->tes.outer.temp_index = total_temps;
5405          total_temps += 1;
5406       }
5407    }
5408    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
5409       if (emit->tcs.inner.tgsi_index != INVALID_INDEX) {
5410          if (!emit->tcs.control_point_phase) {
5411             emit->tcs.inner.temp_index = total_temps;
5412             total_temps += 1;
5413          }
5414       }
5415       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5416          if (!emit->tcs.control_point_phase) {
5417             emit->tcs.outer.temp_index = total_temps;
5418             total_temps += 1;
5419          }
5420       }
5421 
5422       if (emit->tcs.control_point_phase &&
5423           emit->info.reads_pervertex_outputs) {
5424          emit->tcs.control_point_tmp_index = total_temps;
5425          total_temps += emit->tcs.control_point_out_count;
5426       }
5427       else if (!emit->tcs.control_point_phase &&
5428                emit->info.reads_perpatch_outputs) {
5429 
5430          /* If there is indirect access to the patch constant outputs
5431           * in the control point phase, then an indexable temporary array
5432           * will be created for these patch constant outputs.
5433           * Note, indirect access can only be applicable to
5434           * patch constant outputs in the control point phase.
5435           */
5436          if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) {
5437             unsigned arrayID =
5438                emit->num_temp_arrays ? emit->num_temp_arrays : 1;
5439             create_temp_array(emit, arrayID, 0,
5440                               emit->tcs.patch_generic_out_count, total_temps);
5441          }
5442          emit->tcs.patch_generic_tmp_index = total_temps;
5443          total_temps += emit->tcs.patch_generic_out_count;
5444       }
5445 
5446       emit->tcs.invocation_id_tmp_index = total_temps++;
5447    }
5448 
5449    for (i = 0; i < emit->num_address_regs; i++) {
5450       emit->address_reg_index[i] = total_temps++;
5451    }
5452 
5453    /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
5454     * temp indexes.  Basically, we compact all the non-array temp register
5455     * indexes into a consecutive series.
5456     *
5457     * Before, we may have some TGSI declarations like:
5458     *   DCL TEMP[0..1], LOCAL
5459     *   DCL TEMP[2..4], ARRAY(1), LOCAL
5460     *   DCL TEMP[5..7], ARRAY(2), LOCAL
5461     *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
5462     *
5463     * After, we'll have a map like this:
5464     *   temp_map[0] = { array 0, index 0 }
5465     *   temp_map[1] = { array 0, index 1 }
5466     *   temp_map[2] = { array 1, index 0 }
5467     *   temp_map[3] = { array 1, index 1 }
5468     *   temp_map[4] = { array 1, index 2 }
5469     *   temp_map[5] = { array 2, index 0 }
5470     *   temp_map[6] = { array 2, index 1 }
5471     *   temp_map[7] = { array 2, index 2 }
5472     *   temp_map[8] = { array 0, index 2 }
5473     *   temp_map[9] = { array 0, index 3 }
5474     *
5475     * We'll declare two arrays of 3 elements, plus a set of four non-indexed
5476     * temps numbered 0..3
5477     *
5478     * Any time we emit a temporary register index, we'll have to use the
5479     * temp_map[] table to convert the TGSI index to the VGPU10 index.
5480     *
5481     * Finally, we recompute the total_temps value here.
5482     */
5483    reg = 0;
5484    for (i = 0; i < total_temps; i++) {
5485       if (emit->temp_map[i].arrayId == 0) {
5486          emit->temp_map[i].index = reg++;
5487       }
5488    }
5489 
5490    if (0) {
5491       debug_printf("total_temps %u\n", total_temps);
5492       for (i = 0; i < total_temps; i++) {
5493          debug_printf("temp %u ->  array %u  index %u\n",
5494                       i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
5495       }
5496    }
5497 
5498    total_temps = reg;
5499 
5500    /* Emit declaration of ordinary temp registers */
5501    if (total_temps > 0) {
5502       VGPU10OpcodeToken0 opcode0;
5503 
5504       opcode0.value = 0;
5505       opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;
5506 
5507       begin_emit_instruction(emit);
5508       emit_dword(emit, opcode0.value);
5509       emit_dword(emit, total_temps);
5510       end_emit_instruction(emit);
5511    }
5512 
5513    /* Emit declarations for indexable temp arrays.  Skip 0th entry since
5514     * it's unused.
5515     */
5516    for (i = 1; i < emit->num_temp_arrays; i++) {
5517       unsigned num_temps = emit->temp_arrays[i].size;
5518 
5519       if (num_temps > 0) {
5520          VGPU10OpcodeToken0 opcode0;
5521 
5522          opcode0.value = 0;
5523          opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;
5524 
5525          begin_emit_instruction(emit);
5526          emit_dword(emit, opcode0.value);
5527          emit_dword(emit, i); /* which array */
5528          emit_dword(emit, num_temps);
5529          emit_dword(emit, 4); /* num components */
5530          end_emit_instruction(emit);
5531 
5532          total_temps += num_temps;
5533       }
5534    }
5535 
5536    /* Check that the grand total of all regular and indexed temps is
5537     * under the limit.
5538     */
5539    check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);
5540 
5541    return TRUE;
5542 }
5543 
5544 
5545 static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 * emit)5546 emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
5547 {
5548    VGPU10OpcodeToken0 opcode0;
5549    VGPU10OperandToken0 operand0;
5550    unsigned total_consts, i;
5551 
5552    opcode0.value = 0;
5553    opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
5554    opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
5555    /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */
5556 
5557    operand0.value = 0;
5558    operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
5559    operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
5560    operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5561    operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5562    operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
5563    operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
5564    operand0.swizzleX = 0;
5565    operand0.swizzleY = 1;
5566    operand0.swizzleZ = 2;
5567    operand0.swizzleW = 3;
5568 
5569    /**
5570     * Emit declaration for constant buffer [0].  We also allocate
5571     * room for the extra constants here.
5572     */
5573    total_consts = emit->num_shader_consts[0];
5574 
5575    /* Now, allocate constant slots for the "extra" constants.
5576     * Note: it's critical that these extra constant locations
5577     * exactly match what's emitted by the "extra" constants code
5578     * in svga_state_constants.c
5579     */
5580 
5581    /* Vertex position scale/translation */
5582    if (emit->vposition.need_prescale) {
5583       emit->vposition.prescale_cbuf_index = total_consts;
5584       total_consts += (2 * emit->vposition.num_prescale);
5585    }
5586 
5587    if (emit->unit == PIPE_SHADER_VERTEX) {
5588       if (emit->key.vs.undo_viewport) {
5589          emit->vs.viewport_index = total_consts++;
5590       }
5591       if (emit->key.vs.need_vertex_id_bias) {
5592          emit->vs.vertex_id_bias_index = total_consts++;
5593       }
5594    }
5595 
5596    /* user-defined clip planes */
5597    if (emit->key.clip_plane_enable) {
5598       unsigned n = util_bitcount(emit->key.clip_plane_enable);
5599       assert(emit->unit != PIPE_SHADER_FRAGMENT &&
5600              emit->unit != PIPE_SHADER_COMPUTE);
5601       for (i = 0; i < n; i++) {
5602          emit->clip_plane_const[i] = total_consts++;
5603       }
5604    }
5605 
5606    for (i = 0; i < emit->num_samplers; i++) {
5607 
5608       if (emit->sampler_view[i]) {
5609 
5610          /* Texcoord scale factors for RECT textures */
5611          if (emit->key.tex[i].unnormalized) {
5612             emit->texcoord_scale_index[i] = total_consts++;
5613          }
5614 
5615          /* Texture buffer sizes */
5616          if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
5617             emit->texture_buffer_size_index[i] = total_consts++;
5618          }
5619       }
5620    }
5621 
5622    if (total_consts > 0) {
5623       begin_emit_instruction(emit);
5624       emit_dword(emit, opcode0.value);
5625       emit_dword(emit, operand0.value);
5626       emit_dword(emit, 0);  /* which const buffer slot */
5627       emit_dword(emit, total_consts);
5628       end_emit_instruction(emit);
5629    }
5630 
5631    /* Declare remaining constant buffers (UBOs) */
5632    for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
5633       if (emit->num_shader_consts[i] > 0) {
5634          begin_emit_instruction(emit);
5635          emit_dword(emit, opcode0.value);
5636          emit_dword(emit, operand0.value);
5637          emit_dword(emit, i);  /* which const buffer slot */
5638          emit_dword(emit, emit->num_shader_consts[i]);
5639          end_emit_instruction(emit);
5640       }
5641    }
5642 
5643    return TRUE;
5644 }
5645 
5646 
5647 /**
5648  * Emit declarations for samplers.
5649  */
5650 static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 * emit)5651 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
5652 {
5653    unsigned i;
5654 
5655    for (i = 0; i < emit->num_samplers; i++) {
5656       VGPU10OpcodeToken0 opcode0;
5657       VGPU10OperandToken0 operand0;
5658 
5659       opcode0.value = 0;
5660       opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
5661       opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;
5662 
5663       operand0.value = 0;
5664       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5665       operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
5666       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5667       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5668 
5669       begin_emit_instruction(emit);
5670       emit_dword(emit, opcode0.value);
5671       emit_dword(emit, operand0.value);
5672       emit_dword(emit, i);
5673       end_emit_instruction(emit);
5674    }
5675 
5676    return TRUE;
5677 }
5678 
5679 
5680 /**
5681  * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
5682  */
5683 static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,unsigned num_samples,boolean is_array)5684 tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
5685                                    unsigned num_samples,
5686                                    boolean is_array)
5687 {
5688    if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) {
5689       target = TGSI_TEXTURE_2D;
5690    }
5691    else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) {
5692       target = TGSI_TEXTURE_2D_ARRAY;
5693    }
5694 
5695    switch (target) {
5696    case TGSI_TEXTURE_BUFFER:
5697       return VGPU10_RESOURCE_DIMENSION_BUFFER;
5698    case TGSI_TEXTURE_1D:
5699       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5700    case TGSI_TEXTURE_2D:
5701    case TGSI_TEXTURE_RECT:
5702       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5703    case TGSI_TEXTURE_3D:
5704       return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
5705    case TGSI_TEXTURE_CUBE:
5706    case TGSI_TEXTURE_SHADOWCUBE:
5707       return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5708    case TGSI_TEXTURE_SHADOW1D:
5709       return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5710    case TGSI_TEXTURE_SHADOW2D:
5711    case TGSI_TEXTURE_SHADOWRECT:
5712       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5713    case TGSI_TEXTURE_1D_ARRAY:
5714    case TGSI_TEXTURE_SHADOW1D_ARRAY:
5715       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
5716          : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
5717    case TGSI_TEXTURE_2D_ARRAY:
5718    case TGSI_TEXTURE_SHADOW2D_ARRAY:
5719       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
5720          : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5721    case TGSI_TEXTURE_2D_MSAA:
5722       return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5723    case TGSI_TEXTURE_2D_ARRAY_MSAA:
5724       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
5725          : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
5726    case TGSI_TEXTURE_CUBE_ARRAY:
5727    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
5728       return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY
5729          : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
5730    default:
5731       assert(!"Unexpected resource type");
5732       return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
5733    }
5734 }
5735 
5736 
5737 /**
5738  * Given a tgsi_return_type, return true iff it is an integer type.
5739  */
5740 static boolean
is_integer_type(enum tgsi_return_type type)5741 is_integer_type(enum tgsi_return_type type)
5742 {
5743    switch (type) {
5744       case TGSI_RETURN_TYPE_SINT:
5745       case TGSI_RETURN_TYPE_UINT:
5746          return TRUE;
5747       case TGSI_RETURN_TYPE_FLOAT:
5748       case TGSI_RETURN_TYPE_UNORM:
5749       case TGSI_RETURN_TYPE_SNORM:
5750          return FALSE;
5751       case TGSI_RETURN_TYPE_COUNT:
5752       default:
5753          assert(!"is_integer_type: Unknown tgsi_return_type");
5754          return FALSE;
5755    }
5756 }
5757 
5758 
5759 /**
5760  * Emit declarations for resources.
5761  * XXX When we're sure that all TGSI shaders will be generated with
5762  * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
5763  * rework this code.
5764  */
5765 static boolean
emit_resource_declarations(struct svga_shader_emitter_v10 * emit)5766 emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
5767 {
5768    unsigned i;
5769 
5770    /* Emit resource decl for each sampler */
5771    for (i = 0; i < emit->num_samplers; i++) {
5772       VGPU10OpcodeToken0 opcode0;
5773       VGPU10OperandToken0 operand0;
5774       VGPU10ResourceReturnTypeToken return_type;
5775       VGPU10_RESOURCE_RETURN_TYPE rt;
5776 
5777       opcode0.value = 0;
5778       opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
5779       opcode0.resourceDimension =
5780          tgsi_texture_to_resource_dimension(emit->sampler_target[i],
5781                                             emit->key.tex[i].num_samples,
5782                                             emit->key.tex[i].is_array);
5783       opcode0.sampleCount = emit->key.tex[i].num_samples;
5784       operand0.value = 0;
5785       operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
5786       operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
5787       operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
5788       operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
5789 
5790 #if 1
5791       /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */
5792       STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
5793       STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
5794       STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
5795       STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
5796       STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
5797       assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
5798       rt = emit->sampler_return_type[i] + 1;
5799 #else
5800       switch (emit->sampler_return_type[i]) {
5801          case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
5802          case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
5803          case TGSI_RETURN_TYPE_SINT:  rt = VGPU10_RETURN_TYPE_SINT;  break;
5804          case TGSI_RETURN_TYPE_UINT:  rt = VGPU10_RETURN_TYPE_UINT;  break;
5805          case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
5806          case TGSI_RETURN_TYPE_COUNT:
5807          default:
5808             rt = VGPU10_RETURN_TYPE_FLOAT;
5809             assert(!"emit_resource_declarations: Unknown tgsi_return_type");
5810       }
5811 #endif
5812 
5813       return_type.value = 0;
5814       return_type.component0 = rt;
5815       return_type.component1 = rt;
5816       return_type.component2 = rt;
5817       return_type.component3 = rt;
5818 
5819       begin_emit_instruction(emit);
5820       emit_dword(emit, opcode0.value);
5821       emit_dword(emit, operand0.value);
5822       emit_dword(emit, i);
5823       emit_dword(emit, return_type.value);
5824       end_emit_instruction(emit);
5825    }
5826 
5827    return TRUE;
5828 }
5829 
5830 /**
5831  * Emit instruction with n=1, 2 or 3 source registers.
5832  */
5833 static void
emit_instruction_opn(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3,boolean saturate,bool precise)5834 emit_instruction_opn(struct svga_shader_emitter_v10 *emit,
5835                      unsigned opcode,
5836                      const struct tgsi_full_dst_register *dst,
5837                      const struct tgsi_full_src_register *src1,
5838                      const struct tgsi_full_src_register *src2,
5839                      const struct tgsi_full_src_register *src3,
5840                      boolean saturate, bool precise)
5841 {
5842    begin_emit_instruction(emit);
5843    emit_opcode_precise(emit, opcode, saturate, precise);
5844    emit_dst_register(emit, dst);
5845    emit_src_register(emit, src1);
5846    if (src2) {
5847       emit_src_register(emit, src2);
5848    }
5849    if (src3) {
5850       emit_src_register(emit, src3);
5851    }
5852    end_emit_instruction(emit);
5853 }
5854 
5855 static void
emit_instruction_op1(struct svga_shader_emitter_v10 * emit,unsigned opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)5856 emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
5857                      unsigned opcode,
5858                      const struct tgsi_full_dst_register *dst,
5859                      const struct tgsi_full_src_register *src)
5860 {
5861    emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE);
5862 }
5863 
5864 static void
emit_instruction_op2(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2)5865 emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
5866                      VGPU10_OPCODE_TYPE opcode,
5867                      const struct tgsi_full_dst_register *dst,
5868                      const struct tgsi_full_src_register *src1,
5869                      const struct tgsi_full_src_register *src2)
5870 {
5871    emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE);
5872 }
5873 
5874 static void
emit_instruction_op3(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src1,const struct tgsi_full_src_register * src2,const struct tgsi_full_src_register * src3)5875 emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
5876                      VGPU10_OPCODE_TYPE opcode,
5877                      const struct tgsi_full_dst_register *dst,
5878                      const struct tgsi_full_src_register *src1,
5879                      const struct tgsi_full_src_register *src2,
5880                      const struct tgsi_full_src_register *src3)
5881 {
5882    emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE);
5883 }
5884 
5885 static void
emit_instruction_op0(struct svga_shader_emitter_v10 * emit,VGPU10_OPCODE_TYPE opcode)5886 emit_instruction_op0(struct svga_shader_emitter_v10 *emit,
5887                      VGPU10_OPCODE_TYPE opcode)
5888 {
5889    begin_emit_instruction(emit);
5890    emit_opcode(emit, opcode, FALSE);
5891    end_emit_instruction(emit);
5892 }
5893 
5894 /**
5895  * Tessellation inner/outer levels needs to be store into its
5896  * appropriate registers depending on prim_mode.
5897  */
5898 static void
store_tesslevels(struct svga_shader_emitter_v10 * emit)5899 store_tesslevels(struct svga_shader_emitter_v10 *emit)
5900 {
5901    int i;
5902 
5903    /* tessellation levels are required input/out in hull shader.
5904     * emitting the inner/outer tessellation levels, either from
5905     * values provided in tcs or fallback default values which is 1.0
5906     */
5907    if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) {
5908       struct tgsi_full_src_register temp_src;
5909 
5910       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5911          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5912       else
5913          temp_src = make_immediate_reg_float(emit, 1.0f);
5914 
5915       for (i = 0; i < 2; i++) {
5916          struct tgsi_full_src_register src =
5917             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5918          struct tgsi_full_dst_register dst =
5919             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i);
5920          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5921          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5922       }
5923 
5924       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5925          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5926       else
5927          temp_src = make_immediate_reg_float(emit, 1.0f);
5928 
5929       for (i = 0; i < 4; i++) {
5930          struct tgsi_full_src_register src =
5931             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5932          struct tgsi_full_dst_register dst =
5933             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5934          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5935          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5936       }
5937    }
5938    else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) {
5939       struct tgsi_full_src_register temp_src;
5940 
5941       if (emit->tcs.inner.tgsi_index != INVALID_INDEX)
5942          temp_src = make_src_temp_reg(emit->tcs.inner.temp_index);
5943       else
5944          temp_src = make_immediate_reg_float(emit, 1.0f);
5945 
5946       struct tgsi_full_src_register src =
5947          scalar_src(&temp_src, TGSI_SWIZZLE_X);
5948       struct tgsi_full_dst_register dst =
5949          make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index);
5950       dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5951       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5952 
5953       if (emit->tcs.outer.tgsi_index != INVALID_INDEX)
5954          temp_src = make_src_temp_reg(emit->tcs.outer.temp_index);
5955       else
5956          temp_src = make_immediate_reg_float(emit, 1.0f);
5957 
5958       for (i = 0; i < 3; i++) {
5959          struct tgsi_full_src_register src =
5960             scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5961          struct tgsi_full_dst_register dst =
5962             make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i);
5963          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5964          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5965       }
5966    }
5967    else if (emit->key.tcs.prim_mode ==  PIPE_PRIM_LINES) {
5968       if (emit->tcs.outer.tgsi_index != INVALID_INDEX) {
5969          struct tgsi_full_src_register temp_src =
5970             make_src_temp_reg(emit->tcs.outer.temp_index);
5971          for (i = 0; i < 2; i++) {
5972             struct tgsi_full_src_register src =
5973                scalar_src(&temp_src, TGSI_SWIZZLE_X + i);
5974             struct tgsi_full_dst_register dst =
5975                make_dst_reg(TGSI_FILE_OUTPUT,
5976                             emit->tcs.outer.out_index + i);
5977             dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
5978             emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
5979          }
5980       }
5981    }
5982    else {
5983       debug_printf("Unsupported primitive type");
5984    }
5985 }
5986 
5987 
5988 /**
5989  * Emit the actual clip distance instructions to be used for clipping
5990  * by copying the clip distance from the temporary registers to the
5991  * CLIPDIST registers written with the enabled planes mask.
5992  * Also copy the clip distance from the temporary to the clip distance
5993  * shadow copy register which will be referenced by the input shader
5994  */
5995 static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 * emit)5996 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
5997 {
5998    struct tgsi_full_src_register tmp_clip_dist_src;
5999    struct tgsi_full_dst_register clip_dist_dst;
6000 
6001    unsigned i;
6002    unsigned clip_plane_enable = emit->key.clip_plane_enable;
6003    unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
6004    int num_written_clipdist = emit->info.num_written_clipdistance;
6005 
6006    assert(emit->clip_dist_out_index != INVALID_INDEX);
6007    assert(emit->clip_dist_tmp_index != INVALID_INDEX);
6008 
6009    /**
6010     * Temporary reset the temporary clip dist register index so
6011     * that the copy to the real clip dist register will not
6012     * attempt to copy to the temporary register again
6013     */
6014    emit->clip_dist_tmp_index = INVALID_INDEX;
6015 
6016    for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {
6017 
6018       tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);
6019 
6020       /**
6021        * copy to the shadow copy for use by varying variable and
6022        * stream output. All clip distances
6023        * will be written regardless of the enabled clipping planes.
6024        */
6025       clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6026                                    emit->clip_dist_so_index + i);
6027 
6028       /* MOV clip_dist_so, tmp_clip_dist */
6029       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6030                            &tmp_clip_dist_src);
6031 
6032       /**
6033        * copy those clip distances to enabled clipping planes
6034        * to CLIPDIST registers for clipping
6035        */
6036       if (clip_plane_enable & 0xf) {
6037          clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
6038                                       emit->clip_dist_out_index + i);
6039          clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);
6040 
6041          /* MOV CLIPDIST, tmp_clip_dist */
6042          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
6043                               &tmp_clip_dist_src);
6044       }
6045       /* four clip planes per clip register */
6046       clip_plane_enable >>= 4;
6047    }
6048    /**
6049     * set the temporary clip dist register index back to the
6050     * temporary index for the next vertex
6051     */
6052    emit->clip_dist_tmp_index = clip_dist_tmp_index;
6053 }
6054 
6055 /* Declare clip distance output registers for user-defined clip planes
6056  * or the TGSI_CLIPVERTEX output.
6057  */
6058 static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 * emit)6059 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
6060 {
6061    unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6062    unsigned index = emit->num_outputs;
6063    unsigned plane_mask;
6064 
6065    assert(emit->unit != PIPE_SHADER_FRAGMENT);
6066    assert(num_clip_planes <= 8);
6067 
6068    if (emit->clip_mode != CLIP_LEGACY &&
6069        emit->clip_mode != CLIP_VERTEX) {
6070       return;
6071    }
6072 
6073    if (num_clip_planes == 0)
6074       return;
6075 
6076    /* Convert clip vertex to clip distances only in the last vertex stage */
6077    if (!emit->key.last_vertex_stage)
6078       return;
6079 
6080    /* Declare one or two clip output registers.  The number of components
6081     * in the mask reflects the number of clip planes.  For example, if 5
6082     * clip planes are needed, we'll declare outputs similar to:
6083     * dcl_output_siv o2.xyzw, clip_distance
6084     * dcl_output_siv o3.x, clip_distance
6085     */
6086    emit->clip_dist_out_index = index; /* save the starting clip dist reg index */
6087 
6088    plane_mask = (1 << num_clip_planes) - 1;
6089    if (plane_mask & 0xf) {
6090       unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6091       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
6092                               VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6093                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6094       emit->num_outputs++;
6095    }
6096    if (plane_mask & 0xf0) {
6097       unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
6098       emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
6099                               VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE,
6100                               SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE);
6101       emit->num_outputs++;
6102    }
6103 }
6104 
6105 
6106 /**
6107  * Emit the instructions for writing to the clip distance registers
6108  * to handle legacy/automatic clip planes.
6109  * For each clip plane, the distance is the dot product of the vertex
6110  * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
6111  * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
6112  * output registers already declared.
6113  */
6114 static void
emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 * emit,unsigned vpos_tmp_index)6115 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit,
6116                              unsigned vpos_tmp_index)
6117 {
6118    unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
6119 
6120    assert(emit->clip_mode == CLIP_LEGACY);
6121    assert(num_clip_planes <= 8);
6122 
6123    assert(emit->unit == PIPE_SHADER_VERTEX ||
6124           emit->unit == PIPE_SHADER_GEOMETRY ||
6125           emit->unit == PIPE_SHADER_TESS_EVAL);
6126 
6127    for (i = 0; i < num_clip_planes; i++) {
6128       struct tgsi_full_dst_register dst;
6129       struct tgsi_full_src_register plane_src, vpos_src;
6130       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6131       unsigned comp = i % 4;
6132       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6133 
6134       /* create dst, src regs */
6135       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6136       dst = writemask_dst(&dst, writemask);
6137 
6138       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6139       vpos_src = make_src_temp_reg(vpos_tmp_index);
6140 
6141       /* DP4 clip_dist, plane, vpos */
6142       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6143                            &plane_src, &vpos_src);
6144    }
6145 }
6146 
6147 
6148 /**
6149  * Emit the instructions for computing the clip distance results from
6150  * the clip vertex temporary.
6151  * For each clip plane, the distance is the dot product of the clip vertex
6152  * position (found in a temp reg) and the clip plane coefficients.
6153  */
6154 static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 * emit)6155 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
6156 {
6157    const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
6158    unsigned i;
6159    struct tgsi_full_dst_register dst;
6160    struct tgsi_full_src_register clipvert_src;
6161    const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;
6162 
6163    assert(emit->unit == PIPE_SHADER_VERTEX ||
6164           emit->unit == PIPE_SHADER_GEOMETRY ||
6165           emit->unit == PIPE_SHADER_TESS_EVAL);
6166 
6167    assert(emit->clip_mode == CLIP_VERTEX);
6168 
6169    clipvert_src = make_src_temp_reg(clip_vertex_tmp);
6170 
6171    for (i = 0; i < num_clip; i++) {
6172       struct tgsi_full_src_register plane_src;
6173       unsigned reg_index = emit->clip_dist_out_index + i / 4;
6174       unsigned comp = i % 4;
6175       unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;
6176 
6177       /* create dst, src regs */
6178       dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
6179       dst = writemask_dst(&dst, writemask);
6180 
6181       plane_src = make_src_const_reg(emit->clip_plane_const[i]);
6182 
6183       /* DP4 clip_dist, plane, vpos */
6184       emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
6185                            &plane_src, &clipvert_src);
6186    }
6187 
6188    /* copy temporary clip vertex register to the clip vertex register */
6189 
6190    assert(emit->clip_vertex_out_index != INVALID_INDEX);
6191 
6192    /**
6193     * temporary reset the temporary clip vertex register index so
6194     * that copy to the clip vertex register will not attempt
6195     * to copy to the temporary register again
6196     */
6197    emit->clip_vertex_tmp_index = INVALID_INDEX;
6198 
6199    /* MOV clip_vertex, clip_vertex_tmp */
6200    dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
6201    emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
6202                         &dst, &clipvert_src);
6203 
6204    /**
6205     * set the temporary clip vertex register index back to the
6206     * temporary index for the next vertex
6207     */
6208    emit->clip_vertex_tmp_index = clip_vertex_tmp;
6209 }
6210 
6211 /**
6212  * Emit code to convert RGBA to BGRA
6213  */
6214 static void
emit_swap_r_b(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6215 emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
6216                      const struct tgsi_full_dst_register *dst,
6217                      const struct tgsi_full_src_register *src)
6218 {
6219    struct tgsi_full_src_register bgra_src =
6220       swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);
6221 
6222    begin_emit_instruction(emit);
6223    emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
6224    emit_dst_register(emit, dst);
6225    emit_src_register(emit, &bgra_src);
6226    end_emit_instruction(emit);
6227 }
6228 
6229 
6230 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
6231 static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6232 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
6233                     const struct tgsi_full_dst_register *dst,
6234                     const struct tgsi_full_src_register *src)
6235 {
6236    struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
6237    struct tgsi_full_src_register two =
6238       make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
6239    struct tgsi_full_src_register neg_two =
6240       make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);
6241 
6242    unsigned val_tmp = get_temp_index(emit);
6243    struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
6244    struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);
6245 
6246    unsigned bias_tmp = get_temp_index(emit);
6247    struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
6248    struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);
6249 
6250    /* val = src * 2.0 */
6251    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst, src, &two);
6252 
6253    /* bias = src > 0.5 */
6254    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half);
6255 
6256    /* bias = bias & -2.0 */
6257    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
6258                         &bias_src, &neg_two);
6259 
6260    /* dst = val + bias */
6261    emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
6262                         &val_src, &bias_src);
6263 
6264    free_temp_indexes(emit);
6265 }
6266 
6267 
6268 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
6269 static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6270 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
6271                       const struct tgsi_full_dst_register *dst,
6272                       const struct tgsi_full_src_register *src)
6273 {
6274    struct tgsi_full_src_register scale =
6275       make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);
6276 
6277    /* dst = src * scale */
6278    emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale);
6279 }
6280 
6281 
6282 /** Convert from R32_UINT to 10_10_10_2_sscaled */
6283 static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)6284 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
6285                       const struct tgsi_full_dst_register *dst,
6286                       const struct tgsi_full_src_register *src)
6287 {
6288    struct tgsi_full_src_register lshift =
6289       make_immediate_reg_int4(emit, 22, 12, 2, 0);
6290    struct tgsi_full_src_register rshift =
6291       make_immediate_reg_int4(emit, 22, 22, 22, 30);
6292 
6293    struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);
6294 
6295    unsigned tmp = get_temp_index(emit);
6296    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6297    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6298 
6299    /*
6300     * r = (pixel << 22) >> 22;   # signed int in [511, -512]
6301     * g = (pixel << 12) >> 22;   # signed int in [511, -512]
6302     * b = (pixel <<  2) >> 22;   # signed int in [511, -512]
6303     * a = (pixel <<  0) >> 30;   # signed int in [1, -2]
6304     * dst = i_to_f(r,g,b,a);     # convert to float
6305     */
6306    emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
6307                         &src_xxxx, &lshift);
6308    emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
6309                         &tmp_src, &rshift);
6310    emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src);
6311 
6312    free_temp_indexes(emit);
6313 }
6314 
6315 
6316 /**
6317  * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
6318  */
6319 static boolean
emit_arl_uarl(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6320 emit_arl_uarl(struct svga_shader_emitter_v10 *emit,
6321               const struct tgsi_full_instruction *inst)
6322 {
6323    unsigned index = inst->Dst[0].Register.Index;
6324    struct tgsi_full_dst_register dst;
6325    VGPU10_OPCODE_TYPE opcode;
6326 
6327    assert(index < MAX_VGPU10_ADDR_REGS);
6328    dst = make_dst_temp_reg(emit->address_reg_index[index]);
6329    dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask);
6330 
6331    /* ARL dst, s0
6332     * Translates into:
6333     * FTOI address_tmp, s0
6334     *
6335     * UARL dst, s0
6336     * Translates into:
6337     * MOV address_tmp, s0
6338     */
6339    if (inst->Instruction.Opcode == TGSI_OPCODE_ARL)
6340       opcode = VGPU10_OPCODE_FTOI;
6341    else
6342       opcode = VGPU10_OPCODE_MOV;
6343 
6344    emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]);
6345 
6346    return TRUE;
6347 }
6348 
6349 
6350 /**
6351  * Emit code for TGSI_OPCODE_CAL instruction.
6352  */
6353 static boolean
emit_cal(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6354 emit_cal(struct svga_shader_emitter_v10 *emit,
6355          const struct tgsi_full_instruction *inst)
6356 {
6357    unsigned label = inst->Label.Label;
6358    VGPU10OperandToken0 operand;
6359    operand.value = 0;
6360    operand.operandType = VGPU10_OPERAND_TYPE_LABEL;
6361 
6362    begin_emit_instruction(emit);
6363    emit_dword(emit, operand.value);
6364    emit_dword(emit, label);
6365    end_emit_instruction(emit);
6366 
6367    return TRUE;
6368 }
6369 
6370 
6371 /**
6372  * Emit code for TGSI_OPCODE_IABS instruction.
6373  */
6374 static boolean
emit_iabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6375 emit_iabs(struct svga_shader_emitter_v10 *emit,
6376           const struct tgsi_full_instruction *inst)
6377 {
6378    /* dst.x = (src0.x < 0) ? -src0.x : src0.x
6379     * dst.y = (src0.y < 0) ? -src0.y : src0.y
6380     * dst.z = (src0.z < 0) ? -src0.z : src0.z
6381     * dst.w = (src0.w < 0) ? -src0.w : src0.w
6382     *
6383     * Translates into
6384     *   IMAX dst, src, neg(src)
6385     */
6386    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
6387    emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0],
6388                         &inst->Src[0], &neg_src);
6389 
6390    return TRUE;
6391 }
6392 
6393 
6394 /**
6395  * Emit code for TGSI_OPCODE_CMP instruction.
6396  */
6397 static boolean
emit_cmp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6398 emit_cmp(struct svga_shader_emitter_v10 *emit,
6399          const struct tgsi_full_instruction *inst)
6400 {
6401    /* dst.x = (src0.x < 0) ? src1.x : src2.x
6402     * dst.y = (src0.y < 0) ? src1.y : src2.y
6403     * dst.z = (src0.z < 0) ? src1.z : src2.z
6404     * dst.w = (src0.w < 0) ? src1.w : src2.w
6405     *
6406     * Translates into
6407     *   LT tmp, src0, 0.0
6408     *   MOVC dst, tmp, src1, src2
6409     */
6410    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6411    unsigned tmp = get_temp_index(emit);
6412    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6413    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6414 
6415    emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst,
6416                         &inst->Src[0], &zero, NULL, FALSE,
6417                         inst->Instruction.Precise);
6418    emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0],
6419                         &tmp_src, &inst->Src[1], &inst->Src[2],
6420                         inst->Instruction.Saturate, FALSE);
6421 
6422    free_temp_indexes(emit);
6423 
6424    return TRUE;
6425 }
6426 
6427 
6428 /**
6429  * Emit code for TGSI_OPCODE_DST instruction.
6430  */
6431 static boolean
emit_dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6432 emit_dst(struct svga_shader_emitter_v10 *emit,
6433          const struct tgsi_full_instruction *inst)
6434 {
6435    /*
6436     * dst.x = 1
6437     * dst.y = src0.y * src1.y
6438     * dst.z = src0.z
6439     * dst.w = src1.w
6440     */
6441 
6442    struct tgsi_full_src_register s0_yyyy =
6443       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6444    struct tgsi_full_src_register s0_zzzz =
6445       scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
6446    struct tgsi_full_src_register s1_yyyy =
6447       scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
6448    struct tgsi_full_src_register s1_wwww =
6449       scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);
6450 
6451    /*
6452     * If dst and either src0 and src1 are the same we need
6453     * to create a temporary for it and insert a extra move.
6454     */
6455    unsigned tmp_move = get_temp_index(emit);
6456    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6457    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6458 
6459    /* MOV dst.x, 1.0 */
6460    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6461       struct tgsi_full_dst_register dst_x =
6462          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6463       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6464 
6465       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6466    }
6467 
6468    /* MUL dst.y, s0.y, s1.y */
6469    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6470       struct tgsi_full_dst_register dst_y =
6471          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6472 
6473       emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
6474                            &s1_yyyy, NULL, inst->Instruction.Saturate,
6475                            inst->Instruction.Precise);
6476    }
6477 
6478    /* MOV dst.z, s0.z */
6479    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6480       struct tgsi_full_dst_register dst_z =
6481          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6482 
6483       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6484                            &dst_z, &s0_zzzz, NULL, NULL,
6485                            inst->Instruction.Saturate,
6486                            inst->Instruction.Precise);
6487   }
6488 
6489    /* MOV dst.w, s1.w */
6490    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6491       struct tgsi_full_dst_register dst_w =
6492          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6493 
6494       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6495                            &dst_w, &s1_wwww, NULL, NULL,
6496                            inst->Instruction.Saturate,
6497                            inst->Instruction.Precise);
6498    }
6499 
6500    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6501    free_temp_indexes(emit);
6502 
6503    return TRUE;
6504 }
6505 
6506 
6507 /**
6508  * A helper function to return the stream index as specified in
6509  * the immediate register
6510  */
6511 static inline unsigned
find_stream_index(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)6512 find_stream_index(struct svga_shader_emitter_v10 *emit,
6513                   const struct tgsi_full_src_register *src)
6514 {
6515    return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int;
6516 }
6517 
6518 
6519 /**
6520  * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
6521  */
6522 static boolean
emit_endprim(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6523 emit_endprim(struct svga_shader_emitter_v10 *emit,
6524              const struct tgsi_full_instruction *inst)
6525 {
6526    assert(emit->unit == PIPE_SHADER_GEOMETRY);
6527 
6528    begin_emit_instruction(emit);
6529    if (emit->version >= 50) {
6530       unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
6531 
6532       if (emit->info.num_stream_output_components[streamIndex] == 0) {
6533          /**
6534           * If there is no output for this stream, discard this instruction.
6535           */
6536          emit->discard_instruction = TRUE;
6537       }
6538       else {
6539          emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE);
6540          assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
6541          emit_stream_register(emit, streamIndex);
6542       }
6543    }
6544    else {
6545       emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
6546    }
6547    end_emit_instruction(emit);
6548    return TRUE;
6549 }
6550 
6551 
6552 /**
6553  * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
6554  */
6555 static boolean
emit_ex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6556 emit_ex2(struct svga_shader_emitter_v10 *emit,
6557          const struct tgsi_full_instruction *inst)
6558 {
6559    /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
6560     * while VGPU10 computes four values.
6561     *
6562     * dst = EX2(src):
6563     *   dst.xyzw = 2.0 ^ src.x
6564     */
6565 
6566    struct tgsi_full_src_register src_xxxx =
6567       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6568                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6569 
6570    /* EXP tmp, s0.xxxx */
6571    emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
6572                         NULL, NULL,
6573                         inst->Instruction.Saturate,
6574                         inst->Instruction.Precise);
6575 
6576    return TRUE;
6577 }
6578 
6579 
6580 /**
6581  * Emit code for TGSI_OPCODE_EXP instruction.
6582  */
6583 static boolean
emit_exp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6584 emit_exp(struct svga_shader_emitter_v10 *emit,
6585          const struct tgsi_full_instruction *inst)
6586 {
6587    /*
6588     * dst.x = 2 ^ floor(s0.x)
6589     * dst.y = s0.x - floor(s0.x)
6590     * dst.z = 2 ^ s0.x
6591     * dst.w = 1.0
6592     */
6593 
6594    struct tgsi_full_src_register src_xxxx =
6595       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6596    unsigned tmp = get_temp_index(emit);
6597    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6598    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6599 
6600    /*
6601     * If dst and src are the same we need to create
6602     * a temporary for it and insert a extra move.
6603     */
6604    unsigned tmp_move = get_temp_index(emit);
6605    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6606    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6607 
6608    /* only use X component of temp reg */
6609    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6610    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6611 
6612    /* ROUND_NI tmp.x, s0.x */
6613    emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
6614                         &src_xxxx); /* round to -infinity */
6615 
6616    /* EXP dst.x, tmp.x */
6617    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6618       struct tgsi_full_dst_register dst_x =
6619          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6620 
6621       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
6622                            NULL, NULL,
6623                            inst->Instruction.Saturate,
6624                            inst->Instruction.Precise);
6625    }
6626 
6627    /* ADD dst.y, s0.x, -tmp */
6628    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6629       struct tgsi_full_dst_register dst_y =
6630          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6631       struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);
6632 
6633       emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
6634                            &neg_tmp_src, NULL,
6635                            inst->Instruction.Saturate,
6636                            inst->Instruction.Precise);
6637    }
6638 
6639    /* EXP dst.z, s0.x */
6640    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6641       struct tgsi_full_dst_register dst_z =
6642          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6643 
6644       emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
6645                            NULL, NULL,
6646                            inst->Instruction.Saturate,
6647                            inst->Instruction.Precise);
6648    }
6649 
6650    /* MOV dst.w, 1.0 */
6651    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6652       struct tgsi_full_dst_register dst_w =
6653          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6654       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6655 
6656       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6657    }
6658 
6659    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6660 
6661    free_temp_indexes(emit);
6662 
6663    return TRUE;
6664 }
6665 
6666 
6667 /**
6668  * Emit code for TGSI_OPCODE_IF instruction.
6669  */
6670 static boolean
emit_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_src_register * src)6671 emit_if(struct svga_shader_emitter_v10 *emit,
6672         const struct tgsi_full_src_register *src)
6673 {
6674    VGPU10OpcodeToken0 opcode0;
6675 
6676    /* The src register should be a scalar */
6677    assert(src->Register.SwizzleX == src->Register.SwizzleY &&
6678           src->Register.SwizzleX == src->Register.SwizzleZ &&
6679           src->Register.SwizzleX == src->Register.SwizzleW);
6680 
6681    /* The only special thing here is that we need to set the
6682     * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
6683     * src.x is non-zero.
6684     */
6685    opcode0.value = 0;
6686    opcode0.opcodeType = VGPU10_OPCODE_IF;
6687    opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;
6688 
6689    begin_emit_instruction(emit);
6690    emit_dword(emit, opcode0.value);
6691    emit_src_register(emit, src);
6692    end_emit_instruction(emit);
6693 
6694    return TRUE;
6695 }
6696 
6697 
6698 /**
6699  * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
6700  * the register components are negative).
6701  */
6702 static boolean
emit_kill_if(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6703 emit_kill_if(struct svga_shader_emitter_v10 *emit,
6704              const struct tgsi_full_instruction *inst)
6705 {
6706    unsigned tmp = get_temp_index(emit);
6707    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6708    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6709 
6710    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6711 
6712    struct tgsi_full_dst_register tmp_dst_x =
6713       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6714    struct tgsi_full_src_register tmp_src_xxxx =
6715       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6716 
6717    /* tmp = src[0] < 0.0 */
6718    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero);
6719 
6720    if (!same_swizzle_terms(&inst->Src[0])) {
6721       /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to
6722        * logically OR the swizzle terms.  Most uses of KILL_IF only
6723        * test one channel so it's good to avoid these extra steps.
6724        */
6725       struct tgsi_full_src_register tmp_src_yyyy =
6726          scalar_src(&tmp_src, TGSI_SWIZZLE_Y);
6727       struct tgsi_full_src_register tmp_src_zzzz =
6728          scalar_src(&tmp_src, TGSI_SWIZZLE_Z);
6729       struct tgsi_full_src_register tmp_src_wwww =
6730          scalar_src(&tmp_src, TGSI_SWIZZLE_W);
6731 
6732       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6733                            &tmp_src_yyyy);
6734       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6735                            &tmp_src_zzzz);
6736       emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx,
6737                            &tmp_src_wwww);
6738    }
6739 
6740    begin_emit_instruction(emit);
6741    emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */
6742    emit_src_register(emit, &tmp_src_xxxx);
6743    end_emit_instruction(emit);
6744 
6745    free_temp_indexes(emit);
6746 
6747    return TRUE;
6748 }
6749 
6750 
6751 /**
6752  * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard).
6753  */
6754 static boolean
emit_kill(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6755 emit_kill(struct svga_shader_emitter_v10 *emit,
6756           const struct tgsi_full_instruction *inst)
6757 {
6758    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
6759 
6760    /* DISCARD if 0.0 is zero */
6761    begin_emit_instruction(emit);
6762    emit_discard_opcode(emit, FALSE);
6763    emit_src_register(emit, &zero);
6764    end_emit_instruction(emit);
6765 
6766    return TRUE;
6767 }
6768 
6769 
6770 /**
6771  * Emit code for TGSI_OPCODE_LG2 instruction.
6772  */
6773 static boolean
emit_lg2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6774 emit_lg2(struct svga_shader_emitter_v10 *emit,
6775          const struct tgsi_full_instruction *inst)
6776 {
6777    /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
6778     * while VGPU10 computes four values.
6779     *
6780     * dst = LG2(src):
6781     *   dst.xyzw = log2(src.x)
6782     */
6783 
6784    struct tgsi_full_src_register src_xxxx =
6785       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6786                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6787 
6788    /* LOG tmp, s0.xxxx */
6789    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
6790                         &inst->Dst[0], &src_xxxx, NULL, NULL,
6791                         inst->Instruction.Saturate,
6792                         inst->Instruction.Precise);
6793 
6794    return TRUE;
6795 }
6796 
6797 
6798 /**
6799  * Emit code for TGSI_OPCODE_LIT instruction.
6800  */
6801 static boolean
emit_lit(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6802 emit_lit(struct svga_shader_emitter_v10 *emit,
6803          const struct tgsi_full_instruction *inst)
6804 {
6805    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
6806 
6807    /*
6808     * If dst and src are the same we need to create
6809     * a temporary for it and insert a extra move.
6810     */
6811    unsigned tmp_move = get_temp_index(emit);
6812    struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
6813    struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);
6814 
6815    /*
6816     * dst.x = 1
6817     * dst.y = max(src.x, 0)
6818     * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
6819     * dst.w = 1
6820     */
6821 
6822    /* MOV dst.x, 1.0 */
6823    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
6824       struct tgsi_full_dst_register dst_x =
6825          writemask_dst(&move_dst, TGSI_WRITEMASK_X);
6826       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one);
6827    }
6828 
6829    /* MOV dst.w, 1.0 */
6830    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
6831       struct tgsi_full_dst_register dst_w =
6832          writemask_dst(&move_dst, TGSI_WRITEMASK_W);
6833       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
6834    }
6835 
6836    /* MAX dst.y, src.x, 0.0 */
6837    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
6838       struct tgsi_full_dst_register dst_y =
6839          writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
6840       struct tgsi_full_src_register zero =
6841          make_immediate_reg_float(emit, 0.0f);
6842       struct tgsi_full_src_register src_xxxx =
6843          swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
6844                      TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
6845 
6846       emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
6847                            &zero, NULL, inst->Instruction.Saturate, FALSE);
6848    }
6849 
6850    /*
6851     * tmp1 = clamp(src.w, -128, 128);
6852     *   MAX tmp1, src.w, -128
6853     *   MIN tmp1, tmp1, 128
6854     *
6855     * tmp2 = max(tmp2, 0);
6856     *   MAX tmp2, src.y, 0
6857     *
6858     * tmp1 = pow(tmp2, tmp1);
6859     *   LOG tmp2, tmp2
6860     *   MUL tmp1, tmp2, tmp1
6861     *   EXP tmp1, tmp1
6862     *
6863     * tmp1 = (src.w == 0) ? 1 : tmp1;
6864     *   EQ tmp2, 0, src.w
6865     *   MOVC tmp1, tmp2, 1.0, tmp1
6866     *
6867     * dst.z = (0 < src.x) ? tmp1 : 0;
6868     *   LT tmp2, 0, src.x
6869     *   MOVC dst.z, tmp2, tmp1, 0.0
6870     */
6871    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6872       struct tgsi_full_dst_register dst_z =
6873          writemask_dst(&move_dst, TGSI_WRITEMASK_Z);
6874 
6875       unsigned tmp1 = get_temp_index(emit);
6876       struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
6877       struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
6878       unsigned tmp2 = get_temp_index(emit);
6879       struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
6880       struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
6881 
6882       struct tgsi_full_src_register src_xxxx =
6883          scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6884       struct tgsi_full_src_register src_yyyy =
6885          scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
6886       struct tgsi_full_src_register src_wwww =
6887          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
6888 
6889       struct tgsi_full_src_register zero =
6890          make_immediate_reg_float(emit, 0.0f);
6891       struct tgsi_full_src_register lowerbound =
6892          make_immediate_reg_float(emit, -128.0f);
6893       struct tgsi_full_src_register upperbound =
6894          make_immediate_reg_float(emit, 128.0f);
6895 
6896       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
6897                            &lowerbound);
6898       emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
6899                            &upperbound);
6900       emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
6901                            &zero);
6902 
6903       /* POW tmp1, tmp2, tmp1 */
6904       /* LOG tmp2, tmp2 */
6905       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src);
6906 
6907       /* MUL tmp1, tmp2, tmp1 */
6908       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
6909                            &tmp1_src);
6910 
6911       /* EXP tmp1, tmp1 */
6912       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src);
6913 
6914       /* EQ tmp2, 0, src.w */
6915       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww);
6916       /* MOVC tmp1.z, tmp2, tmp1, 1.0 */
6917       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
6918                            &tmp2_src, &one, &tmp1_src);
6919 
6920       /* LT tmp2, 0, src.x */
6921       emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx);
6922       /* MOVC dst.z, tmp2, tmp1, 0.0 */
6923       emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
6924                            &tmp2_src, &tmp1_src, &zero);
6925    }
6926 
6927    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src);
6928    free_temp_indexes(emit);
6929 
6930    return TRUE;
6931 }
6932 
6933 
6934 /**
6935  * Emit Level Of Detail Query (LODQ) instruction.
6936  */
6937 static boolean
emit_lodq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6938 emit_lodq(struct svga_shader_emitter_v10 *emit,
6939           const struct tgsi_full_instruction *inst)
6940 {
6941    const uint unit = inst->Src[1].Register.Index;
6942 
6943    assert(emit->version >= 41);
6944 
6945    /* LOD dst, coord, resource, sampler */
6946    begin_emit_instruction(emit);
6947    emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE);
6948    emit_dst_register(emit, &inst->Dst[0]);
6949    emit_src_register(emit, &inst->Src[0]); /* coord */
6950    emit_resource_register(emit, unit);
6951    emit_sampler_register(emit, unit);
6952    end_emit_instruction(emit);
6953 
6954    return TRUE;
6955 }
6956 
6957 
6958 /**
6959  * Emit code for TGSI_OPCODE_LOG instruction.
6960  */
6961 static boolean
emit_log(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)6962 emit_log(struct svga_shader_emitter_v10 *emit,
6963          const struct tgsi_full_instruction *inst)
6964 {
6965    /*
6966     * dst.x = floor(lg2(abs(s0.x)))
6967     * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
6968     * dst.z = lg2(abs(s0.x))
6969     * dst.w = 1.0
6970     */
6971 
6972    struct tgsi_full_src_register src_xxxx =
6973       scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
6974    unsigned tmp = get_temp_index(emit);
6975    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
6976    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
6977    struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);
6978 
6979    /* only use X component of temp reg */
6980    tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
6981    tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);
6982 
6983    /* LOG tmp.x, abs(s0.x) */
6984    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
6985       emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx);
6986    }
6987 
6988    /* MOV dst.z, tmp.x */
6989    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
6990       struct tgsi_full_dst_register dst_z =
6991          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);
6992 
6993       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
6994                            &dst_z, &tmp_src, NULL, NULL,
6995                            inst->Instruction.Saturate, FALSE);
6996    }
6997 
6998    /* FLR tmp.x, tmp.x */
6999    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
7000       emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src);
7001    }
7002 
7003    /* MOV dst.x, tmp.x */
7004    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
7005       struct tgsi_full_dst_register dst_x =
7006          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);
7007 
7008       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7009                            &dst_x, &tmp_src, NULL, NULL,
7010                            inst->Instruction.Saturate, FALSE);
7011    }
7012 
7013    /* EXP tmp.x, tmp.x */
7014    /* DIV dst.y, abs(s0.x), tmp.x */
7015    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
7016       struct tgsi_full_dst_register dst_y =
7017          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);
7018 
7019       emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src);
7020       emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
7021                            &tmp_src, NULL, inst->Instruction.Saturate, FALSE);
7022    }
7023 
7024    /* MOV dst.w, 1.0 */
7025    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
7026       struct tgsi_full_dst_register dst_w =
7027          writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
7028       struct tgsi_full_src_register one =
7029          make_immediate_reg_float(emit, 1.0f);
7030 
7031       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one);
7032    }
7033 
7034    free_temp_indexes(emit);
7035 
7036    return TRUE;
7037 }
7038 
7039 
7040 /**
7041  * Emit code for TGSI_OPCODE_LRP instruction.
7042  */
7043 static boolean
emit_lrp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7044 emit_lrp(struct svga_shader_emitter_v10 *emit,
7045          const struct tgsi_full_instruction *inst)
7046 {
7047    /* dst = LRP(s0, s1, s2):
7048     *   dst = s0 * (s1 - s2) + s2
7049     * Translates into:
7050     *   SUB tmp, s1, s2;        tmp = s1 - s2
7051     *   MAD dst, s0, tmp, s2;   dst = s0 * t1 + s2
7052     */
7053    unsigned tmp = get_temp_index(emit);
7054    struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7055    struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7056    struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7057 
7058    /* ADD tmp, s1, -s2 */
7059    emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7060                         &inst->Src[1], &neg_src2, NULL, FALSE,
7061                         inst->Instruction.Precise);
7062 
7063    /* MAD dst, s1, tmp, s3 */
7064    emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7065                         &inst->Src[0], &src_tmp, &inst->Src[2],
7066                         inst->Instruction.Saturate,
7067                         inst->Instruction.Precise);
7068 
7069    free_temp_indexes(emit);
7070 
7071    return TRUE;
7072 }
7073 
7074 
7075 /**
7076  * Emit code for TGSI_OPCODE_POW instruction.
7077  */
7078 static boolean
emit_pow(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7079 emit_pow(struct svga_shader_emitter_v10 *emit,
7080          const struct tgsi_full_instruction *inst)
7081 {
7082    /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7083     * src1.x while VGPU10 computes four values.
7084     *
7085     * dst = POW(src0, src1):
7086     *   dst.xyzw = src0.x ^ src1.x
7087     */
7088    unsigned tmp = get_temp_index(emit);
7089    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7090    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7091    struct tgsi_full_src_register src0_xxxx =
7092       swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7093                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7094    struct tgsi_full_src_register src1_xxxx =
7095       swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7096                   TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7097 
7098    /* LOG tmp, s0.xxxx */
7099    emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7100                         &tmp_dst, &src0_xxxx, NULL, NULL,
7101                         FALSE, inst->Instruction.Precise);
7102 
7103    /* MUL tmp, tmp, s1.xxxx */
7104    emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7105                         &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7106                         FALSE, inst->Instruction.Precise);
7107 
7108    /* EXP tmp, s0.xxxx */
7109    emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7110                         &inst->Dst[0], &tmp_src, NULL, NULL,
7111                         inst->Instruction.Saturate,
7112                         inst->Instruction.Precise);
7113 
7114    /* free tmp */
7115    free_temp_indexes(emit);
7116 
7117    return TRUE;
7118 }
7119 
7120 
7121 /**
7122  * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7123  */
7124 static boolean
emit_rcp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7125 emit_rcp(struct svga_shader_emitter_v10 *emit,
7126          const struct tgsi_full_instruction *inst)
7127 {
7128    if (emit->version >= 50) {
7129       /* use new RCP instruction.  But VGPU10_OPCODE_RCP is component-wise
7130        * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx so we need
7131        * to manipulate the src register's swizzle.
7132        */
7133       struct tgsi_full_src_register src = inst->Src[0];
7134       src.Register.SwizzleY =
7135       src.Register.SwizzleZ =
7136       src.Register.SwizzleW = src.Register.SwizzleX;
7137 
7138       begin_emit_instruction(emit);
7139       emit_opcode_precise(emit, VGPU10_OPCODE_RCP,
7140                           inst->Instruction.Saturate,
7141                           inst->Instruction.Precise);
7142       emit_dst_register(emit, &inst->Dst[0]);
7143       emit_src_register(emit, &src);
7144       end_emit_instruction(emit);
7145    }
7146    else {
7147       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7148 
7149       unsigned tmp = get_temp_index(emit);
7150       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7151       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7152 
7153       struct tgsi_full_dst_register tmp_dst_x =
7154          writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7155       struct tgsi_full_src_register tmp_src_xxxx =
7156          scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7157 
7158       /* DIV tmp.x, 1.0, s0 */
7159       emit_instruction_opn(emit, VGPU10_OPCODE_DIV,
7160                            &tmp_dst_x, &one, &inst->Src[0], NULL,
7161                            FALSE, inst->Instruction.Precise);
7162 
7163       /* MOV dst, tmp.xxxx */
7164       emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7165                            &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7166                            inst->Instruction.Saturate,
7167                            inst->Instruction.Precise);
7168 
7169       free_temp_indexes(emit);
7170    }
7171 
7172    return TRUE;
7173 }
7174 
7175 
7176 /**
7177  * Emit code for TGSI_OPCODE_RSQ instruction.
7178  */
7179 static boolean
emit_rsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7180 emit_rsq(struct svga_shader_emitter_v10 *emit,
7181          const struct tgsi_full_instruction *inst)
7182 {
7183    /* dst = RSQ(src):
7184     *   dst.xyzw = 1 / sqrt(src.x)
7185     * Translates into:
7186     *   RSQ tmp, src.x
7187     *   MOV dst, tmp.xxxx
7188     */
7189 
7190    unsigned tmp = get_temp_index(emit);
7191    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7192    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7193 
7194    struct tgsi_full_dst_register tmp_dst_x =
7195       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7196    struct tgsi_full_src_register tmp_src_xxxx =
7197       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7198 
7199    /* RSQ tmp, src.x */
7200    emit_instruction_opn(emit, VGPU10_OPCODE_RSQ,
7201                         &tmp_dst_x, &inst->Src[0], NULL, NULL,
7202                         FALSE, inst->Instruction.Precise);
7203 
7204    /* MOV dst, tmp.xxxx */
7205    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7206                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7207                         inst->Instruction.Saturate,
7208                         inst->Instruction.Precise);
7209 
7210    /* free tmp */
7211    free_temp_indexes(emit);
7212 
7213    return TRUE;
7214 }
7215 
7216 
7217 /**
7218  * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
7219  */
7220 static boolean
emit_seq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7221 emit_seq(struct svga_shader_emitter_v10 *emit,
7222          const struct tgsi_full_instruction *inst)
7223 {
7224    /* dst = SEQ(s0, s1):
7225     *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
7226     * Translates into:
7227     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7228     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7229     */
7230    unsigned tmp = get_temp_index(emit);
7231    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7232    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7233    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7234    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7235 
7236    /* EQ tmp, s0, s1 */
7237    emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
7238                         &inst->Src[1]);
7239 
7240    /* MOVC dst, tmp, one, zero */
7241    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7242                         &one, &zero);
7243 
7244    free_temp_indexes(emit);
7245 
7246    return TRUE;
7247 }
7248 
7249 
7250 /**
7251  * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
7252  */
7253 static boolean
emit_sge(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7254 emit_sge(struct svga_shader_emitter_v10 *emit,
7255          const struct tgsi_full_instruction *inst)
7256 {
7257    /* dst = SGE(s0, s1):
7258     *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
7259     * Translates into:
7260     *   GE tmp, s0, s1;           tmp = s0 >= s1 : 0xffffffff : 0 (per comp)
7261     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7262     */
7263    unsigned tmp = get_temp_index(emit);
7264    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7265    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7266    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7267    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7268 
7269    /* GE tmp, s0, s1 */
7270    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
7271                         &inst->Src[1]);
7272 
7273    /* MOVC dst, tmp, one, zero */
7274    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7275                         &one, &zero);
7276 
7277    free_temp_indexes(emit);
7278 
7279    return TRUE;
7280 }
7281 
7282 
7283 /**
7284  * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
7285  */
7286 static boolean
emit_sgt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7287 emit_sgt(struct svga_shader_emitter_v10 *emit,
7288          const struct tgsi_full_instruction *inst)
7289 {
7290    /* dst = SGT(s0, s1):
7291     *   dst = s0 > s1 ? 1.0 : 0.0  (per component)
7292     * Translates into:
7293     *   LT tmp, s1, s0;           tmp = s1 < s0 ? 0xffffffff : 0 (per comp)
7294     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7295     */
7296    unsigned tmp = get_temp_index(emit);
7297    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7298    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7299    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7300    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7301 
7302    /* LT tmp, s1, s0 */
7303    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1],
7304                         &inst->Src[0]);
7305 
7306    /* MOVC dst, tmp, one, zero */
7307    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7308                         &one, &zero);
7309 
7310    free_temp_indexes(emit);
7311 
7312    return TRUE;
7313 }
7314 
7315 
7316 /**
7317  * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions.
7318  */
7319 static boolean
emit_sincos(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7320 emit_sincos(struct svga_shader_emitter_v10 *emit,
7321          const struct tgsi_full_instruction *inst)
7322 {
7323    unsigned tmp = get_temp_index(emit);
7324    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7325    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7326 
7327    struct tgsi_full_src_register tmp_src_xxxx =
7328       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
7329    struct tgsi_full_dst_register tmp_dst_x =
7330       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
7331 
7332    begin_emit_instruction(emit);
7333    emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);
7334 
7335    if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
7336    {
7337       emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
7338       emit_null_dst_register(emit);  /* second destination register */
7339    }
7340    else {
7341       emit_null_dst_register(emit);
7342       emit_dst_register(emit, &tmp_dst_x);
7343    }
7344 
7345    emit_src_register(emit, &inst->Src[0]);
7346    end_emit_instruction(emit);
7347 
7348    emit_instruction_opn(emit, VGPU10_OPCODE_MOV,
7349                         &inst->Dst[0], &tmp_src_xxxx, NULL, NULL,
7350                         inst->Instruction.Saturate,
7351                         inst->Instruction.Precise);
7352 
7353    free_temp_indexes(emit);
7354 
7355    return TRUE;
7356 }
7357 
7358 
7359 /**
7360  * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
7361  */
7362 static boolean
emit_sle(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7363 emit_sle(struct svga_shader_emitter_v10 *emit,
7364          const struct tgsi_full_instruction *inst)
7365 {
7366    /* dst = SLE(s0, s1):
7367     *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
7368     * Translates into:
7369     *   GE tmp, s1, s0;           tmp = s1 >= s0 : 0xffffffff : 0 (per comp)
7370     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7371     */
7372    unsigned tmp = get_temp_index(emit);
7373    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7374    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7375    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7376    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7377 
7378    /* GE tmp, s1, s0 */
7379    emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
7380                         &inst->Src[0]);
7381 
7382    /* MOVC dst, tmp, one, zero */
7383    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7384                         &one, &zero);
7385 
7386    free_temp_indexes(emit);
7387 
7388    return TRUE;
7389 }
7390 
7391 
7392 /**
7393  * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
7394  */
7395 static boolean
emit_slt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7396 emit_slt(struct svga_shader_emitter_v10 *emit,
7397          const struct tgsi_full_instruction *inst)
7398 {
7399    /* dst = SLT(s0, s1):
7400     *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
7401     * Translates into:
7402     *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
7403     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7404     */
7405    unsigned tmp = get_temp_index(emit);
7406    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7407    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7408    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7409    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7410 
7411    /* LT tmp, s0, s1 */
7412    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7413                         &inst->Src[1]);
7414 
7415    /* MOVC dst, tmp, one, zero */
7416    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7417                         &one, &zero);
7418 
7419    free_temp_indexes(emit);
7420 
7421    return TRUE;
7422 }
7423 
7424 
7425 /**
7426  * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7427  */
7428 static boolean
emit_sne(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7429 emit_sne(struct svga_shader_emitter_v10 *emit,
7430          const struct tgsi_full_instruction *inst)
7431 {
7432    /* dst = SNE(s0, s1):
7433     *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
7434     * Translates into:
7435     *   EQ tmp, s0, s1;           tmp = s0 == s1 : 0xffffffff : 0 (per comp)
7436     *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7437     */
7438    unsigned tmp = get_temp_index(emit);
7439    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7440    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7441    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7442    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7443 
7444    /* NE tmp, s0, s1 */
7445    emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7446                         &inst->Src[1]);
7447 
7448    /* MOVC dst, tmp, one, zero */
7449    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7450                         &one, &zero);
7451 
7452    free_temp_indexes(emit);
7453 
7454    return TRUE;
7455 }
7456 
7457 
7458 /**
7459  * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7460  */
7461 static boolean
emit_ssg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7462 emit_ssg(struct svga_shader_emitter_v10 *emit,
7463          const struct tgsi_full_instruction *inst)
7464 {
7465    /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7466     * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7467     * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7468     * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7469     * Translates into:
7470     *   LT tmp1, src, zero;           tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7471     *   MOVC tmp2, tmp1, -1.0, 0.0;   tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7472     *   LT tmp1, zero, src;           tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7473     *   MOVC dst, tmp1, 1.0, tmp2;    dst = tmp1 ? 1.0 : tmp2 (per component)
7474     */
7475    struct tgsi_full_src_register zero =
7476       make_immediate_reg_float(emit, 0.0f);
7477    struct tgsi_full_src_register one =
7478       make_immediate_reg_float(emit, 1.0f);
7479    struct tgsi_full_src_register neg_one =
7480       make_immediate_reg_float(emit, -1.0f);
7481 
7482    unsigned tmp1 = get_temp_index(emit);
7483    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7484    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7485 
7486    unsigned tmp2 = get_temp_index(emit);
7487    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7488    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7489 
7490    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0],
7491                         &zero);
7492    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src,
7493                         &neg_one, &zero);
7494    emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero,
7495                         &inst->Src[0]);
7496    emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src,
7497                         &one, &tmp2_src);
7498 
7499    free_temp_indexes(emit);
7500 
7501    return TRUE;
7502 }
7503 
7504 
7505 /**
7506  * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction.
7507  */
7508 static boolean
emit_issg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7509 emit_issg(struct svga_shader_emitter_v10 *emit,
7510           const struct tgsi_full_instruction *inst)
7511 {
7512    /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
7513     * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
7514     * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
7515     * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
7516     * Translates into:
7517     *   ILT tmp1, src, 0              tmp1 = src < 0 ? -1 : 0 (per component)
7518     *   ILT tmp2, 0, src              tmp2 = 0 < src ? -1 : 0 (per component)
7519     *   IADD dst, tmp1, neg(tmp2)     dst  = tmp1 - tmp2      (per component)
7520     */
7521    struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7522 
7523    unsigned tmp1 = get_temp_index(emit);
7524    struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
7525    struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
7526 
7527    unsigned tmp2 = get_temp_index(emit);
7528    struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
7529    struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);
7530 
7531    struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);
7532 
7533    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
7534                         &inst->Src[0], &zero);
7535    emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
7536                         &zero, &inst->Src[0]);
7537    emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
7538                         &tmp1_src, &neg_tmp2);
7539 
7540    free_temp_indexes(emit);
7541 
7542    return TRUE;
7543 }
7544 
7545 
7546 /**
7547  * Emit a comparison instruction.  The dest register will get
7548  * 0 or ~0 values depending on the outcome of comparing src0 to src1.
7549  */
7550 static void
emit_comparison(struct svga_shader_emitter_v10 * emit,SVGA3dCmpFunc func,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src0,const struct tgsi_full_src_register * src1)7551 emit_comparison(struct svga_shader_emitter_v10 *emit,
7552                 SVGA3dCmpFunc func,
7553                 const struct tgsi_full_dst_register *dst,
7554                 const struct tgsi_full_src_register *src0,
7555                 const struct tgsi_full_src_register *src1)
7556 {
7557    struct tgsi_full_src_register immediate;
7558    VGPU10OpcodeToken0 opcode0;
7559    boolean swapSrc = FALSE;
7560 
7561    /* Sanity checks for svga vs. gallium enums */
7562    STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
7563    STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));
7564 
7565    opcode0.value = 0;
7566 
7567    switch (func) {
7568    case SVGA3D_CMP_NEVER:
7569       immediate = make_immediate_reg_int(emit, 0);
7570       /* MOV dst, {0} */
7571       begin_emit_instruction(emit);
7572       emit_dword(emit, VGPU10_OPCODE_MOV);
7573       emit_dst_register(emit, dst);
7574       emit_src_register(emit, &immediate);
7575       end_emit_instruction(emit);
7576       return;
7577    case SVGA3D_CMP_ALWAYS:
7578       immediate = make_immediate_reg_int(emit, -1);
7579       /* MOV dst, {-1} */
7580       begin_emit_instruction(emit);
7581       emit_dword(emit, VGPU10_OPCODE_MOV);
7582       emit_dst_register(emit, dst);
7583       emit_src_register(emit, &immediate);
7584       end_emit_instruction(emit);
7585       return;
7586    case SVGA3D_CMP_LESS:
7587       opcode0.opcodeType = VGPU10_OPCODE_LT;
7588       break;
7589    case SVGA3D_CMP_EQUAL:
7590       opcode0.opcodeType = VGPU10_OPCODE_EQ;
7591       break;
7592    case SVGA3D_CMP_LESSEQUAL:
7593       opcode0.opcodeType = VGPU10_OPCODE_GE;
7594       swapSrc = TRUE;
7595       break;
7596    case SVGA3D_CMP_GREATER:
7597       opcode0.opcodeType = VGPU10_OPCODE_LT;
7598       swapSrc = TRUE;
7599       break;
7600    case SVGA3D_CMP_NOTEQUAL:
7601       opcode0.opcodeType = VGPU10_OPCODE_NE;
7602       break;
7603    case SVGA3D_CMP_GREATEREQUAL:
7604       opcode0.opcodeType = VGPU10_OPCODE_GE;
7605       break;
7606    default:
7607       assert(!"Unexpected comparison mode");
7608       opcode0.opcodeType = VGPU10_OPCODE_EQ;
7609    }
7610 
7611    begin_emit_instruction(emit);
7612    emit_dword(emit, opcode0.value);
7613    emit_dst_register(emit, dst);
7614    if (swapSrc) {
7615       emit_src_register(emit, src1);
7616       emit_src_register(emit, src0);
7617    }
7618    else {
7619       emit_src_register(emit, src0);
7620       emit_src_register(emit, src1);
7621    }
7622    end_emit_instruction(emit);
7623 }
7624 
7625 
7626 /**
7627  * Get texel/address offsets for a texture instruction.
7628  */
7629 static void
get_texel_offsets(const struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,int offsets[3])7630 get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
7631                   const struct tgsi_full_instruction *inst, int offsets[3])
7632 {
7633    if (inst->Texture.NumOffsets == 1) {
7634       /* According to OpenGL Shader Language spec the offsets are only
7635        * fetched from a previously-declared immediate/literal.
7636        */
7637       const struct tgsi_texture_offset *off = inst->TexOffsets;
7638       const unsigned index = off[0].Index;
7639       const unsigned swizzleX = off[0].SwizzleX;
7640       const unsigned swizzleY = off[0].SwizzleY;
7641       const unsigned swizzleZ = off[0].SwizzleZ;
7642       const union tgsi_immediate_data *imm = emit->immediates[index];
7643 
7644       assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);
7645 
7646       offsets[0] = imm[swizzleX].Int;
7647       offsets[1] = imm[swizzleY].Int;
7648       offsets[2] = imm[swizzleZ].Int;
7649    }
7650    else {
7651       offsets[0] = offsets[1] = offsets[2] = 0;
7652    }
7653 }
7654 
7655 
7656 /**
7657  * Set up the coordinate register for texture sampling.
7658  * When we're sampling from a RECT texture we have to scale the
7659  * unnormalized coordinate to a normalized coordinate.
7660  * We do that by multiplying the coordinate by an "extra" constant.
7661  * An alternative would be to use the RESINFO instruction to query the
7662  * texture's size.
7663  */
7664 static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_src_register * coord)7665 setup_texcoord(struct svga_shader_emitter_v10 *emit,
7666                unsigned unit,
7667                const struct tgsi_full_src_register *coord)
7668 {
7669    if (emit->sampler_view[unit] && emit->key.tex[unit].unnormalized) {
7670       unsigned scale_index = emit->texcoord_scale_index[unit];
7671       unsigned tmp = get_temp_index(emit);
7672       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7673       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7674       struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7675 
7676       if (emit->key.tex[unit].texel_bias) {
7677          /* to fix texture coordinate rounding issue, 0.0001 offset is
7678           * been added. This fixes piglit test fbo-blit-scaled-linear. */
7679          struct tgsi_full_src_register offset =
7680             make_immediate_reg_float(emit, 0.0001f);
7681 
7682          /* ADD tmp, coord, offset */
7683          emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7684                               coord, &offset);
7685          /* MUL tmp, tmp, scale */
7686          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7687                               &tmp_src, &scale_src);
7688       }
7689       else {
7690          /* MUL tmp, coord, const[] */
7691          emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7692                               coord, &scale_src);
7693       }
7694       return tmp_src;
7695    }
7696    else {
7697       /* use texcoord as-is */
7698       return *coord;
7699    }
7700 }
7701 
7702 
7703 /**
7704  * For SAMPLE_C instructions, emit the extra src register which indicates
7705  * the reference/comparision value.
7706  */
7707 static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 * emit,enum tgsi_texture_type target,const struct tgsi_full_src_register * coord)7708 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7709                           enum tgsi_texture_type target,
7710                           const struct tgsi_full_src_register *coord)
7711 {
7712    struct tgsi_full_src_register coord_src_ref;
7713    int component;
7714 
7715    assert(tgsi_is_shadow_target(target));
7716 
7717    component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7718    assert(component >= 0);
7719 
7720    coord_src_ref = scalar_src(coord, component);
7721 
7722    emit_src_register(emit, &coord_src_ref);
7723 }
7724 
7725 
/**
 * Info for implementing texture swizzles.
 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
 * functions use this to encapsulate the extra steps needed to perform
 * a texture swizzle, or shadow/depth comparisons.
 * The shadow/depth comparison is only done here for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 *
 * Note that begin_tex_swizzle() only fills in unit, tmp_src and tmp_dst
 * when swizzled or shadow_compare is set; they must not be read otherwise.
 */
struct tex_swizzle_info
{
   /** TRUE when the unit's key swizzle differs from the identity XYZW */
   boolean swizzled;
   /** TRUE when end_tex_swizzle() must emit the depth comparison itself */
   boolean shadow_compare;
   /** texture unit; valid only if swizzled or shadow_compare */
   unsigned unit;
   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
   /** source view of the temp holding the raw sample result */
   struct tgsi_full_src_register tmp_src;
   /** destination view of that same temp register */
   struct tgsi_full_dst_register tmp_dst;
   /** the instruction's own destination (Dst[0]) */
   const struct tgsi_full_dst_register *inst_dst;
   /** the instruction's first source (Src[0]), used for the shadow ref */
   const struct tgsi_full_src_register *coord_src;
};
7745 
7746 
7747 /**
7748  * Do setup for handling texture swizzles or shadow compares.
7749  * \param unit  the texture unit
7750  * \param inst  the TGSI texture instruction
7751  * \param shadow_compare  do shadow/depth comparison?
7752  * \param swz  returns the swizzle info
7753  */
7754 static void
begin_tex_swizzle(struct svga_shader_emitter_v10 * emit,unsigned unit,const struct tgsi_full_instruction * inst,boolean shadow_compare,struct tex_swizzle_info * swz)7755 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7756                   unsigned unit,
7757                   const struct tgsi_full_instruction *inst,
7758                   boolean shadow_compare,
7759                   struct tex_swizzle_info *swz)
7760 {
7761    swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
7762                     emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
7763                     emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
7764                     emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);
7765 
7766    swz->shadow_compare = shadow_compare;
7767    swz->texture_target = inst->Texture.Texture;
7768 
7769    if (swz->swizzled || shadow_compare) {
7770       /* Allocate temp register for the result of the SAMPLE instruction
7771        * and the source of the MOV/compare/swizzle instructions.
7772        */
7773       unsigned tmp = get_temp_index(emit);
7774       swz->tmp_src = make_src_temp_reg(tmp);
7775       swz->tmp_dst = make_dst_temp_reg(tmp);
7776 
7777       swz->unit = unit;
7778    }
7779    swz->inst_dst = &inst->Dst[0];
7780    swz->coord_src = &inst->Src[0];
7781 
7782    emit->fs.shadow_compare_units |= shadow_compare << unit;
7783 }
7784 
7785 
7786 /**
7787  * Returns the register to put the SAMPLE instruction results into.
7788  * This will either be the original instruction dst reg (if no swizzle
7789  * and no shadow comparison) or a temporary reg if there is a swizzle.
7790  */
7791 static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info * swz)7792 get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
7793 {
7794    return (swz->swizzled || swz->shadow_compare)
7795       ? &swz->tmp_dst : swz->inst_dst;
7796 }
7797 
7798 
7799 /**
7800  * This emits the MOV instruction that actually implements a texture swizzle
7801  * and/or shadow comparison.
7802  */
7803 static void
end_tex_swizzle(struct svga_shader_emitter_v10 * emit,const struct tex_swizzle_info * swz)7804 end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
7805                 const struct tex_swizzle_info *swz)
7806 {
7807    if (swz->shadow_compare) {
7808       /* Emit extra instructions to compare the fetched texel value against
7809        * a texture coordinate component.  The result of the comparison
7810        * is 0.0 or 1.0.
7811        */
7812       struct tgsi_full_src_register coord_src;
7813       struct tgsi_full_src_register texel_src =
7814          scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
7815       struct tgsi_full_src_register one =
7816          make_immediate_reg_float(emit, 1.0f);
7817       /* convert gallium comparison func to SVGA comparison func */
7818       SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;
7819 
7820       int component =
7821          tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
7822       assert(component >= 0);
7823       coord_src = scalar_src(swz->coord_src, component);
7824 
7825       /* COMPARE tmp, coord, texel */
7826       emit_comparison(emit, compare_func,
7827                       &swz->tmp_dst, &coord_src, &texel_src);
7828 
7829       /* AND dest, tmp, {1.0} */
7830       begin_emit_instruction(emit);
7831       emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
7832       if (swz->swizzled) {
7833          emit_dst_register(emit, &swz->tmp_dst);
7834       }
7835       else {
7836          emit_dst_register(emit, swz->inst_dst);
7837       }
7838       emit_src_register(emit, &swz->tmp_src);
7839       emit_src_register(emit, &one);
7840       end_emit_instruction(emit);
7841    }
7842 
7843    if (swz->swizzled) {
7844       unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
7845       unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
7846       unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
7847       unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
7848       unsigned writemask_0 = 0, writemask_1 = 0;
7849       boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);
7850 
7851       /* Swizzle w/out zero/one terms */
7852       struct tgsi_full_src_register src_swizzled =
7853          swizzle_src(&swz->tmp_src,
7854                      swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
7855                      swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
7856                      swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
7857                      swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);
7858 
7859       /* MOV dst, color(tmp).<swizzle> */
7860       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
7861                            swz->inst_dst, &src_swizzled);
7862 
7863       /* handle swizzle zero terms */
7864       writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
7865                      ((swz_g == PIPE_SWIZZLE_0) << 1) |
7866                      ((swz_b == PIPE_SWIZZLE_0) << 2) |
7867                      ((swz_a == PIPE_SWIZZLE_0) << 3));
7868       writemask_0 &= swz->inst_dst->Register.WriteMask;
7869 
7870       if (writemask_0) {
7871          struct tgsi_full_src_register zero = int_tex ?
7872             make_immediate_reg_int(emit, 0) :
7873             make_immediate_reg_float(emit, 0.0f);
7874          struct tgsi_full_dst_register dst =
7875             writemask_dst(swz->inst_dst, writemask_0);
7876 
7877          /* MOV dst.writemask_0, {0,0,0,0} */
7878          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero);
7879       }
7880 
7881       /* handle swizzle one terms */
7882       writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
7883                      ((swz_g == PIPE_SWIZZLE_1) << 1) |
7884                      ((swz_b == PIPE_SWIZZLE_1) << 2) |
7885                      ((swz_a == PIPE_SWIZZLE_1) << 3));
7886       writemask_1 &= swz->inst_dst->Register.WriteMask;
7887 
7888       if (writemask_1) {
7889          struct tgsi_full_src_register one = int_tex ?
7890             make_immediate_reg_int(emit, 1) :
7891             make_immediate_reg_float(emit, 1.0f);
7892          struct tgsi_full_dst_register dst =
7893             writemask_dst(swz->inst_dst, writemask_1);
7894 
7895          /* MOV dst.writemask_1, {1,1,1,1} */
7896          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one);
7897       }
7898    }
7899 }
7900 
7901 
7902 /**
7903  * Emit code for TGSI_OPCODE_SAMPLE instruction.
7904  */
7905 static boolean
emit_sample(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7906 emit_sample(struct svga_shader_emitter_v10 *emit,
7907             const struct tgsi_full_instruction *inst)
7908 {
7909    const unsigned resource_unit = inst->Src[1].Register.Index;
7910    const unsigned sampler_unit = inst->Src[2].Register.Index;
7911    struct tgsi_full_src_register coord;
7912    int offsets[3];
7913    struct tex_swizzle_info swz_info;
7914 
7915    begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);
7916 
7917    get_texel_offsets(emit, inst, offsets);
7918 
7919    coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);
7920 
7921    /* SAMPLE dst, coord(s0), resource, sampler */
7922    begin_emit_instruction(emit);
7923 
7924    /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
7925     * with LOD=0.  But our virtual GPU accepts this as-is.
7926     */
7927    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
7928                       inst->Instruction.Saturate, offsets);
7929    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
7930    emit_src_register(emit, &coord);
7931    emit_resource_register(emit, resource_unit);
7932    emit_sampler_register(emit, sampler_unit);
7933    end_emit_instruction(emit);
7934 
7935    end_tex_swizzle(emit, &swz_info);
7936 
7937    free_temp_indexes(emit);
7938 
7939    return TRUE;
7940 }
7941 
7942 
7943 /**
7944  * Check if a texture instruction is valid.
7945  * An example of an invalid texture instruction is doing shadow comparison
7946  * with an integer-valued texture.
7947  * If we detect an invalid texture instruction, we replace it with:
7948  *   MOV dst, {1,1,1,1};
7949  * \return TRUE if valid, FALSE if invalid.
7950  */
7951 static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7952 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
7953                          const struct tgsi_full_instruction *inst)
7954 {
7955    const unsigned unit = inst->Src[1].Register.Index;
7956    const enum tgsi_texture_type target = inst->Texture.Texture;
7957    boolean valid = TRUE;
7958 
7959    if (tgsi_is_shadow_target(target) &&
7960        is_integer_type(emit->sampler_return_type[unit])) {
7961       debug_printf("Invalid SAMPLE_C with an integer texture!\n");
7962       valid = FALSE;
7963    }
7964    /* XXX might check for other conditions in the future here */
7965 
7966    if (!valid) {
7967       /* emit a MOV dst, {1,1,1,1} instruction. */
7968       struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7969       begin_emit_instruction(emit);
7970       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
7971       emit_dst_register(emit, &inst->Dst[0]);
7972       emit_src_register(emit, &one);
7973       end_emit_instruction(emit);
7974    }
7975 
7976    return valid;
7977 }
7978 
7979 
7980 /**
7981  * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
7982  */
7983 static boolean
emit_tex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)7984 emit_tex(struct svga_shader_emitter_v10 *emit,
7985          const struct tgsi_full_instruction *inst)
7986 {
7987    const uint unit = inst->Src[1].Register.Index;
7988    const enum tgsi_texture_type target = inst->Texture.Texture;
7989    VGPU10_OPCODE_TYPE opcode;
7990    struct tgsi_full_src_register coord;
7991    int offsets[3];
7992    struct tex_swizzle_info swz_info;
7993 
7994    /* check that the sampler returns a float */
7995    if (!is_valid_tex_instruction(emit, inst))
7996       return TRUE;
7997 
7998    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
7999 
8000    get_texel_offsets(emit, inst, offsets);
8001 
8002    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8003 
8004    /* SAMPLE dst, coord(s0), resource, sampler */
8005    begin_emit_instruction(emit);
8006 
8007    if (tgsi_is_shadow_target(target))
8008       opcode = VGPU10_OPCODE_SAMPLE_C;
8009    else
8010       opcode = VGPU10_OPCODE_SAMPLE;
8011 
8012    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8013    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8014    emit_src_register(emit, &coord);
8015    emit_resource_register(emit, unit);
8016    emit_sampler_register(emit, unit);
8017    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8018       emit_tex_compare_refcoord(emit, target, &coord);
8019    }
8020    end_emit_instruction(emit);
8021 
8022    end_tex_swizzle(emit, &swz_info);
8023 
8024    free_temp_indexes(emit);
8025 
8026    return TRUE;
8027 }
8028 
8029 /**
8030  * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather)
8031  */
8032 static boolean
emit_tg4(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8033 emit_tg4(struct svga_shader_emitter_v10 *emit,
8034          const struct tgsi_full_instruction *inst)
8035 {
8036    const uint unit = inst->Src[2].Register.Index;
8037    struct tgsi_full_src_register src;
8038    struct tgsi_full_src_register offset_src, sampler, ref;
8039    int offsets[3];
8040 
8041    /* check that the sampler returns a float */
8042    if (!is_valid_tex_instruction(emit, inst))
8043       return TRUE;
8044 
8045    if (emit->version >= 50) {
8046       unsigned target = inst->Texture.Texture;
8047       int index = inst->Src[1].Register.Index;
8048       const union tgsi_immediate_data *imm = emit->immediates[index];
8049       int select_comp  = imm[inst->Src[1].Register.SwizzleX].Int;
8050       unsigned select_swizzle = PIPE_SWIZZLE_X;
8051 
8052       if (!tgsi_is_shadow_target(target)) {
8053          switch (select_comp) {
8054          case 0:
8055             select_swizzle = emit->key.tex[unit].swizzle_r;
8056             break;
8057          case 1:
8058             select_swizzle = emit->key.tex[unit].swizzle_g;
8059             break;
8060          case 2:
8061             select_swizzle = emit->key.tex[unit].swizzle_b;
8062             break;
8063          case 3:
8064             select_swizzle = emit->key.tex[unit].swizzle_a;
8065             break;
8066          default:
8067             assert(!"Unexpected component in texture gather swizzle");
8068          }
8069       }
8070       else {
8071          select_swizzle = emit->key.tex[unit].swizzle_r;
8072       }
8073 
8074       if (select_swizzle == PIPE_SWIZZLE_1) {
8075          src = make_immediate_reg_float(emit, 1.0);
8076          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8077          return TRUE;
8078       }
8079       else if (select_swizzle == PIPE_SWIZZLE_0) {
8080          src = make_immediate_reg_float(emit, 0.0);
8081          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8082          return TRUE;
8083       }
8084 
8085       src = setup_texcoord(emit, unit, &inst->Src[0]);
8086 
8087       /* GATHER4 dst, coord, resource, sampler */
8088       /* GATHER4_C dst, coord, resource, sampler ref */
8089       /* GATHER4_PO dst, coord, offset resource, sampler */
8090       /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */
8091       begin_emit_instruction(emit);
8092       if (inst->Texture.NumOffsets == 1) {
8093          if (tgsi_is_shadow_target(target)) {
8094             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C,
8095                         inst->Instruction.Saturate);
8096          }
8097          else {
8098             emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO,
8099                         inst->Instruction.Saturate);
8100          }
8101       }
8102       else {
8103          if (tgsi_is_shadow_target(target)) {
8104             emit_opcode(emit, VGPU10_OPCODE_GATHER4_C,
8105                         inst->Instruction.Saturate);
8106          }
8107          else {
8108             emit_opcode(emit, VGPU10_OPCODE_GATHER4,
8109                         inst->Instruction.Saturate);
8110          }
8111       }
8112 
8113       emit_dst_register(emit, &inst->Dst[0]);
8114       emit_src_register(emit, &src);
8115       if (inst->Texture.NumOffsets == 1) {
8116          /* offset */
8117          offset_src = make_src_reg(inst->TexOffsets[0].File,
8118                                    inst->TexOffsets[0].Index);
8119          offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX,
8120                                   inst->TexOffsets[0].SwizzleY,
8121                                   inst->TexOffsets[0].SwizzleZ,
8122                                   TGSI_SWIZZLE_W);
8123          emit_src_register(emit, &offset_src);
8124       }
8125 
8126       /* resource */
8127       emit_resource_register(emit, unit);
8128 
8129       /* sampler */
8130       sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8131       sampler.Register.SwizzleX =
8132       sampler.Register.SwizzleY =
8133       sampler.Register.SwizzleZ =
8134       sampler.Register.SwizzleW = select_swizzle;
8135       emit_src_register(emit, &sampler);
8136 
8137       if (tgsi_is_shadow_target(target)) {
8138          /* ref */
8139          if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
8140             ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8141             emit_tex_compare_refcoord(emit, target, &ref);
8142          }
8143          else {
8144             emit_tex_compare_refcoord(emit, target, &src);
8145          }
8146       }
8147 
8148       end_emit_instruction(emit);
8149       free_temp_indexes(emit);
8150    }
8151    else {
8152       /* Only a single channel is supported in SM4_1 and we report
8153        * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1.
8154        * Only the 0th component will be gathered.
8155        */
8156       switch (emit->key.tex[unit].swizzle_r) {
8157       case PIPE_SWIZZLE_X:
8158          get_texel_offsets(emit, inst, offsets);
8159          src = setup_texcoord(emit, unit, &inst->Src[0]);
8160 
8161          /* Gather dst, coord, resource, sampler */
8162          begin_emit_instruction(emit);
8163          emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4,
8164                             inst->Instruction.Saturate, offsets);
8165          emit_dst_register(emit, &inst->Dst[0]);
8166          emit_src_register(emit, &src);
8167          emit_resource_register(emit, unit);
8168 
8169          /* sampler */
8170          sampler = make_src_reg(TGSI_FILE_SAMPLER, unit);
8171          sampler.Register.SwizzleX =
8172          sampler.Register.SwizzleY =
8173          sampler.Register.SwizzleZ =
8174          sampler.Register.SwizzleW = PIPE_SWIZZLE_X;
8175          emit_src_register(emit, &sampler);
8176 
8177          end_emit_instruction(emit);
8178          break;
8179       case PIPE_SWIZZLE_W:
8180       case PIPE_SWIZZLE_1:
8181          src = make_immediate_reg_float(emit, 1.0);
8182          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8183          break;
8184       case PIPE_SWIZZLE_Y:
8185       case PIPE_SWIZZLE_Z:
8186       case PIPE_SWIZZLE_0:
8187       default:
8188          src = make_immediate_reg_float(emit, 0.0);
8189          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src);
8190          break;
8191       }
8192    }
8193 
8194    return TRUE;
8195 }
8196 
8197 
8198 
8199 /**
8200  * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays)
8201  */
8202 static boolean
emit_tex2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8203 emit_tex2(struct svga_shader_emitter_v10 *emit,
8204          const struct tgsi_full_instruction *inst)
8205 {
8206    const uint unit = inst->Src[2].Register.Index;
8207    unsigned target = inst->Texture.Texture;
8208    struct tgsi_full_src_register coord, ref;
8209    int offsets[3];
8210    struct tex_swizzle_info swz_info;
8211 
8212    /* check that the sampler returns a float */
8213    if (!is_valid_tex_instruction(emit, inst))
8214       return TRUE;
8215 
8216    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8217 
8218    get_texel_offsets(emit, inst, offsets);
8219 
8220    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8221    ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8222 
8223    /* SAMPLE_C dst, coord, resource, sampler, ref */
8224    begin_emit_instruction(emit);
8225    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C,
8226                       inst->Instruction.Saturate, offsets);
8227    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8228    emit_src_register(emit, &coord);
8229    emit_resource_register(emit, unit);
8230    emit_sampler_register(emit, unit);
8231    emit_tex_compare_refcoord(emit, target, &ref);
8232    end_emit_instruction(emit);
8233 
8234    end_tex_swizzle(emit, &swz_info);
8235 
8236    free_temp_indexes(emit);
8237 
8238    return TRUE;
8239 }
8240 
8241 
8242 /**
8243  * Emit code for TGSI_OPCODE_TXP (projective texture)
8244  */
8245 static boolean
emit_txp(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8246 emit_txp(struct svga_shader_emitter_v10 *emit,
8247          const struct tgsi_full_instruction *inst)
8248 {
8249    const uint unit = inst->Src[1].Register.Index;
8250    const enum tgsi_texture_type target = inst->Texture.Texture;
8251    VGPU10_OPCODE_TYPE opcode;
8252    int offsets[3];
8253    unsigned tmp = get_temp_index(emit);
8254    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8255    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8256    struct tgsi_full_src_register src0_wwww =
8257       scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8258    struct tgsi_full_src_register coord;
8259    struct tex_swizzle_info swz_info;
8260 
8261    /* check that the sampler returns a float */
8262    if (!is_valid_tex_instruction(emit, inst))
8263       return TRUE;
8264 
8265    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8266 
8267    get_texel_offsets(emit, inst, offsets);
8268 
8269    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8270 
8271    /* DIV tmp, coord, coord.wwww */
8272    emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
8273                         &coord, &src0_wwww);
8274 
8275    /* SAMPLE dst, coord(tmp), resource, sampler */
8276    begin_emit_instruction(emit);
8277 
8278    if (tgsi_is_shadow_target(target))
8279       /* NOTE: for non-fragment shaders, we should use
8280        * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
8281        */
8282       opcode = VGPU10_OPCODE_SAMPLE_C;
8283    else
8284       opcode = VGPU10_OPCODE_SAMPLE;
8285 
8286    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8287    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8288    emit_src_register(emit, &tmp_src);  /* projected coord */
8289    emit_resource_register(emit, unit);
8290    emit_sampler_register(emit, unit);
8291    if (opcode == VGPU10_OPCODE_SAMPLE_C) {
8292       emit_tex_compare_refcoord(emit, target, &tmp_src);
8293    }
8294    end_emit_instruction(emit);
8295 
8296    end_tex_swizzle(emit, &swz_info);
8297 
8298    free_temp_indexes(emit);
8299 
8300    return TRUE;
8301 }
8302 
8303 
8304 /**
8305  * Emit code for TGSI_OPCODE_TXD (explicit derivatives)
8306  */
8307 static boolean
emit_txd(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8308 emit_txd(struct svga_shader_emitter_v10 *emit,
8309          const struct tgsi_full_instruction *inst)
8310 {
8311    const uint unit = inst->Src[3].Register.Index;
8312    const enum tgsi_texture_type target = inst->Texture.Texture;
8313    int offsets[3];
8314    struct tgsi_full_src_register coord;
8315    struct tex_swizzle_info swz_info;
8316 
8317    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8318                      &swz_info);
8319 
8320    get_texel_offsets(emit, inst, offsets);
8321 
8322    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8323 
8324    /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */
8325    begin_emit_instruction(emit);
8326    emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D,
8327                       inst->Instruction.Saturate, offsets);
8328    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8329    emit_src_register(emit, &coord);
8330    emit_resource_register(emit, unit);
8331    emit_sampler_register(emit, unit);
8332    emit_src_register(emit, &inst->Src[1]);  /* Xderiv */
8333    emit_src_register(emit, &inst->Src[2]);  /* Yderiv */
8334    end_emit_instruction(emit);
8335 
8336    end_tex_swizzle(emit, &swz_info);
8337 
8338    free_temp_indexes(emit);
8339 
8340    return TRUE;
8341 }
8342 
8343 
8344 /**
8345  * Emit code for TGSI_OPCODE_TXF (texel fetch)
8346  */
8347 static boolean
emit_txf(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8348 emit_txf(struct svga_shader_emitter_v10 *emit,
8349          const struct tgsi_full_instruction *inst)
8350 {
8351    const uint unit = inst->Src[1].Register.Index;
8352    const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture)
8353       && emit->key.tex[unit].num_samples > 1;
8354    int offsets[3];
8355    struct tex_swizzle_info swz_info;
8356 
8357    begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);
8358 
8359    get_texel_offsets(emit, inst, offsets);
8360 
8361    if (msaa) {
8362       assert(emit->key.tex[unit].num_samples > 1);
8363 
8364       /* Fetch one sample from an MSAA texture */
8365       struct tgsi_full_src_register sampleIndex =
8366          scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8367       /* LD_MS dst, coord(s0), resource, sampleIndex */
8368       begin_emit_instruction(emit);
8369       emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS,
8370                          inst->Instruction.Saturate, offsets);
8371       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8372       emit_src_register(emit, &inst->Src[0]);
8373       emit_resource_register(emit, unit);
8374       emit_src_register(emit, &sampleIndex);
8375       end_emit_instruction(emit);
8376    }
8377    else {
8378       /* Fetch one texel specified by integer coordinate */
8379       /* LD dst, coord(s0), resource */
8380       begin_emit_instruction(emit);
8381       emit_sample_opcode(emit, VGPU10_OPCODE_LD,
8382                          inst->Instruction.Saturate, offsets);
8383       emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8384       emit_src_register(emit, &inst->Src[0]);
8385       emit_resource_register(emit, unit);
8386       end_emit_instruction(emit);
8387    }
8388 
8389    end_tex_swizzle(emit, &swz_info);
8390 
8391    free_temp_indexes(emit);
8392 
8393    return TRUE;
8394 }
8395 
8396 
8397 /**
8398  * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias)
8399  * or TGSI_OPCODE_TXB2 (for cube shadow maps).
8400  */
8401 static boolean
emit_txl_txb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8402 emit_txl_txb(struct svga_shader_emitter_v10 *emit,
8403              const struct tgsi_full_instruction *inst)
8404 {
8405    const enum tgsi_texture_type target = inst->Texture.Texture;
8406    VGPU10_OPCODE_TYPE opcode;
8407    unsigned unit;
8408    int offsets[3];
8409    struct tgsi_full_src_register coord, lod_bias;
8410    struct tex_swizzle_info swz_info;
8411 
8412    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL ||
8413           inst->Instruction.Opcode == TGSI_OPCODE_TXB ||
8414           inst->Instruction.Opcode == TGSI_OPCODE_TXB2);
8415 
8416    if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) {
8417       lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8418       unit = inst->Src[2].Register.Index;
8419    }
8420    else {
8421       lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
8422       unit = inst->Src[1].Register.Index;
8423    }
8424 
8425    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8426                      &swz_info);
8427 
8428    get_texel_offsets(emit, inst, offsets);
8429 
8430    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8431 
8432    /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */
8433    begin_emit_instruction(emit);
8434    if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
8435       opcode = VGPU10_OPCODE_SAMPLE_L;
8436    }
8437    else {
8438       opcode = VGPU10_OPCODE_SAMPLE_B;
8439    }
8440    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8441    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8442    emit_src_register(emit, &coord);
8443    emit_resource_register(emit, unit);
8444    emit_sampler_register(emit, unit);
8445    emit_src_register(emit, &lod_bias);
8446    end_emit_instruction(emit);
8447 
8448    end_tex_swizzle(emit, &swz_info);
8449 
8450    free_temp_indexes(emit);
8451 
8452    return TRUE;
8453 }
8454 
8455 
8456 /**
8457  * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array.
8458  */
8459 static boolean
emit_txl2(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8460 emit_txl2(struct svga_shader_emitter_v10 *emit,
8461           const struct tgsi_full_instruction *inst)
8462 {
8463    unsigned target = inst->Texture.Texture;
8464    unsigned opcode, unit;
8465    int offsets[3];
8466    struct tgsi_full_src_register coord, lod;
8467    struct tex_swizzle_info swz_info;
8468 
8469    assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2);
8470 
8471    lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X);
8472    unit = inst->Src[2].Register.Index;
8473 
8474    begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target),
8475                      &swz_info);
8476 
8477    get_texel_offsets(emit, inst, offsets);
8478 
8479    coord = setup_texcoord(emit, unit, &inst->Src[0]);
8480 
8481    /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */
8482    begin_emit_instruction(emit);
8483    opcode = VGPU10_OPCODE_SAMPLE_L;
8484    emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
8485    emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
8486    emit_src_register(emit, &coord);
8487    emit_resource_register(emit, unit);
8488    emit_sampler_register(emit, unit);
8489    emit_src_register(emit, &lod);
8490    end_emit_instruction(emit);
8491 
8492    end_tex_swizzle(emit, &swz_info);
8493 
8494    free_temp_indexes(emit);
8495 
8496    return TRUE;
8497 }
8498 
8499 
8500 /**
8501  * Emit code for TGSI_OPCODE_TXQ (texture query) instruction.
8502  */
8503 static boolean
emit_txq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8504 emit_txq(struct svga_shader_emitter_v10 *emit,
8505          const struct tgsi_full_instruction *inst)
8506 {
8507    const uint unit = inst->Src[1].Register.Index;
8508 
8509    if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) {
8510       /* RESINFO does not support querying texture buffers, so we instead
8511        * store texture buffer sizes in shader constants, then copy them to
8512        * implement TXQ instead of emitting RESINFO.
8513        * MOV dst, const[texture_buffer_size_index[unit]]
8514        */
8515       struct tgsi_full_src_register size_src =
8516          make_src_const_reg(emit->texture_buffer_size_index[unit]);
8517       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src);
8518    } else {
8519       /* RESINFO dst, srcMipLevel, resource */
8520       begin_emit_instruction(emit);
8521       emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT);
8522       emit_dst_register(emit, &inst->Dst[0]);
8523       emit_src_register(emit, &inst->Src[0]);
8524       emit_resource_register(emit, unit);
8525       end_emit_instruction(emit);
8526    }
8527 
8528    free_temp_indexes(emit);
8529 
8530    return TRUE;
8531 }
8532 
8533 
8534 /**
8535  * Does this opcode produce a double-precision result?
8536  * XXX perhaps move this to a TGSI utility.
8537  */
8538 static bool
opcode_has_dbl_dst(unsigned opcode)8539 opcode_has_dbl_dst(unsigned opcode)
8540 {
8541    switch (opcode) {
8542    case TGSI_OPCODE_F2D:
8543    case TGSI_OPCODE_DABS:
8544    case TGSI_OPCODE_DADD:
8545    case TGSI_OPCODE_DFRAC:
8546    case TGSI_OPCODE_DMAX:
8547    case TGSI_OPCODE_DMIN:
8548    case TGSI_OPCODE_DMUL:
8549    case TGSI_OPCODE_DNEG:
8550    case TGSI_OPCODE_I2D:
8551    case TGSI_OPCODE_U2D:
8552       // XXX more TBD
8553       return true;
8554    default:
8555       return false;
8556    }
8557 }
8558 
8559 
8560 /**
8561  * Does this opcode use double-precision source registers?
8562  */
8563 static bool
opcode_has_dbl_src(unsigned opcode)8564 opcode_has_dbl_src(unsigned opcode)
8565 {
8566    switch (opcode) {
8567    case TGSI_OPCODE_D2F:
8568    case TGSI_OPCODE_DABS:
8569    case TGSI_OPCODE_DADD:
8570    case TGSI_OPCODE_DFRAC:
8571    case TGSI_OPCODE_DMAX:
8572    case TGSI_OPCODE_DMIN:
8573    case TGSI_OPCODE_DMUL:
8574    case TGSI_OPCODE_DNEG:
8575    case TGSI_OPCODE_D2I:
8576    case TGSI_OPCODE_D2U:
8577       // XXX more TBD
8578       return true;
8579    default:
8580       return false;
8581    }
8582 }
8583 
8584 
8585 /**
8586  * Check that the swizzle for reading from a double-precision register
8587  * is valid.
8588  */
8589 static void
check_double_src_swizzle(const struct tgsi_full_src_register * reg)8590 check_double_src_swizzle(const struct tgsi_full_src_register *reg)
8591 {
8592    assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X &&
8593            reg->Register.SwizzleY == PIPE_SWIZZLE_Y) ||
8594           (reg->Register.SwizzleX == PIPE_SWIZZLE_Z &&
8595            reg->Register.SwizzleY == PIPE_SWIZZLE_W));
8596 
8597    assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X &&
8598            reg->Register.SwizzleW == PIPE_SWIZZLE_Y) ||
8599           (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z &&
8600            reg->Register.SwizzleW == PIPE_SWIZZLE_W));
8601 }
8602 
8603 
8604 /**
8605  * Check that the writemask for a double-precision instruction is valid.
8606  */
8607 static void
check_double_dst_writemask(const struct tgsi_full_instruction * inst)8608 check_double_dst_writemask(const struct tgsi_full_instruction *inst)
8609 {
8610    ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask;
8611 
8612    switch (inst->Instruction.Opcode) {
8613    case TGSI_OPCODE_DABS:
8614    case TGSI_OPCODE_DADD:
8615    case TGSI_OPCODE_DFRAC:
8616    case TGSI_OPCODE_DNEG:
8617    case TGSI_OPCODE_DMAD:
8618    case TGSI_OPCODE_DMAX:
8619    case TGSI_OPCODE_DMIN:
8620    case TGSI_OPCODE_DMUL:
8621    case TGSI_OPCODE_DRCP:
8622    case TGSI_OPCODE_DSQRT:
8623    case TGSI_OPCODE_F2D:
8624       assert(writemask == TGSI_WRITEMASK_XYZW ||
8625              writemask == TGSI_WRITEMASK_XY ||
8626              writemask == TGSI_WRITEMASK_ZW);
8627       break;
8628    case TGSI_OPCODE_DSEQ:
8629    case TGSI_OPCODE_DSGE:
8630    case TGSI_OPCODE_DSNE:
8631    case TGSI_OPCODE_DSLT:
8632    case TGSI_OPCODE_D2I:
8633    case TGSI_OPCODE_D2U:
8634       /* Write to 1 or 2 components only */
8635       assert(util_bitcount(writemask) <= 2);
8636       break;
8637    default:
8638       /* XXX this list may be incomplete */
8639       ;
8640    }
8641 }
8642 
8643 
8644 /**
8645  * Double-precision absolute value.
8646  */
8647 static boolean
emit_dabs(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8648 emit_dabs(struct svga_shader_emitter_v10 *emit,
8649           const struct tgsi_full_instruction *inst)
8650 {
8651    assert(emit->version >= 50);
8652    check_double_src_swizzle(&inst->Src[0]);
8653    check_double_dst_writemask(inst);
8654 
8655    struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]);
8656 
8657    /* DMOV dst, |src| */
8658    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src);
8659 
8660    return TRUE;
8661 }
8662 
8663 
8664 /**
8665  * Double-precision negation
8666  */
8667 static boolean
emit_dneg(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8668 emit_dneg(struct svga_shader_emitter_v10 *emit,
8669           const struct tgsi_full_instruction *inst)
8670 {
8671    assert(emit->version >= 50);
8672    check_double_src_swizzle(&inst->Src[0]);
8673    check_double_dst_writemask(inst);
8674 
8675    struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]);
8676 
8677    /* DMOV dst, -src */
8678    emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src);
8679 
8680    return TRUE;
8681 }
8682 
8683 
8684 /**
8685  * SM5 has no DMAD opcode.  Implement negation with DMUL/DADD.
8686  */
8687 static boolean
emit_dmad(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8688 emit_dmad(struct svga_shader_emitter_v10 *emit,
8689           const struct tgsi_full_instruction *inst)
8690 {
8691    assert(emit->version >= 50);
8692    check_double_src_swizzle(&inst->Src[0]);
8693    check_double_src_swizzle(&inst->Src[1]);
8694    check_double_src_swizzle(&inst->Src[2]);
8695    check_double_dst_writemask(inst);
8696 
8697    unsigned tmp = get_temp_index(emit);
8698    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8699    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8700 
8701    /* DMUL tmp, src[0], src[1] */
8702    emit_instruction_opn(emit, VGPU10_OPCODE_DMUL,
8703                         &tmp_dst, &inst->Src[0], &inst->Src[1], NULL,
8704                         FALSE, inst->Instruction.Precise);
8705 
8706    /* DADD dst, tmp, src[2] */
8707    emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
8708                         &inst->Dst[0], &tmp_src, &inst->Src[2], NULL,
8709                         inst->Instruction.Saturate, inst->Instruction.Precise);
8710    free_temp_indexes(emit);
8711 
8712    return TRUE;
8713 }
8714 
8715 
8716 /**
8717  * Double precision reciprocal square root
8718  */
8719 static boolean
emit_drsq(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src)8720 emit_drsq(struct svga_shader_emitter_v10 *emit,
8721           const struct tgsi_full_dst_register *dst,
8722           const struct tgsi_full_src_register *src)
8723 {
8724    assert(emit->version >= 50);
8725 
8726    VGPU10OpcodeToken0 token0;
8727    begin_emit_instruction(emit);
8728 
8729    token0.value = 0;
8730    token0.opcodeType = VGPU10_OPCODE_VMWARE;
8731    token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ;
8732    emit_dword(emit, token0.value);
8733 
8734    emit_dst_register(emit, dst);
8735 
8736    check_double_src_swizzle(src);
8737    emit_src_register(emit, src);
8738 
8739    end_emit_instruction(emit);
8740 
8741    return TRUE;
8742 }
8743 
8744 
8745 /**
8746  * There is no SM5 opcode for double precision square root.
8747  * It will be implemented with DRSQ.
8748  * dst = src * DRSQ(src)
8749  */
8750 static boolean
emit_dsqrt(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8751 emit_dsqrt(struct svga_shader_emitter_v10 *emit,
8752           const struct tgsi_full_instruction *inst)
8753 {
8754    assert(emit->version >= 50);
8755 
8756    check_double_src_swizzle(&inst->Src[0]);
8757 
8758    /* temporary register to hold the source */
8759    unsigned tmp = get_temp_index(emit);
8760    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8761    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8762 
8763    /* temporary register to hold the DEQ result */
8764    unsigned tmp_cond = get_temp_index(emit);
8765    struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond);
8766    struct tgsi_full_dst_register tmp_cond_dst_xy =
8767       writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8768    struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond);
8769    struct tgsi_full_src_register tmp_cond_src_xy =
8770          swizzle_src(&tmp_cond_src,
8771                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
8772                      PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
8773 
8774    /* The reciprocal square root of zero yields INF.
8775     * So if the source is 0, we replace it with 1 in the tmp register.
8776     * The later multiplication of zero in the original source will yield 0
8777     * in the result.
8778     */
8779 
8780    /* tmp1 = (src == 0) ? 1 : src;
8781     *   EQ tmp1, 0, src
8782     *   MOVC tmp, tmp1, 1.0, src
8783     */
8784    struct tgsi_full_src_register zero =
8785                make_immediate_reg_double(emit, 0);
8786 
8787    struct tgsi_full_src_register one =
8788                make_immediate_reg_double(emit, 1.0);
8789 
8790    emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy,
8791                         &zero, &inst->Src[0]);
8792    emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst,
8793                         &tmp_cond_src_xy, &one, &inst->Src[0]);
8794 
8795    struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp);
8796    struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp);
8797 
8798    /* DRSQ tmp_rsq, tmp */
8799    emit_drsq(emit, &tmp_rsq_dst, &tmp_src);
8800 
8801    /* DMUL dst, tmp_rsq, src[0] */
8802    emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0],
8803                         &tmp_rsq_src, &inst->Src[0]);
8804 
8805    free_temp_indexes(emit);
8806 
8807    return TRUE;
8808 }
8809 
8810 
8811 static boolean
emit_interp_offset(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8812 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
8813                    const struct tgsi_full_instruction *inst)
8814 {
8815    assert(emit->version >= 50);
8816 
8817    /* The src1.xy offset is a float with values in the range [-0.5, 0.5]
8818     * where (0,0) is the center of the pixel.  We need to translate that
8819     * into an integer offset on a 16x16 grid in the range [-8/16, 7/16].
8820     * Also need to flip the Y axis (I think).
8821     */
8822    unsigned tmp = get_temp_index(emit);
8823    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
8824    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
8825    struct tgsi_full_dst_register tmp_dst_xy =
8826       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
8827    struct tgsi_full_src_register const16 =
8828       make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0);
8829 
8830    /* MUL tmp.xy, src1, {16, -16, 0, 0} */
8831    emit_instruction_op2(emit, VGPU10_OPCODE_MUL,
8832                         &tmp_dst_xy, &inst->Src[1], &const16);
8833 
8834    /* FTOI tmp.xy, tmp */
8835    emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src);
8836 
8837    /* EVAL_SNAPPED dst, src0, tmp */
8838    emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED,
8839                         &inst->Dst[0], &inst->Src[0], &tmp_src);
8840 
8841    free_temp_indexes(emit);
8842 
8843    return TRUE;
8844 }
8845 
8846 
8847 /**
8848  * Emit a simple instruction (like ADD, MUL, MIN, etc).
8849  */
8850 static boolean
emit_simple(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8851 emit_simple(struct svga_shader_emitter_v10 *emit,
8852             const struct tgsi_full_instruction *inst)
8853 {
8854    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8855    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
8856    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
8857    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
8858    unsigned i;
8859 
8860    if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
8861       emit->current_loop_depth++;
8862    }
8863    else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
8864       emit->current_loop_depth--;
8865    }
8866 
8867    begin_emit_instruction(emit);
8868    emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode),
8869                        inst->Instruction.Saturate,
8870                        inst->Instruction.Precise);
8871    for (i = 0; i < op->num_dst; i++) {
8872       if (dbl_dst) {
8873          check_double_dst_writemask(inst);
8874       }
8875       emit_dst_register(emit, &inst->Dst[i]);
8876    }
8877    for (i = 0; i < op->num_src; i++) {
8878       if (dbl_src) {
8879          check_double_src_swizzle(&inst->Src[i]);
8880       }
8881       emit_src_register(emit, &inst->Src[i]);
8882    }
8883    end_emit_instruction(emit);
8884 
8885    return TRUE;
8886 }
8887 
8888 
8889 /**
8890  * Emit MSB instruction (like IMSB, UMSB).
8891  *
8892  * GLSL returns the index starting from the LSB;
8893  * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB.
8894  * To get correct location as per glsl from SM5 device, we should
8895  * return (31 - index) if returned index is not -1.
8896  */
8897 static boolean
emit_msb(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8898 emit_msb(struct svga_shader_emitter_v10 *emit,
8899          const struct tgsi_full_instruction *inst)
8900 {
8901    const struct tgsi_full_dst_register *index_dst = &inst->Dst[0];
8902 
8903    assert(index_dst->Register.File != TGSI_FILE_OUTPUT);
8904 
8905    struct tgsi_full_src_register index_src =
8906       make_src_reg(index_dst->Register.File, index_dst->Register.Index);
8907    struct tgsi_full_src_register imm31 =
8908       make_immediate_reg_int(emit, 31);
8909    imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X);
8910    struct tgsi_full_src_register neg_one =
8911       make_immediate_reg_int(emit, -1);
8912    neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X);
8913    unsigned tmp = get_temp_index(emit);
8914    const struct tgsi_full_dst_register tmp_dst =
8915       make_dst_temp_reg(tmp);
8916    const struct tgsi_full_dst_register tmp_dst_x =
8917       writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
8918    const struct tgsi_full_src_register tmp_src_x =
8919        make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X);
8920    int writemask = TGSI_WRITEMASK_X;
8921    int src_swizzle = TGSI_SWIZZLE_X;
8922    int dst_writemask = index_dst->Register.WriteMask;
8923 
8924    emit_simple(emit, inst);
8925 
8926    /* index conversion from SM5 to GLSL */
8927    while (writemask & dst_writemask) {
8928       struct tgsi_full_src_register index_src_comp =
8929          scalar_src(&index_src, src_swizzle);
8930       struct tgsi_full_dst_register index_dst_comp =
8931          writemask_dst(index_dst, writemask);
8932 
8933       /* check if index_src_comp != -1 */
8934       emit_instruction_op2(emit, VGPU10_OPCODE_INE,
8935                            &tmp_dst_x, &index_src_comp, &neg_one);
8936 
8937       /* if */
8938       emit_if(emit, &tmp_src_x);
8939 
8940       index_src_comp = negate_src(&index_src_comp);
8941       /* SUB DST, IMM{31}, DST */
8942       emit_instruction_op2(emit, VGPU10_OPCODE_IADD,
8943                            &index_dst_comp, &imm31, &index_src_comp);
8944 
8945       /* endif */
8946       emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
8947 
8948       writemask = writemask << 1;
8949       src_swizzle = src_swizzle + 1;
8950    }
8951    free_temp_indexes(emit);
8952    return TRUE;
8953 }
8954 
8955 
8956 /**
8957  * Emit a BFE instruction (like UBFE, IBFE).
8958  * tgsi representation:
8959  * U/IBFE dst, value, offset, width
8960  * SM5 representation:
8961  * U/IBFE dst, width, offset, value
8962  * Note: SM5 has width & offset range (0-31);
8963  *      whereas GLSL has width & offset range (0-32)
8964  */
8965 static boolean
emit_bfe(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)8966 emit_bfe(struct svga_shader_emitter_v10 *emit,
8967          const struct tgsi_full_instruction *inst)
8968 {
8969    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
8970    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
8971    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
8972    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
8973    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
8974 
8975    unsigned tmp1 = get_temp_index(emit);
8976    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
8977    const struct tgsi_full_dst_register cond1_dst_x =
8978       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
8979    const struct tgsi_full_src_register cond1_src_x =
8980       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
8981 
8982    unsigned tmp2 = get_temp_index(emit);
8983    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
8984    const struct tgsi_full_dst_register cond2_dst_x =
8985       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
8986    const struct tgsi_full_src_register cond2_src_x =
8987       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
8988 
8989    /**
8990     * In SM5, when width = 32  and offset = 0, it returns 0.
8991     * On the other hand GLSL, expects value to be copied as it is, to dst.
8992     */
8993 
8994    /* cond1 = width ! = 32 */
8995    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
8996                         &cond1_dst_x, &inst->Src[2], &imm32);
8997 
8998    /* cond2 = offset ! = 0 */
8999    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9000                         &cond2_dst_x, &inst->Src[1], &zero);
9001 
9002    /* cond 2 = cond1 & cond 2 */
9003    emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
9004                         &cond2_src_x,
9005                         &cond1_src_x);
9006    /* IF */
9007    emit_if(emit, &cond2_src_x);
9008 
9009    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9010                         &inst->Src[0]);
9011 
9012    /* ELSE */
9013    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9014 
9015    /* U/IBFE dst, width, offset, value */
9016    emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
9017                         &inst->Src[2], &inst->Src[1], &inst->Src[0]);
9018 
9019    /* ENDIF */
9020    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9021 
9022    free_temp_indexes(emit);
9023    return TRUE;
9024 }
9025 
9026 
9027 /**
9028  * Emit BFI  instruction
9029  * tgsi representation:
9030  * BFI dst, base, insert, offset, width
9031  * SM5 representation:
9032  * BFI dst, width, offset, insert, base
9033  * Note: SM5 has width & offset range (0-31);
9034  *      whereas GLSL has width & offset range (0-32)
9035  */
9036 static boolean
emit_bfi(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9037 emit_bfi(struct svga_shader_emitter_v10 *emit,
9038          const struct tgsi_full_instruction *inst)
9039 {
9040    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9041    struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
9042    imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
9043 
9044    struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
9045    zero = scalar_src(&zero, TGSI_SWIZZLE_X);
9046 
9047    unsigned tmp1 = get_temp_index(emit);
9048    const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
9049    const struct tgsi_full_dst_register cond1_dst_x =
9050       writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
9051    const struct tgsi_full_src_register cond1_src_x =
9052       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);
9053 
9054    unsigned tmp2 = get_temp_index(emit);
9055    const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
9056    const struct tgsi_full_dst_register cond2_dst_x =
9057       writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
9058    const struct tgsi_full_src_register cond2_src_x =
9059       make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);
9060 
9061    /**
9062     * In SM5, when width = 32  and offset = 0, it returns 0.
9063     * On the other hand GLSL, expects insert to be copied as it is, to dst.
9064     */
9065 
9066    /* cond1 = width == 32 */
9067    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9068                         &cond1_dst_x, &inst->Src[3], &imm32);
9069 
9070    /* cond1 = offset == 0 */
9071    emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
9072                         &cond2_dst_x, &inst->Src[2], &zero);
9073 
9074    /* cond2 = cond1 & cond2 */
9075    emit_instruction_op2(emit, VGPU10_OPCODE_AND,
9076                         &cond2_dst_x, &cond2_src_x, &cond1_src_x);
9077 
9078    /* if */
9079    emit_if(emit, &cond2_src_x);
9080 
9081    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
9082                         &inst->Src[1]);
9083 
9084    /* else */
9085    emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
9086 
9087    /* BFI dst, width, offset, insert, base */
9088    begin_emit_instruction(emit);
9089    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9090    emit_dst_register(emit, &inst->Dst[0]);
9091    emit_src_register(emit, &inst->Src[3]);
9092    emit_src_register(emit, &inst->Src[2]);
9093    emit_src_register(emit, &inst->Src[1]);
9094    emit_src_register(emit, &inst->Src[0]);
9095    end_emit_instruction(emit);
9096 
9097    /* endif */
9098    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
9099 
9100    free_temp_indexes(emit);
9101    return TRUE;
9102 }
9103 
9104 
9105 /**
9106  * We only special case the MOV instruction to try to detect constant
9107  * color writes in the fragment shader.
9108  */
9109 static boolean
emit_mov(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9110 emit_mov(struct svga_shader_emitter_v10 *emit,
9111          const struct tgsi_full_instruction *inst)
9112 {
9113    const struct tgsi_full_src_register *src = &inst->Src[0];
9114    const struct tgsi_full_dst_register *dst = &inst->Dst[0];
9115 
9116    if (emit->unit == PIPE_SHADER_FRAGMENT &&
9117        dst->Register.File == TGSI_FILE_OUTPUT &&
9118        dst->Register.Index == 0 &&
9119        src->Register.File == TGSI_FILE_CONSTANT &&
9120        !src->Register.Indirect) {
9121       emit->constant_color_output = TRUE;
9122    }
9123 
9124    return emit_simple(emit, inst);
9125 }
9126 
9127 
9128 /**
9129  * Emit a simple VGPU10 instruction which writes to multiple dest registers,
9130  * where TGSI only uses one dest register.
9131  */
9132 static boolean
emit_simple_1dst(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,unsigned dst_count,unsigned dst_index)9133 emit_simple_1dst(struct svga_shader_emitter_v10 *emit,
9134                  const struct tgsi_full_instruction *inst,
9135                  unsigned dst_count,
9136                  unsigned dst_index)
9137 {
9138    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9139    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9140    unsigned i;
9141 
9142    begin_emit_instruction(emit);
9143    emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate);
9144 
9145    for (i = 0; i < dst_count; i++) {
9146       if (i == dst_index) {
9147          emit_dst_register(emit, &inst->Dst[0]);
9148       } else {
9149          emit_null_dst_register(emit);
9150       }
9151    }
9152 
9153    for (i = 0; i < op->num_src; i++) {
9154       emit_src_register(emit, &inst->Src[i]);
9155    }
9156    end_emit_instruction(emit);
9157 
9158    return TRUE;
9159 }
9160 
9161 
9162 /**
9163  * Emit a vmware specific VGPU10 instruction.
9164  */
9165 static boolean
emit_vmware(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst,VGPU10_VMWARE_OPCODE_TYPE subopcode)9166 emit_vmware(struct svga_shader_emitter_v10 *emit,
9167             const struct tgsi_full_instruction *inst,
9168             VGPU10_VMWARE_OPCODE_TYPE subopcode)
9169 {
9170    VGPU10OpcodeToken0 token0;
9171    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9172    const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode);
9173    const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode);
9174    const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode);
9175 
9176    unsigned i;
9177 
9178    begin_emit_instruction(emit);
9179 
9180    assert((subopcode > 0 && emit->version >= 50) || subopcode == 0);
9181 
9182    token0.value = 0;
9183    token0.opcodeType = VGPU10_OPCODE_VMWARE;
9184    token0.vmwareOpcodeType = subopcode;
9185    emit_dword(emit, token0.value);
9186 
9187    if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) {
9188       /* IDIV only uses the first dest register. */
9189       emit_dst_register(emit, &inst->Dst[0]);
9190       emit_null_dst_register(emit);
9191    } else {
9192       for (i = 0; i < op->num_dst; i++) {
9193          if (dbl_dst) {
9194             check_double_dst_writemask(inst);
9195          }
9196          emit_dst_register(emit, &inst->Dst[i]);
9197       }
9198    }
9199 
9200    for (i = 0; i < op->num_src; i++) {
9201       if (dbl_src) {
9202          check_double_src_swizzle(&inst->Src[i]);
9203       }
9204       emit_src_register(emit, &inst->Src[i]);
9205    }
9206    end_emit_instruction(emit);
9207 
9208    return TRUE;
9209 }
9210 
9211 
9212 /**
9213  * Translate a single TGSI instruction to VGPU10.
9214  */
9215 static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 * emit,unsigned inst_number,const struct tgsi_full_instruction * inst)9216 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
9217                         unsigned inst_number,
9218                         const struct tgsi_full_instruction *inst)
9219 {
9220    const enum tgsi_opcode opcode = inst->Instruction.Opcode;
9221 
9222    if (emit->skip_instruction)
9223       return TRUE;
9224 
9225    switch (opcode) {
9226    case TGSI_OPCODE_ADD:
9227    case TGSI_OPCODE_AND:
9228    case TGSI_OPCODE_BGNLOOP:
9229    case TGSI_OPCODE_BRK:
9230    case TGSI_OPCODE_CEIL:
9231    case TGSI_OPCODE_CONT:
9232    case TGSI_OPCODE_DDX:
9233    case TGSI_OPCODE_DDY:
9234    case TGSI_OPCODE_DIV:
9235    case TGSI_OPCODE_DP2:
9236    case TGSI_OPCODE_DP3:
9237    case TGSI_OPCODE_DP4:
9238    case TGSI_OPCODE_ELSE:
9239    case TGSI_OPCODE_ENDIF:
9240    case TGSI_OPCODE_ENDLOOP:
9241    case TGSI_OPCODE_ENDSUB:
9242    case TGSI_OPCODE_F2I:
9243    case TGSI_OPCODE_F2U:
9244    case TGSI_OPCODE_FLR:
9245    case TGSI_OPCODE_FRC:
9246    case TGSI_OPCODE_FSEQ:
9247    case TGSI_OPCODE_FSGE:
9248    case TGSI_OPCODE_FSLT:
9249    case TGSI_OPCODE_FSNE:
9250    case TGSI_OPCODE_I2F:
9251    case TGSI_OPCODE_IMAX:
9252    case TGSI_OPCODE_IMIN:
9253    case TGSI_OPCODE_INEG:
9254    case TGSI_OPCODE_ISGE:
9255    case TGSI_OPCODE_ISHR:
9256    case TGSI_OPCODE_ISLT:
9257    case TGSI_OPCODE_MAD:
9258    case TGSI_OPCODE_MAX:
9259    case TGSI_OPCODE_MIN:
9260    case TGSI_OPCODE_MUL:
9261    case TGSI_OPCODE_NOP:
9262    case TGSI_OPCODE_NOT:
9263    case TGSI_OPCODE_OR:
9264    case TGSI_OPCODE_UADD:
9265    case TGSI_OPCODE_USEQ:
9266    case TGSI_OPCODE_USGE:
9267    case TGSI_OPCODE_USLT:
9268    case TGSI_OPCODE_UMIN:
9269    case TGSI_OPCODE_UMAD:
9270    case TGSI_OPCODE_UMAX:
9271    case TGSI_OPCODE_ROUND:
9272    case TGSI_OPCODE_SQRT:
9273    case TGSI_OPCODE_SHL:
9274    case TGSI_OPCODE_TRUNC:
9275    case TGSI_OPCODE_U2F:
9276    case TGSI_OPCODE_UCMP:
9277    case TGSI_OPCODE_USHR:
9278    case TGSI_OPCODE_USNE:
9279    case TGSI_OPCODE_XOR:
9280    /* Begin SM5 opcodes */
9281    case TGSI_OPCODE_F2D:
9282    case TGSI_OPCODE_D2F:
9283    case TGSI_OPCODE_DADD:
9284    case TGSI_OPCODE_DMUL:
9285    case TGSI_OPCODE_DMAX:
9286    case TGSI_OPCODE_DMIN:
9287    case TGSI_OPCODE_DSGE:
9288    case TGSI_OPCODE_DSLT:
9289    case TGSI_OPCODE_DSEQ:
9290    case TGSI_OPCODE_DSNE:
9291    case TGSI_OPCODE_BREV:
9292    case TGSI_OPCODE_POPC:
9293    case TGSI_OPCODE_LSB:
9294    case TGSI_OPCODE_INTERP_CENTROID:
9295    case TGSI_OPCODE_INTERP_SAMPLE:
9296       /* simple instructions */
9297       return emit_simple(emit, inst);
9298    case TGSI_OPCODE_RET:
9299       if (emit->unit == PIPE_SHADER_TESS_CTRL &&
9300           !emit->tcs.control_point_phase) {
9301 
9302          /* store the tessellation levels in the patch constant phase only */
9303          store_tesslevels(emit);
9304       }
9305       return emit_simple(emit, inst);
9306 
9307    case TGSI_OPCODE_IMSB:
9308    case TGSI_OPCODE_UMSB:
9309       return emit_msb(emit, inst);
9310    case TGSI_OPCODE_IBFE:
9311    case TGSI_OPCODE_UBFE:
9312       return emit_bfe(emit, inst);
9313    case TGSI_OPCODE_BFI:
9314       return emit_bfi(emit, inst);
9315    case TGSI_OPCODE_MOV:
9316       return emit_mov(emit, inst);
9317    case TGSI_OPCODE_EMIT:
9318       return emit_vertex(emit, inst);
9319    case TGSI_OPCODE_ENDPRIM:
9320       return emit_endprim(emit, inst);
9321    case TGSI_OPCODE_IABS:
9322       return emit_iabs(emit, inst);
9323    case TGSI_OPCODE_ARL:
9324       /* fall-through */
9325    case TGSI_OPCODE_UARL:
9326       return emit_arl_uarl(emit, inst);
9327    case TGSI_OPCODE_BGNSUB:
9328       /* no-op */
9329       return TRUE;
9330    case TGSI_OPCODE_CAL:
9331       return emit_cal(emit, inst);
9332    case TGSI_OPCODE_CMP:
9333       return emit_cmp(emit, inst);
9334    case TGSI_OPCODE_COS:
9335       return emit_sincos(emit, inst);
9336    case TGSI_OPCODE_DST:
9337       return emit_dst(emit, inst);
9338    case TGSI_OPCODE_EX2:
9339       return emit_ex2(emit, inst);
9340    case TGSI_OPCODE_EXP:
9341       return emit_exp(emit, inst);
9342    case TGSI_OPCODE_IF:
9343       return emit_if(emit, &inst->Src[0]);
9344    case TGSI_OPCODE_KILL:
9345       return emit_kill(emit, inst);
9346    case TGSI_OPCODE_KILL_IF:
9347       return emit_kill_if(emit, inst);
9348    case TGSI_OPCODE_LG2:
9349       return emit_lg2(emit, inst);
9350    case TGSI_OPCODE_LIT:
9351       return emit_lit(emit, inst);
9352    case TGSI_OPCODE_LODQ:
9353       return emit_lodq(emit, inst);
9354    case TGSI_OPCODE_LOG:
9355       return emit_log(emit, inst);
9356    case TGSI_OPCODE_LRP:
9357       return emit_lrp(emit, inst);
9358    case TGSI_OPCODE_POW:
9359       return emit_pow(emit, inst);
9360    case TGSI_OPCODE_RCP:
9361       return emit_rcp(emit, inst);
9362    case TGSI_OPCODE_RSQ:
9363       return emit_rsq(emit, inst);
9364    case TGSI_OPCODE_SAMPLE:
9365       return emit_sample(emit, inst);
9366    case TGSI_OPCODE_SEQ:
9367       return emit_seq(emit, inst);
9368    case TGSI_OPCODE_SGE:
9369       return emit_sge(emit, inst);
9370    case TGSI_OPCODE_SGT:
9371       return emit_sgt(emit, inst);
9372    case TGSI_OPCODE_SIN:
9373       return emit_sincos(emit, inst);
9374    case TGSI_OPCODE_SLE:
9375       return emit_sle(emit, inst);
9376    case TGSI_OPCODE_SLT:
9377       return emit_slt(emit, inst);
9378    case TGSI_OPCODE_SNE:
9379       return emit_sne(emit, inst);
9380    case TGSI_OPCODE_SSG:
9381       return emit_ssg(emit, inst);
9382    case TGSI_OPCODE_ISSG:
9383       return emit_issg(emit, inst);
9384    case TGSI_OPCODE_TEX:
9385       return emit_tex(emit, inst);
9386    case TGSI_OPCODE_TG4:
9387       return emit_tg4(emit, inst);
9388    case TGSI_OPCODE_TEX2:
9389       return emit_tex2(emit, inst);
9390    case TGSI_OPCODE_TXP:
9391       return emit_txp(emit, inst);
9392    case TGSI_OPCODE_TXB:
9393    case TGSI_OPCODE_TXB2:
9394    case TGSI_OPCODE_TXL:
9395       return emit_txl_txb(emit, inst);
9396    case TGSI_OPCODE_TXD:
9397       return emit_txd(emit, inst);
9398    case TGSI_OPCODE_TXF:
9399       return emit_txf(emit, inst);
9400    case TGSI_OPCODE_TXL2:
9401       return emit_txl2(emit, inst);
9402    case TGSI_OPCODE_TXQ:
9403       return emit_txq(emit, inst);
9404    case TGSI_OPCODE_UIF:
9405       return emit_if(emit, &inst->Src[0]);
9406    case TGSI_OPCODE_UMUL_HI:
9407    case TGSI_OPCODE_IMUL_HI:
9408    case TGSI_OPCODE_UDIV:
9409       /* These cases use only the FIRST of two destination registers */
9410       return emit_simple_1dst(emit, inst, 2, 0);
9411    case TGSI_OPCODE_IDIV:
9412       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV);
9413    case TGSI_OPCODE_UMUL:
9414    case TGSI_OPCODE_UMOD:
9415    case TGSI_OPCODE_MOD:
9416       /* These cases use only the SECOND of two destination registers */
9417       return emit_simple_1dst(emit, inst, 2, 1);
9418 
9419    /* Begin SM5 opcodes */
9420    case TGSI_OPCODE_DABS:
9421       return emit_dabs(emit, inst);
9422    case TGSI_OPCODE_DNEG:
9423       return emit_dneg(emit, inst);
9424    case TGSI_OPCODE_DRCP:
9425       return emit_simple(emit, inst);
9426    case TGSI_OPCODE_DSQRT:
9427       return emit_dsqrt(emit, inst);
9428    case TGSI_OPCODE_DMAD:
9429       return emit_dmad(emit, inst);
9430    case TGSI_OPCODE_DFRAC:
9431       return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC);
9432    case TGSI_OPCODE_D2I:
9433    case TGSI_OPCODE_D2U:
9434       return emit_simple(emit, inst);
9435    case TGSI_OPCODE_I2D:
9436    case TGSI_OPCODE_U2D:
9437       return emit_simple(emit, inst);
9438    case TGSI_OPCODE_DRSQ:
9439       return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]);
9440    case TGSI_OPCODE_DDIV:
9441       return emit_simple(emit, inst);
9442    case TGSI_OPCODE_INTERP_OFFSET:
9443       return emit_interp_offset(emit, inst);
9444 
9445    /* The following opcodes should never be seen here.  We return zero
9446     * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
9447     * FMA_SUPPORTED, LDEXP_SUPPORTED queries.
9448     */
9449    case TGSI_OPCODE_FMA:
9450    case TGSI_OPCODE_LDEXP:
9451    case TGSI_OPCODE_DSSG:
9452    case TGSI_OPCODE_DFRACEXP:
9453    case TGSI_OPCODE_DLDEXP:
9454    case TGSI_OPCODE_DTRUNC:
9455    case TGSI_OPCODE_DCEIL:
9456    case TGSI_OPCODE_DFLR:
9457       debug_printf("Unexpected TGSI opcode %s.  "
9458                    "Should have been translated away by the GLSL compiler.\n",
9459                    tgsi_get_opcode_name(opcode));
9460       return FALSE;
9461 
9462    case TGSI_OPCODE_LOAD:
9463    case TGSI_OPCODE_STORE:
9464    case TGSI_OPCODE_ATOMAND:
9465    case TGSI_OPCODE_ATOMCAS:
9466    case TGSI_OPCODE_ATOMIMAX:
9467    case TGSI_OPCODE_ATOMIMIN:
9468    case TGSI_OPCODE_ATOMOR:
9469    case TGSI_OPCODE_ATOMUADD:
9470    case TGSI_OPCODE_ATOMUMAX:
9471    case TGSI_OPCODE_ATOMUMIN:
9472    case TGSI_OPCODE_ATOMXCHG:
9473    case TGSI_OPCODE_ATOMXOR:
9474       return FALSE;
9475    case TGSI_OPCODE_BARRIER:
9476       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
9477          /* SM5 device doesn't support BARRIER in tcs . If barrier is used
9478           * in shader, don't do anything for this opcode and continue rest
9479           * of shader translation
9480           */
9481          pipe_debug_message(&emit->svga_debug_callback, INFO,
9482                             "barrier instruction is not supported in tessellation control shader\n");
9483          return TRUE;
9484       }
9485       else {
9486          return emit_simple(emit, inst);
9487       }
9488 
9489    case TGSI_OPCODE_END:
9490       if (!emit_post_helpers(emit))
9491          return FALSE;
9492       return emit_simple(emit, inst);
9493 
9494    default:
9495       debug_printf("Unimplemented tgsi instruction %s\n",
9496                    tgsi_get_opcode_name(opcode));
9497       return FALSE;
9498    }
9499 
9500    return TRUE;
9501 }
9502 
9503 
9504 /**
9505  * Emit the extra instructions to adjust the vertex position.
9506  * There are two possible adjustments:
9507  * 1. Converting from Gallium to VGPU10 coordinate space by applying the
9508  *    "prescale" and "pretranslate" values.
9509  * 2. Undoing the viewport transformation when we use the swtnl/draw path.
9510  * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
9511  */
9512 static void
emit_vpos_instructions(struct svga_shader_emitter_v10 * emit)9513 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit)
9514 {
9515    struct tgsi_full_src_register tmp_pos_src;
9516    struct tgsi_full_dst_register pos_dst;
9517    const unsigned vs_pos_tmp_index = emit->vposition.tmp_index;
9518 
9519    /* Don't bother to emit any extra vertex instructions if vertex position is
9520     * not written out
9521     */
9522    if (emit->vposition.out_index == INVALID_INDEX)
9523       return;
9524 
9525    /**
9526     * Reset the temporary vertex position register index
9527     * so that emit_dst_register() will use the real vertex position output
9528     */
9529    emit->vposition.tmp_index = INVALID_INDEX;
9530 
9531    tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index);
9532    pos_dst = make_dst_output_reg(emit->vposition.out_index);
9533 
9534    /* If non-adjusted vertex position register index
9535     * is valid, copy the vertex position from the temporary
9536     * vertex position register before it is modified by the
9537     * prescale computation.
9538     */
9539    if (emit->vposition.so_index != INVALID_INDEX) {
9540       struct tgsi_full_dst_register pos_so_dst =
9541          make_dst_output_reg(emit->vposition.so_index);
9542 
9543       /* MOV pos_so, tmp_pos */
9544       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src);
9545    }
9546 
9547    if (emit->vposition.need_prescale) {
9548       /* This code adjusts the vertex position to match the VGPU10 convention.
9549        * If p is the position computed by the shader (usually by applying the
9550        * modelview and projection matrices), the new position q is computed by:
9551        *
9552        * q.x = p.w * trans.x + p.x * scale.x
9553        * q.y = p.w * trans.y + p.y * scale.y
9554        * q.z = p.w * trans.z + p.z * scale.z;
9555        * q.w = p.w * trans.w + p.w;
9556        */
9557       struct tgsi_full_src_register tmp_pos_src_w =
9558          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9559       struct tgsi_full_dst_register tmp_pos_dst =
9560          make_dst_temp_reg(vs_pos_tmp_index);
9561       struct tgsi_full_dst_register tmp_pos_dst_xyz =
9562          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ);
9563 
9564       struct tgsi_full_src_register prescale_scale =
9565          make_src_temp_reg(emit->vposition.prescale_scale_index);
9566       struct tgsi_full_src_register prescale_trans =
9567          make_src_temp_reg(emit->vposition.prescale_trans_index);
9568 
9569       /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */
9570       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz,
9571                            &tmp_pos_src, &prescale_scale);
9572 
9573       /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */
9574       emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w,
9575                            &prescale_trans, &tmp_pos_src);
9576    }
9577    else if (emit->key.vs.undo_viewport) {
9578       /* This code computes the final vertex position from the temporary
9579        * vertex position by undoing the viewport transformation and the
9580        * divide-by-W operation (we convert window coords back to clip coords).
9581        * This is needed when we use the 'draw' module for fallbacks.
9582        * If p is the temp pos in window coords, then the NDC coord q is:
9583        *   q.x = (p.x - vp.x_trans) / vp.x_scale * p.w
9584        *   q.y = (p.y - vp.y_trans) / vp.y_scale * p.w
9585        *   q.z = p.z * p.w
9586        *   q.w = p.w
9587        * CONST[vs_viewport_index] contains:
9588        *   { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans }
9589        */
9590       struct tgsi_full_dst_register tmp_pos_dst =
9591          make_dst_temp_reg(vs_pos_tmp_index);
9592       struct tgsi_full_dst_register tmp_pos_dst_xy =
9593          writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY);
9594       struct tgsi_full_src_register tmp_pos_src_wwww =
9595          scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W);
9596 
9597       struct tgsi_full_dst_register pos_dst_xyz =
9598          writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ);
9599       struct tgsi_full_dst_register pos_dst_w =
9600          writemask_dst(&pos_dst, TGSI_WRITEMASK_W);
9601 
9602       struct tgsi_full_src_register vp_xyzw =
9603          make_src_const_reg(emit->vs.viewport_index);
9604       struct tgsi_full_src_register vp_zwww =
9605          swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
9606                      TGSI_SWIZZLE_W, TGSI_SWIZZLE_W);
9607 
9608       /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */
9609       emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy,
9610                            &tmp_pos_src, &vp_zwww);
9611 
9612       /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */
9613       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy,
9614                            &tmp_pos_src, &vp_xyzw);
9615 
9616       /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */
9617       emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz,
9618                            &tmp_pos_src, &tmp_pos_src_wwww);
9619 
9620       /* MOV pos.w, tmp_pos.w */
9621       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src);
9622    }
9623    else if (vs_pos_tmp_index != INVALID_INDEX) {
9624       /* This code is to handle the case where the temporary vertex
9625        * position register is created when the vertex shader has stream
9626        * output and prescale is disabled because rasterization is to be
9627        * discarded.
9628        */
9629       struct tgsi_full_dst_register pos_dst =
9630          make_dst_output_reg(emit->vposition.out_index);
9631 
9632       /* MOV pos, tmp_pos */
9633       begin_emit_instruction(emit);
9634       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9635       emit_dst_register(emit, &pos_dst);
9636       emit_src_register(emit, &tmp_pos_src);
9637       end_emit_instruction(emit);
9638    }
9639 
9640    /* Restore original vposition.tmp_index value for the next GS vertex.
9641     * It doesn't matter for VS.
9642     */
9643    emit->vposition.tmp_index = vs_pos_tmp_index;
9644 }
9645 
9646 static void
emit_clipping_instructions(struct svga_shader_emitter_v10 * emit)9647 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit)
9648 {
9649    if (emit->clip_mode == CLIP_DISTANCE) {
9650       /* Copy from copy distance temporary to CLIPDIST & the shadow copy */
9651       emit_clip_distance_instructions(emit);
9652 
9653    } else if (emit->clip_mode == CLIP_VERTEX &&
9654               emit->key.last_vertex_stage) {
9655       /* Convert TGSI CLIPVERTEX to CLIPDIST */
9656       emit_clip_vertex_instructions(emit);
9657    }
9658 
9659    /**
9660     * Emit vertex position and take care of legacy user planes only if
9661     * there is a valid vertex position register index.
9662     * This is to take care of the case
9663     * where the shader doesn't output vertex position. Then in
9664     * this case, don't bother to emit more vertex instructions.
9665     */
9666    if (emit->vposition.out_index == INVALID_INDEX)
9667       return;
9668 
9669    /**
9670     * Emit per-vertex clipping instructions for legacy user defined clip planes.
9671     * NOTE: we must emit the clip distance instructions before the
9672     * emit_vpos_instructions() call since the later function will change
9673     * the TEMP[vs_pos_tmp_index] value.
9674     */
9675    if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) {
9676       /* Emit CLIPDIST for legacy user defined clip planes */
9677       emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index);
9678    }
9679 }
9680 
9681 
/**
 * Emit the extra per-vertex instructions: first the clip distance
 * computation, then the clip-coordinate space conversion of the vertex
 * position.  Called for each GS emit-vertex instruction and at the end
 * of VS translation.
 */
static void
emit_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   /* Clipping first: it reads the position temporary before
    * emit_vpos_instructions() modifies it.
    */
   emit_clipping_instructions(emit);

   /* Then write out the (possibly adjusted) vertex position. */
   emit_vpos_instructions(emit);
}
9696 
9697 
9698 /**
9699  * Translate the TGSI_OPCODE_EMIT GS instruction.
9700  */
9701 static boolean
emit_vertex(struct svga_shader_emitter_v10 * emit,const struct tgsi_full_instruction * inst)9702 emit_vertex(struct svga_shader_emitter_v10 *emit,
9703             const struct tgsi_full_instruction *inst)
9704 {
9705    unsigned ret = TRUE;
9706 
9707    assert(emit->unit == PIPE_SHADER_GEOMETRY);
9708 
9709    /**
9710     * Emit the viewport array index for the first vertex.
9711     */
9712    if (emit->gs.viewport_index_out_index != INVALID_INDEX) {
9713       struct tgsi_full_dst_register viewport_index_out =
9714          make_dst_output_reg(emit->gs.viewport_index_out_index);
9715       struct tgsi_full_dst_register viewport_index_out_x =
9716          writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X);
9717       struct tgsi_full_src_register viewport_index_tmp =
9718          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
9719 
9720       /* Set the out index to INVALID_INDEX, so it will not
9721        * be assigned to a temp again in emit_dst_register, and
9722        * the viewport index will not be assigned again in the
9723        * subsequent vertices.
9724        */
9725       emit->gs.viewport_index_out_index = INVALID_INDEX;
9726       emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9727                            &viewport_index_out_x, &viewport_index_tmp);
9728    }
9729 
9730    /**
9731     * Find the stream index associated with this emit vertex instruction.
9732     */
9733    assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE);
9734    unsigned streamIndex = find_stream_index(emit, &inst->Src[0]);
9735 
9736    /**
9737     * According to the ARB_gpu_shader5 spec, the built-in geometry shader
9738     * outputs are always associated with vertex stream zero.
9739     * So emit the extra vertex instructions for position or clip distance
9740     * for stream zero only.
9741     */
9742    if (streamIndex == 0) {
9743       /**
9744        * Before emitting vertex instructions, emit the temporaries for
9745        * the prescale constants based on the viewport index if needed.
9746        */
9747       if (emit->vposition.need_prescale && !emit->vposition.have_prescale)
9748          emit_temp_prescale_instructions(emit);
9749 
9750       emit_vertex_instructions(emit);
9751    }
9752 
9753    begin_emit_instruction(emit);
9754    if (emit->version >= 50) {
9755       if (emit->info.num_stream_output_components[streamIndex] == 0) {
9756          /**
9757           * If there is no output for this stream, discard this instruction.
9758           */
9759          emit->discard_instruction = TRUE;
9760       }
9761       else {
9762          emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE);
9763          emit_stream_register(emit, streamIndex);
9764       }
9765    }
9766    else {
9767       emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE);
9768    }
9769    end_emit_instruction(emit);
9770 
9771    return ret;
9772 }
9773 
9774 
9775 /**
9776  * Emit the extra code to convert from VGPU10's boolean front-face
9777  * register to TGSI's signed front-face register.
9778  *
9779  * TODO: Make temporary front-face register a scalar.
9780  */
9781 static void
emit_frontface_instructions(struct svga_shader_emitter_v10 * emit)9782 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit)
9783 {
9784    assert(emit->unit == PIPE_SHADER_FRAGMENT);
9785 
9786    if (emit->fs.face_input_index != INVALID_INDEX) {
9787       /* convert vgpu10 boolean face register to gallium +/-1 value */
9788       struct tgsi_full_dst_register tmp_dst =
9789          make_dst_temp_reg(emit->fs.face_tmp_index);
9790       struct tgsi_full_src_register one =
9791          make_immediate_reg_float(emit, 1.0f);
9792       struct tgsi_full_src_register neg_one =
9793          make_immediate_reg_float(emit, -1.0f);
9794 
9795       /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */
9796       begin_emit_instruction(emit);
9797       emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE);
9798       emit_dst_register(emit, &tmp_dst);
9799       emit_face_register(emit);
9800       emit_src_register(emit, &one);
9801       emit_src_register(emit, &neg_one);
9802       end_emit_instruction(emit);
9803    }
9804 }
9805 
9806 
9807 /**
9808  * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w.
9809  */
9810 static void
emit_fragcoord_instructions(struct svga_shader_emitter_v10 * emit)9811 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit)
9812 {
9813    assert(emit->unit == PIPE_SHADER_FRAGMENT);
9814 
9815    if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
9816       struct tgsi_full_dst_register tmp_dst =
9817          make_dst_temp_reg(emit->fs.fragcoord_tmp_index);
9818       struct tgsi_full_dst_register tmp_dst_xyz =
9819          writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ);
9820       struct tgsi_full_dst_register tmp_dst_w =
9821          writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9822       struct tgsi_full_src_register one =
9823          make_immediate_reg_float(emit, 1.0f);
9824       struct tgsi_full_src_register fragcoord =
9825          make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index);
9826 
9827       /* save the input index */
9828       unsigned fragcoord_input_index = emit->fs.fragcoord_input_index;
9829       /* set to invalid to prevent substitution in emit_src_register() */
9830       emit->fs.fragcoord_input_index = INVALID_INDEX;
9831 
9832       /* MOV fragcoord_tmp.xyz, fragcoord.xyz */
9833       begin_emit_instruction(emit);
9834       emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
9835       emit_dst_register(emit, &tmp_dst_xyz);
9836       emit_src_register(emit, &fragcoord);
9837       end_emit_instruction(emit);
9838 
9839       /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */
9840       begin_emit_instruction(emit);
9841       emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE);
9842       emit_dst_register(emit, &tmp_dst_w);
9843       emit_src_register(emit, &one);
9844       emit_src_register(emit, &fragcoord);
9845       end_emit_instruction(emit);
9846 
9847       /* restore saved value */
9848       emit->fs.fragcoord_input_index = fragcoord_input_index;
9849    }
9850 }
9851 
9852 
9853 /**
9854  * Emit the extra code to get the current sample position value and
9855  * put it into a temp register.
9856  */
9857 static void
emit_sample_position_instructions(struct svga_shader_emitter_v10 * emit)9858 emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit)
9859 {
9860    assert(emit->unit == PIPE_SHADER_FRAGMENT);
9861 
9862    if (emit->fs.sample_pos_sys_index != INVALID_INDEX) {
9863       assert(emit->version >= 41);
9864 
9865       struct tgsi_full_dst_register tmp_dst =
9866          make_dst_temp_reg(emit->fs.sample_pos_tmp_index);
9867       struct tgsi_full_src_register half =
9868          make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0);
9869 
9870       struct tgsi_full_src_register tmp_src =
9871          make_src_temp_reg(emit->fs.sample_pos_tmp_index);
9872       struct tgsi_full_src_register sample_index_reg =
9873          make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE,
9874                              emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X);
9875 
9876       /* The first src register is a shader resource (if we want a
9877        * multisampled resource sample position) or the rasterizer register
9878        * (if we want the current sample position in the color buffer).  We
9879        * want the later.
9880        */
9881 
9882       /* SAMPLE_POS dst, RASTERIZER, sampleIndex */
9883       begin_emit_instruction(emit);
9884       emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE);
9885       emit_dst_register(emit, &tmp_dst);
9886       emit_rasterizer_register(emit);
9887       emit_src_register(emit, &sample_index_reg);
9888       end_emit_instruction(emit);
9889 
9890       /* Convert from D3D coords to GL coords by adding 0.5 bias */
9891       /* ADD dst, dst, half */
9892       begin_emit_instruction(emit);
9893       emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE);
9894       emit_dst_register(emit, &tmp_dst);
9895       emit_src_register(emit, &tmp_src);
9896       emit_src_register(emit, &half);
9897       end_emit_instruction(emit);
9898    }
9899 }
9900 
9901 
9902 /**
9903  * Emit extra instructions to adjust VS inputs/attributes.  This can
9904  * mean casting a vertex attribute from int to float or setting the
9905  * W component to 1, or both.
9906  */
9907 static void
emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 * emit)9908 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit)
9909 {
9910    const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1;
9911    const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof;
9912    const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof;
9913    const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra;
9914    const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm;
9915    const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled;
9916    const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled;
9917 
9918    unsigned adjust_mask = (save_w_1_mask |
9919                            save_itof_mask |
9920                            save_utof_mask |
9921                            save_is_bgra_mask |
9922                            save_puint_to_snorm_mask |
9923                            save_puint_to_uscaled_mask |
9924                            save_puint_to_sscaled_mask);
9925 
9926    assert(emit->unit == PIPE_SHADER_VERTEX);
9927 
9928    if (adjust_mask) {
9929       struct tgsi_full_src_register one =
9930          make_immediate_reg_float(emit, 1.0f);
9931 
9932       struct tgsi_full_src_register one_int =
9933          make_immediate_reg_int(emit, 1);
9934 
9935       /* We need to turn off these bitmasks while emitting the
9936        * instructions below, then restore them afterward.
9937        */
9938       emit->key.vs.adjust_attrib_w_1 = 0;
9939       emit->key.vs.adjust_attrib_itof = 0;
9940       emit->key.vs.adjust_attrib_utof = 0;
9941       emit->key.vs.attrib_is_bgra = 0;
9942       emit->key.vs.attrib_puint_to_snorm = 0;
9943       emit->key.vs.attrib_puint_to_uscaled = 0;
9944       emit->key.vs.attrib_puint_to_sscaled = 0;
9945 
9946       while (adjust_mask) {
9947          unsigned index = u_bit_scan(&adjust_mask);
9948 
9949          /* skip the instruction if this vertex attribute is not being used */
9950          if (emit->info.input_usage_mask[index] == 0)
9951             continue;
9952 
9953          unsigned tmp = emit->vs.adjusted_input[index];
9954          struct tgsi_full_src_register input_src =
9955             make_src_reg(TGSI_FILE_INPUT, index);
9956 
9957          struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
9958          struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
9959          struct tgsi_full_dst_register tmp_dst_w =
9960             writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);
9961 
9962          /* ITOF/UTOF/MOV tmp, input[index] */
9963          if (save_itof_mask & (1 << index)) {
9964             emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
9965                                  &tmp_dst, &input_src);
9966          }
9967          else if (save_utof_mask & (1 << index)) {
9968             emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
9969                                  &tmp_dst, &input_src);
9970          }
9971          else if (save_puint_to_snorm_mask & (1 << index)) {
9972             emit_puint_to_snorm(emit, &tmp_dst, &input_src);
9973          }
9974          else if (save_puint_to_uscaled_mask & (1 << index)) {
9975             emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
9976          }
9977          else if (save_puint_to_sscaled_mask & (1 << index)) {
9978             emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
9979          }
9980          else {
9981             assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
9982             emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9983                                  &tmp_dst, &input_src);
9984          }
9985 
9986          if (save_is_bgra_mask & (1 << index)) {
9987             emit_swap_r_b(emit, &tmp_dst, &tmp_src);
9988          }
9989 
9990          if (save_w_1_mask & (1 << index)) {
9991             /* MOV tmp.w, 1.0 */
9992             if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
9993                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9994                                     &tmp_dst_w, &one_int);
9995             }
9996             else {
9997                emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
9998                                     &tmp_dst_w, &one);
9999             }
10000          }
10001       }
10002 
10003       emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
10004       emit->key.vs.adjust_attrib_itof = save_itof_mask;
10005       emit->key.vs.adjust_attrib_utof = save_utof_mask;
10006       emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
10007       emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
10008       emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
10009       emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
10010    }
10011 }
10012 
10013 
10014 /* Find zero-value immedate for default layer index */
10015 static void
emit_default_layer_instructions(struct svga_shader_emitter_v10 * emit)10016 emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit)
10017 {
10018    assert(emit->unit == PIPE_SHADER_FRAGMENT);
10019 
10020    /* immediate for default layer index 0 */
10021    if (emit->fs.layer_input_index != INVALID_INDEX) {
10022       union tgsi_immediate_data imm;
10023       imm.Int = 0;
10024       emit->fs.layer_imm_index = find_immediate(emit, imm, 0);
10025    }
10026 }
10027 
10028 
10029 static void
emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned cbuf_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate)10030 emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10031                              unsigned cbuf_index,
10032                              struct tgsi_full_dst_register *scale,
10033                              struct tgsi_full_dst_register *translate)
10034 {
10035    struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index);
10036    struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1);
10037 
10038    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf);
10039    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf);
10040 }
10041 
10042 
10043 /**
10044  * A recursive helper function to find the prescale from the constant buffer
10045  */
10046 static void
find_prescale_from_cbuf(struct svga_shader_emitter_v10 * emit,unsigned index,unsigned num_prescale,struct tgsi_full_src_register * vp_index,struct tgsi_full_dst_register * scale,struct tgsi_full_dst_register * translate,struct tgsi_full_src_register * tmp_src,struct tgsi_full_dst_register * tmp_dst)10047 find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit,
10048                         unsigned index, unsigned num_prescale,
10049                         struct tgsi_full_src_register *vp_index,
10050                         struct tgsi_full_dst_register *scale,
10051                         struct tgsi_full_dst_register *translate,
10052                         struct tgsi_full_src_register *tmp_src,
10053                         struct tgsi_full_dst_register *tmp_dst)
10054 {
10055    if (num_prescale == 0)
10056       return;
10057 
10058    if (index > 0) {
10059       /* ELSE */
10060       emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);
10061    }
10062 
10063    struct tgsi_full_src_register index_src =
10064 	                            make_immediate_reg_int(emit, index);
10065 
10066    if (index == 0) {
10067       /* GE tmp, vp_index, index */
10068       emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst,
10069                            vp_index, &index_src);
10070    } else {
10071       /* EQ tmp, vp_index, index */
10072       emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst,
10073                            vp_index, &index_src);
10074    }
10075 
10076    /* IF tmp */
10077    emit_if(emit, tmp_src);
10078    emit_temp_prescale_from_cbuf(emit,
10079                                 emit->vposition.prescale_cbuf_index + 2 * index,
10080                                 scale, translate);
10081 
10082    find_prescale_from_cbuf(emit, index+1, num_prescale-1,
10083                            vp_index, scale, translate,
10084                            tmp_src, tmp_dst);
10085 
10086    /* ENDIF */
10087    emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);
10088 }
10089 
10090 
10091 /**
10092  * This helper function emits instructions to set the prescale
10093  * and translate temporaries to the correct constants from the
10094  * constant buffer according to the designated viewport.
10095  */
10096 static void
emit_temp_prescale_instructions(struct svga_shader_emitter_v10 * emit)10097 emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit)
10098 {
10099    struct tgsi_full_dst_register prescale_scale =
10100          make_dst_temp_reg(emit->vposition.prescale_scale_index);
10101    struct tgsi_full_dst_register prescale_translate =
10102          make_dst_temp_reg(emit->vposition.prescale_trans_index);
10103 
10104    unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index;
10105 
10106    if (emit->vposition.num_prescale == 1) {
10107       emit_temp_prescale_from_cbuf(emit,
10108                                    prescale_cbuf_index,
10109                                    &prescale_scale, &prescale_translate);
10110    } else {
10111       /**
10112        * Since SM5 device does not support dynamic indexing, we need
10113        * to do the if-else to find the prescale constants for the
10114        * specified viewport.
10115        */
10116       struct tgsi_full_src_register vp_index_src =
10117          make_src_temp_reg(emit->gs.viewport_index_tmp_index);
10118 
10119       struct tgsi_full_src_register vp_index_src_x =
10120          scalar_src(&vp_index_src, TGSI_SWIZZLE_X);
10121 
10122       unsigned tmp = get_temp_index(emit);
10123       struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10124       struct tgsi_full_src_register tmp_src_x =
10125                 scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10126       struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10127 
10128       find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale,
10129                               &vp_index_src_x,
10130 		              &prescale_scale, &prescale_translate,
10131                               &tmp_src_x, &tmp_dst);
10132    }
10133 
10134    /* Mark prescale temporaries are emitted */
10135    emit->vposition.have_prescale = 1;
10136 }
10137 
10138 
10139 /**
10140  * A helper function to emit an instruction in a vertex shader to add a bias
10141  * to the VertexID system value. This patches the VertexID in the SVGA vertex
10142  * shader to include the base vertex of an indexed primitive or the start index
10143  * of a non-indexed primitive.
10144  */
10145 static void
emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 * emit)10146 emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit)
10147 {
10148    struct tgsi_full_src_register vertex_id_bias_index =
10149       make_src_const_reg(emit->vs.vertex_id_bias_index);
10150    struct tgsi_full_src_register vertex_id_sys_src =
10151       make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index);
10152    struct tgsi_full_src_register vertex_id_sys_src_x =
10153       scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X);
10154    struct tgsi_full_dst_register vertex_id_tmp_dst =
10155       make_dst_temp_reg(emit->vs.vertex_id_tmp_index);
10156 
10157    /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */
10158    unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index;
10159    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
10160    emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst,
10161                         &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE,
10162                         FALSE);
10163    emit->vs.vertex_id_tmp_index = vertex_id_tmp_index;
10164 }
10165 
10166 /**
10167  * Hull Shader must have control point outputs. But tessellation
10168  * control shader can return without writing to control point output.
10169  * In this case, the control point output is assumed to be passthrough
10170  * from the control point input.
10171  * This helper function is to write out a control point output first in case
10172  * the tessellation control shader returns before writing a
10173  * control point output.
10174  */
10175 static void
emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 * emit)10176 emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit)
10177 {
10178    assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10179    assert(emit->tcs.control_point_phase);
10180    assert(emit->tcs.control_point_input_index != INVALID_INDEX);
10181    assert(emit->tcs.control_point_out_index != INVALID_INDEX);
10182    assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX);
10183 
10184    /* UARL ADDR[INDEX].x INVOCATION.xxxx */
10185 
10186    struct tgsi_full_src_register invocation_src;
10187    struct tgsi_full_dst_register addr_dst;
10188    struct tgsi_full_dst_register addr_dst_x;
10189    unsigned addr_tmp;
10190 
10191    addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index];
10192    addr_dst = make_dst_temp_reg(addr_tmp);
10193    addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X);
10194 
10195    invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE,
10196                                  emit->tcs.invocation_id_sys_index);
10197 
10198    begin_emit_instruction(emit);
10199    emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10200    emit_dst_register(emit, &addr_dst_x);
10201    emit_src_register(emit, &invocation_src);
10202    end_emit_instruction(emit);
10203 
10204 
10205    /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */
10206 
10207    struct tgsi_full_src_register input_control_point;
10208    struct tgsi_full_dst_register output_control_point;
10209 
10210    input_control_point = make_src_reg(TGSI_FILE_INPUT,
10211                                       emit->tcs.control_point_input_index);
10212    input_control_point.Register.Dimension = 1;
10213    input_control_point.Dimension.Indirect = 1;
10214    input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS;
10215    input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index;
10216    output_control_point =
10217       make_dst_output_reg(emit->tcs.control_point_out_index);
10218 
10219    begin_emit_instruction(emit);
10220    emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE);
10221    emit_dst_register(emit, &output_control_point);
10222    emit_src_register(emit, &input_control_point);
10223    end_emit_instruction(emit);
10224 }
10225 
10226 /**
10227  * This functions constructs temporary tessfactor from VGPU10*_TESSFACTOR
10228  * values in domain shader. SM5 has tessfactors as floating point values where
10229  * as tgsi emit them as vector. This function allows to construct temp
10230  * tessfactor vector similar to TGSI_SEMANTIC_TESSINNER/OUTER filled with
10231  * values from VGPU10*_TESSFACTOR. Use this constructed vector whenever
10232  * TGSI_SEMANTIC_TESSINNER/OUTER is used in shader.
10233  */
10234 static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 * emit)10235 emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
10236 {
10237    struct tgsi_full_src_register src;
10238    struct tgsi_full_dst_register dst;
10239 
10240    if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
10241       dst = make_dst_temp_reg(emit->tes.inner.temp_index);
10242 
10243       switch (emit->tes.prim_mode) {
10244       case PIPE_PRIM_QUADS:
10245          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10246                   emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
10247          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10248          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10249          /* fallthrough */
10250       case PIPE_PRIM_TRIANGLES:
10251          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10252                   emit->tes.inner.in_index, TGSI_SWIZZLE_X);
10253          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10254          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10255          break;
10256       case PIPE_PRIM_LINES:
10257          /**
10258           * As per SM5 spec, InsideTessFactor for isolines are unused.
10259           * In fact glsl tessInnerLevel for isolines doesn't mean anything but if
10260           * any application try to read tessInnerLevel in TES when primitive type
10261           * is isolines, then instead of driver throwing segfault for accesing it,
10262           * return atleast vec(1.0f)
10263           */
10264          src = make_immediate_reg_float(emit, 1.0f);
10265          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10266          break;
10267       default:
10268          break;
10269       }
10270    }
10271 
10272    if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
10273       dst = make_dst_temp_reg(emit->tes.outer.temp_index);
10274 
10275       switch (emit->tes.prim_mode) {
10276       case PIPE_PRIM_QUADS:
10277          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10278                   emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
10279          dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
10280          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10281          /* fallthrough */
10282       case PIPE_PRIM_TRIANGLES:
10283          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10284                   emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
10285          dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
10286          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10287          /* fallthrough */
10288       case PIPE_PRIM_LINES:
10289          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10290                   emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
10291          dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
10292          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10293 
10294          src = make_src_scalar_reg(TGSI_FILE_INPUT,
10295                   emit->tes.outer.in_index , TGSI_SWIZZLE_X);
10296          dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
10297          emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10298 
10299          break;
10300       default:
10301          break;
10302       }
10303    }
10304 }
10305 
10306 
10307 static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 * emit)10308 emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
10309 {
10310    struct tgsi_full_src_register src;
10311    struct tgsi_full_dst_register dst;
10312    unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
10313                                                  emit->initialize_temp_index);
10314    src = make_immediate_reg_float(emit, 0.0f);
10315    dst = make_dst_temp_reg(vgpu10_temp_index);
10316    emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
10317    emit->temp_map[emit->initialize_temp_index].initialized = TRUE;
10318    emit->initialize_temp_index = INVALID_INDEX;
10319 }
10320 
10321 
10322 /**
10323  * Emit any extra/helper declarations/code that we might need between
10324  * the declaration section and code section.
10325  */
10326 static boolean
emit_pre_helpers(struct svga_shader_emitter_v10 * emit)10327 emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
10328 {
10329    /* Properties */
10330    if (emit->unit == PIPE_SHADER_GEOMETRY)
10331       emit_property_instructions(emit);
10332    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10333       emit_hull_shader_declarations(emit);
10334 
10335       /* Save the position of the first instruction token so that we can
10336        * do a second pass of the instructions for the patch constant phase.
10337        */
10338       emit->tcs.instruction_token_pos = emit->cur_tgsi_token;
10339       emit->tcs.fork_phase_add_signature = FALSE;
10340 
10341       if (!emit_hull_shader_control_point_phase(emit)) {
10342          emit->skip_instruction = TRUE;
10343          return TRUE;
10344       }
10345 
10346       /* Set the current tcs phase to control point phase */
10347       emit->tcs.control_point_phase = TRUE;
10348    }
10349    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10350       emit_domain_shader_declarations(emit);
10351    }
10352 
10353    /* Declare inputs */
10354    if (!emit_input_declarations(emit))
10355       return FALSE;
10356 
10357    /* Declare outputs */
10358    if (!emit_output_declarations(emit))
10359       return FALSE;
10360 
10361    /* Declare temporary registers */
10362    emit_temporaries_declaration(emit);
10363 
10364    /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates
10365     * will already be declared in hs_decls (emit_hull_shader_declarations)
10366     */
10367    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10368       /* Declare constant registers */
10369       emit_constant_declaration(emit);
10370 
10371       /* Declare samplers and resources */
10372       emit_sampler_declarations(emit);
10373       emit_resource_declarations(emit);
10374 
10375       alloc_common_immediates(emit);
10376       /* Now, emit the constant block containing all the immediates
10377        * declared by shader, as well as the extra ones seen above.
10378        */
10379    }
10380 
10381    if (emit->unit != PIPE_SHADER_FRAGMENT) {
10382       /*
10383        * Declare clip distance output registers for ClipVertex or
10384        * user defined planes
10385        */
10386       emit_clip_distance_declarations(emit);
10387    }
10388 
10389    if (emit->unit == PIPE_SHADER_FRAGMENT &&
10390        emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10391       float alpha = emit->key.fs.alpha_ref;
10392       emit->fs.alpha_ref_index =
10393          alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
10394    }
10395 
10396    if (emit->unit != PIPE_SHADER_TESS_CTRL) {
10397       /**
10398        * For PIPE_SHADER_TESS_CTRL, immediates are already declared in
10399        * hs_decls
10400        */
10401       emit_vgpu10_immediates_block(emit);
10402    }
10403    else {
10404       emit_tcs_default_control_point_output(emit);
10405    }
10406 
10407    if (emit->unit == PIPE_SHADER_FRAGMENT) {
10408       emit_frontface_instructions(emit);
10409       emit_fragcoord_instructions(emit);
10410       emit_sample_position_instructions(emit);
10411       emit_default_layer_instructions(emit);
10412    }
10413    else if (emit->unit == PIPE_SHADER_VERTEX) {
10414       emit_vertex_attrib_instructions(emit);
10415 
10416       if (emit->info.uses_vertexid)
10417          emit_vertex_id_nobase_instruction(emit);
10418    }
10419    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10420       emit_temp_tessfactor_instructions(emit);
10421    }
10422 
10423    /**
10424     * For geometry shader that writes to viewport index, the prescale
10425     * temporaries will be done at the first vertex emission.
10426     */
10427    if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1)
10428       emit_temp_prescale_instructions(emit);
10429 
10430    return TRUE;
10431 }
10432 
10433 
10434 /**
10435  * The device has no direct support for the pipe_blend_state::alpha_to_one
10436  * option so we implement it here with shader code.
10437  *
10438  * Note that this is kind of pointless, actually.  Here we're clobbering
10439  * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
10440  * up with 100% coverage.  That's almost certainly not what the user wants.
10441  * The work-around is to add extra shader code to compute coverage from alpha
10442  * and write it to the coverage output register (if the user's shader doesn't
10443  * do so already).  We'll probably do that in the future.
10444  */
10445 static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)10446 emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
10447                                unsigned fs_color_tmp_index)
10448 {
10449    struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
10450    unsigned i;
10451 
10452    /* Note: it's not 100% clear from the spec if we're supposed to clobber
10453     * the alpha for all render targets.  But that's what NVIDIA does and
10454     * that's what Piglit tests.
10455     */
10456    for (i = 0; i < emit->fs.num_color_outputs; i++) {
10457       struct tgsi_full_dst_register color_dst;
10458 
10459       if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
10460          /* write to the temp color register */
10461          color_dst = make_dst_temp_reg(fs_color_tmp_index);
10462       }
10463       else {
10464          /* write directly to the color[i] output */
10465          color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
10466       }
10467 
10468       color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);
10469 
10470       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one);
10471    }
10472 }
10473 
10474 
10475 /**
10476  * Emit alpha test code.  This compares TEMP[fs_color_tmp_index].w
10477  * against the alpha reference value and discards the fragment if the
10478  * comparison fails.
10479  */
10480 static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)10481 emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
10482                              unsigned fs_color_tmp_index)
10483 {
10484    /* compare output color's alpha to alpha ref and kill */
10485    unsigned tmp = get_temp_index(emit);
10486    struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
10487    struct tgsi_full_src_register tmp_src_x =
10488       scalar_src(&tmp_src, TGSI_SWIZZLE_X);
10489    struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
10490    struct tgsi_full_src_register color_src =
10491       make_src_temp_reg(fs_color_tmp_index);
10492    struct tgsi_full_src_register color_src_w =
10493       scalar_src(&color_src, TGSI_SWIZZLE_W);
10494    struct tgsi_full_src_register ref_src =
10495       make_src_immediate_reg(emit->fs.alpha_ref_index);
10496    struct tgsi_full_dst_register color_dst =
10497       make_dst_output_reg(emit->fs.color_out_index[0]);
10498 
10499    assert(emit->unit == PIPE_SHADER_FRAGMENT);
10500 
10501    /* dst = src0 'alpha_func' src1 */
10502    emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
10503                    &color_src_w, &ref_src);
10504 
10505    /* DISCARD if dst.x == 0 */
10506    begin_emit_instruction(emit);
10507    emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
10508    emit_src_register(emit, &tmp_src_x);
10509    end_emit_instruction(emit);
10510 
10511    /* If we don't need to broadcast the color below, emit the final color here.
10512     */
10513    if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
10514       /* MOV output.color, tempcolor */
10515       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10516    }
10517 
10518    free_temp_indexes(emit);
10519 }
10520 
10521 
10522 /**
10523  * Emit instructions for writing a single color output to multiple
10524  * color buffers.
10525  * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
10526  * when key.fs.white_fragments is true).
10527  * property is set and the number of render targets is greater than one.
10528  * \param fs_color_tmp_index  index of the temp register that holds the
10529  *                            color to broadcast.
10530  */
10531 static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 * emit,unsigned fs_color_tmp_index)10532 emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
10533                                  unsigned fs_color_tmp_index)
10534 {
10535    const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
10536    unsigned i;
10537    struct tgsi_full_src_register color_src;
10538 
10539    if (emit->key.fs.white_fragments) {
10540       /* set all color outputs to white */
10541       color_src = make_immediate_reg_float(emit, 1.0f);
10542    }
10543    else {
10544       /* set all color outputs to TEMP[fs_color_tmp_index] */
10545       assert(fs_color_tmp_index != INVALID_INDEX);
10546       color_src = make_src_temp_reg(fs_color_tmp_index);
10547    }
10548 
10549    assert(emit->unit == PIPE_SHADER_FRAGMENT);
10550 
10551    for (i = 0; i < n; i++) {
10552       unsigned output_reg = emit->fs.color_out_index[i];
10553       struct tgsi_full_dst_register color_dst =
10554          make_dst_output_reg(output_reg);
10555 
10556       /* Fill in this semantic here since we'll use it later in
10557        * emit_dst_register().
10558        */
10559       emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;
10560 
10561       /* MOV output.color[i], tempcolor */
10562       emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
10563    }
10564 }
10565 
10566 
10567 /**
10568  * Emit extra helper code after the original shader code, but before the
10569  * last END/RET instruction.
10570  * For vertex shaders this means emitting the extra code to apply the
10571  * prescale scale/translation.
10572  */
10573 static boolean
emit_post_helpers(struct svga_shader_emitter_v10 * emit)10574 emit_post_helpers(struct svga_shader_emitter_v10 *emit)
10575 {
10576    if (emit->unit == PIPE_SHADER_VERTEX) {
10577       emit_vertex_instructions(emit);
10578    }
10579    else if (emit->unit == PIPE_SHADER_FRAGMENT) {
10580       const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;
10581 
10582       assert(!(emit->key.fs.white_fragments &&
10583                emit->key.fs.write_color0_to_n_cbufs == 0));
10584 
10585       /* We no longer want emit_dst_register() to substitute the
10586        * temporary fragment color register for the real color output.
10587        */
10588       emit->fs.color_tmp_index = INVALID_INDEX;
10589 
10590       if (emit->key.fs.alpha_to_one) {
10591          emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
10592       }
10593       if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
10594          emit_alpha_test_instructions(emit, fs_color_tmp_index);
10595       }
10596       if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
10597           emit->key.fs.white_fragments) {
10598          emit_broadcast_color_instructions(emit, fs_color_tmp_index);
10599       }
10600    }
10601    else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10602       if (!emit->tcs.control_point_phase) {
10603          /* store the tessellation levels in the patch constant phase only */
10604          store_tesslevels(emit);
10605       }
10606       else {
10607          emit_clipping_instructions(emit);
10608       }
10609    }
10610    else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
10611       emit_vertex_instructions(emit);
10612    }
10613 
10614    return TRUE;
10615 }
10616 
10617 
10618 /**
10619  * Translate the TGSI tokens into VGPU10 tokens.
10620  */
10621 static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)10622 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
10623                          const struct tgsi_token *tokens)
10624 {
10625    struct tgsi_parse_context parse;
10626    boolean ret = TRUE;
10627    boolean pre_helpers_emitted = FALSE;
10628    unsigned inst_number = 0;
10629 
10630    tgsi_parse_init(&parse, tokens);
10631 
10632    while (!tgsi_parse_end_of_tokens(&parse)) {
10633 
10634       /* Save the current tgsi token starting position */
10635       emit->cur_tgsi_token = parse.Position;
10636 
10637       tgsi_parse_token(&parse);
10638 
10639       switch (parse.FullToken.Token.Type) {
10640       case TGSI_TOKEN_TYPE_IMMEDIATE:
10641          ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
10642          if (!ret)
10643             goto done;
10644          break;
10645 
10646       case TGSI_TOKEN_TYPE_DECLARATION:
10647          ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
10648          if (!ret)
10649             goto done;
10650          break;
10651 
10652       case TGSI_TOKEN_TYPE_INSTRUCTION:
10653          if (!pre_helpers_emitted) {
10654             ret = emit_pre_helpers(emit);
10655             if (!ret)
10656                goto done;
10657             pre_helpers_emitted = TRUE;
10658          }
10659          ret = emit_vgpu10_instruction(emit, inst_number++,
10660                                        &parse.FullToken.FullInstruction);
10661 
10662          /* Usually this applies to TCS only. If shader is reading control
10663           * point outputs in control point phase, we should reemit all
10664           * instructions which are writting into control point output in
10665           * control phase to store results into temporaries.
10666           */
10667          if (emit->reemit_instruction) {
10668             assert(emit->unit == PIPE_SHADER_TESS_CTRL);
10669             ret = emit_vgpu10_instruction(emit, inst_number,
10670                                           &parse.FullToken.FullInstruction);
10671          }
10672          else if (emit->initialize_temp_index != INVALID_INDEX) {
10673             emit_initialize_temp_instruction(emit);
10674             emit->initialize_temp_index = INVALID_INDEX;
10675             ret = emit_vgpu10_instruction(emit, inst_number - 1,
10676                                           &parse.FullToken.FullInstruction);
10677          }
10678 
10679          if (!ret)
10680             goto done;
10681          break;
10682 
10683       case TGSI_TOKEN_TYPE_PROPERTY:
10684          ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
10685          if (!ret)
10686             goto done;
10687          break;
10688 
10689       default:
10690          break;
10691       }
10692    }
10693 
10694    if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10695       ret = emit_hull_shader_patch_constant_phase(emit, &parse);
10696    }
10697 
10698 done:
10699    tgsi_parse_free(&parse);
10700    return ret;
10701 }
10702 
10703 
10704 /**
10705  * Emit the first VGPU10 shader tokens.
10706  */
10707 static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 * emit)10708 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
10709 {
10710    VGPU10ProgramToken ptoken;
10711 
10712    /* First token: VGPU10ProgramToken  (version info, program type (VS,GS,PS)) */
10713    ptoken.value = 0; /* init whole token to zero */
10714    ptoken.majorVersion = emit->version / 10;
10715    ptoken.minorVersion = emit->version % 10;
10716    ptoken.programType = translate_shader_type(emit->unit);
10717    if (!emit_dword(emit, ptoken.value))
10718       return FALSE;
10719 
10720    /* Second token: total length of shader, in tokens.  We can't fill this
10721     * in until we're all done.  Emit zero for now.
10722     */
10723    if (!emit_dword(emit, 0))
10724       return FALSE;
10725 
10726    if (emit->version >= 50) {
10727       VGPU10OpcodeToken0 token;
10728 
10729       if (emit->unit == PIPE_SHADER_TESS_CTRL) {
10730          /* For hull shader, we need to start the declarations phase first before
10731           * emitting any declarations including the global flags.
10732           */
10733          token.value = 0;
10734          token.opcodeType = VGPU10_OPCODE_HS_DECLS;
10735          begin_emit_instruction(emit);
10736          emit_dword(emit, token.value);
10737          end_emit_instruction(emit);
10738       }
10739 
10740       /* Emit global flags */
10741       token.value = 0;    /* init whole token to zero */
10742       token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10743       token.enableDoublePrecisionFloatOps = 1;  /* set bit */
10744       token.instructionLength = 1;
10745       if (!emit_dword(emit, token.value))
10746          return FALSE;
10747    }
10748 
10749    if (emit->version >= 40) {
10750       VGPU10OpcodeToken0 token;
10751 
10752       /* Reserved for global flag such as refactoringAllowed.
10753        * If the shader does not use the precise qualifier, we will set the
10754        * refactoringAllowed global flag; otherwise, we will leave the reserved
10755        * token to NOP.
10756        */
10757       emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
10758       token.value = 0;
10759       token.opcodeType = VGPU10_OPCODE_NOP;
10760       token.instructionLength = 1;
10761       if (!emit_dword(emit, token.value))
10762          return FALSE;
10763    }
10764 
10765    return TRUE;
10766 }
10767 
10768 
10769 static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 * emit)10770 emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
10771 {
10772    VGPU10ProgramToken *tokens;
10773 
10774    /* Replace the second token with total shader length */
10775    tokens = (VGPU10ProgramToken *) emit->buf;
10776    tokens[1].value = emit_get_num_tokens(emit);
10777 
10778    if (emit->version >= 40 && !emit->uses_precise_qualifier) {
10779       /* Replace the reserved token with the RefactoringAllowed global flag */
10780       VGPU10OpcodeToken0 *ptoken;
10781 
10782       ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
10783       assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
10784       ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
10785       ptoken->refactoringAllowed = 1;
10786    }
10787 
10788    return TRUE;
10789 }
10790 
10791 
/**
 * Rewrite the fragment shader so that it reads the BCOLORs and uses the
 * FACE register to select between the front and back colors.
 *
 * \return the token stream produced by tgsi_add_two_side()
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   const struct tgsi_token *twoside_tokens;

   /* debug dump, compiled out */
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }

   twoside_tokens = tgsi_add_two_side(tokens);

   /* debug dump, compiled out */
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(twoside_tokens, 0);
   }

   return twoside_tokens;
}
10810 
10811 
10812 /**
10813  * Modify the FS to do polygon stipple.
10814  */
10815 static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 * emit,const struct tgsi_token * tokens)10816 transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
10817                       const struct tgsi_token *tokens)
10818 {
10819    const struct tgsi_token *new_tokens;
10820    unsigned unit;
10821 
10822    if (0) {
10823       debug_printf("Before pstipple ------------------\n");
10824       tgsi_dump(tokens,0);
10825    }
10826 
10827    new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
10828                                                      TGSI_FILE_INPUT);
10829 
10830    emit->fs.pstipple_sampler_unit = unit;
10831 
10832    /* Setup texture state for stipple */
10833    emit->sampler_target[unit] = TGSI_TEXTURE_2D;
10834    emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
10835    emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
10836    emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
10837    emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
10838 
10839    if (0) {
10840       debug_printf("After pstipple ------------------\n");
10841       tgsi_dump(new_tokens, 0);
10842    }
10843 
10844    return new_tokens;
10845 }
10846 
/**
 * Rewrite the fragment shader to support anti-aliased points.
 *
 * \param aa_coord_index  passed through to tgsi_add_aa_point()
 * \return the token stream produced by tgsi_add_aa_point()
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   const struct tgsi_token *aapoint_tokens;

   /* debug dump, compiled out */
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }

   aapoint_tokens = tgsi_add_aa_point(tokens, aa_coord_index);

   /* debug dump, compiled out */
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(aapoint_tokens, 0);
   }

   return aapoint_tokens;
}
10865 
10866 
10867 /**
10868  * A helper function to determine the shader in the previous stage and
10869  * then call the linker function to determine the input mapping for this
10870  * shader to match the output indices from the shader in the previous stage.
10871  */
10872 static void
compute_input_mapping(struct svga_context * svga,struct svga_shader_emitter_v10 * emit,enum pipe_shader_type unit)10873 compute_input_mapping(struct svga_context *svga,
10874                       struct svga_shader_emitter_v10 *emit,
10875                       enum pipe_shader_type unit)
10876 {
10877    struct svga_shader *prevShader = NULL;   /* shader in the previous stage */
10878 
10879    if (unit == PIPE_SHADER_FRAGMENT) {
10880       prevShader = svga->curr.gs ?
10881          &svga->curr.gs->base : (svga->curr.tes ?
10882          &svga->curr.tes->base : &svga->curr.vs->base);
10883    } else if (unit == PIPE_SHADER_GEOMETRY) {
10884       prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
10885    } else if (unit == PIPE_SHADER_TESS_EVAL) {
10886       assert(svga->curr.tcs);
10887       prevShader = &svga->curr.tcs->base;
10888    } else if (unit == PIPE_SHADER_TESS_CTRL) {
10889       assert(svga->curr.vs);
10890       prevShader = &svga->curr.vs->base;
10891    }
10892 
10893    if (prevShader != NULL) {
10894       svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
10895       emit->prevShaderInfo = &prevShader->info;
10896    }
10897    else {
10898       /**
10899        * Since vertex shader does not need to go through the linker to
10900        * establish the input map, we need to make sure the highest index
10901        * of input registers is set properly here.
10902        */
10903       emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
10904                                          emit->info.file_max[TGSI_FILE_INPUT]);
10905    }
10906 }
10907 
10908 
10909 /**
10910  * Copies the shader signature info to the shader variant
10911  */
10912 static void
copy_shader_signature(struct svga_shader_signature * sgn,struct svga_shader_variant * variant)10913 copy_shader_signature(struct svga_shader_signature *sgn,
10914                       struct svga_shader_variant *variant)
10915 {
10916    SVGA3dDXShaderSignatureHeader *header = &sgn->header;
10917 
10918    /* Calculate the signature length */
10919    variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
10920                            (header->numInputSignatures +
10921                             header->numOutputSignatures +
10922                             header->numPatchConstantSignatures) *
10923                            sizeof(SVGA3dDXShaderSignatureEntry);
10924 
10925    /* Allocate buffer for the signature info */
10926    variant->signature =
10927       (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);
10928 
10929    char *sgnBuf = (char *)variant->signature;
10930    unsigned sgnLen;
10931 
10932    /* Copy the signature info to the shader variant structure */
10933    memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
10934    sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);
10935 
10936    if (header->numInputSignatures) {
10937       sgnLen =
10938          header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10939       memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
10940       sgnBuf += sgnLen;
10941    }
10942 
10943    if (header->numOutputSignatures) {
10944       sgnLen =
10945          header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10946       memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
10947       sgnBuf += sgnLen;
10948    }
10949 
10950    if (header->numPatchConstantSignatures) {
10951       sgnLen =
10952          header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
10953       memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
10954    }
10955 }
10956 
10957 
10958 /**
10959  * This is the main entrypoint for the TGSI -> VPGU10 translator.
10960  */
10961 struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context * svga,const struct svga_shader * shader,const struct svga_compile_key * key,enum pipe_shader_type unit)10962 svga_tgsi_vgpu10_translate(struct svga_context *svga,
10963                            const struct svga_shader *shader,
10964                            const struct svga_compile_key *key,
10965                            enum pipe_shader_type unit)
10966 {
10967    struct svga_shader_variant *variant = NULL;
10968    struct svga_shader_emitter_v10 *emit;
10969    const struct tgsi_token *tokens = shader->tokens;
10970 
10971    (void) make_immediate_reg_double;   /* unused at this time */
10972 
10973    assert(unit == PIPE_SHADER_VERTEX ||
10974           unit == PIPE_SHADER_GEOMETRY ||
10975           unit == PIPE_SHADER_FRAGMENT ||
10976           unit == PIPE_SHADER_TESS_CTRL ||
10977           unit == PIPE_SHADER_TESS_EVAL ||
10978           unit == PIPE_SHADER_COMPUTE);
10979 
10980    /* These two flags cannot be used together */
10981    assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);
10982 
10983    SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
10984    /*
10985     * Setup the code emitter
10986     */
10987    emit = alloc_emitter();
10988    if (!emit)
10989       goto done;
10990 
10991    emit->unit = unit;
10992    if (svga_have_sm5(svga)) {
10993       emit->version = 50;
10994    } else if (svga_have_sm4_1(svga)) {
10995       emit->version = 41;
10996    } else {
10997       emit->version = 40;
10998    }
10999 
11000    emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;
11001 
11002    emit->key = *key;
11003 
11004    emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
11005                                     emit->key.gs.need_prescale ||
11006                                     emit->key.tes.need_prescale);
11007 
11008    /* Determine how many prescale factors in the constant buffer */
11009    emit->vposition.num_prescale = 1;
11010    if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
11011       assert(emit->unit == PIPE_SHADER_GEOMETRY);
11012       emit->vposition.num_prescale = emit->key.gs.num_prescale;
11013    }
11014 
11015    emit->vposition.tmp_index = INVALID_INDEX;
11016    emit->vposition.so_index = INVALID_INDEX;
11017    emit->vposition.out_index = INVALID_INDEX;
11018 
11019    emit->vs.vertex_id_sys_index = INVALID_INDEX;
11020    emit->vs.vertex_id_tmp_index = INVALID_INDEX;
11021    emit->vs.vertex_id_bias_index = INVALID_INDEX;
11022 
11023    emit->fs.color_tmp_index = INVALID_INDEX;
11024    emit->fs.face_input_index = INVALID_INDEX;
11025    emit->fs.fragcoord_input_index = INVALID_INDEX;
11026    emit->fs.sample_id_sys_index = INVALID_INDEX;
11027    emit->fs.sample_pos_sys_index = INVALID_INDEX;
11028    emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
11029    emit->fs.layer_input_index = INVALID_INDEX;
11030    emit->fs.layer_imm_index = INVALID_INDEX;
11031 
11032    emit->gs.prim_id_index = INVALID_INDEX;
11033    emit->gs.invocation_id_sys_index = INVALID_INDEX;
11034    emit->gs.viewport_index_out_index = INVALID_INDEX;
11035    emit->gs.viewport_index_tmp_index = INVALID_INDEX;
11036 
11037    emit->tcs.vertices_per_patch_index = INVALID_INDEX;
11038    emit->tcs.invocation_id_sys_index = INVALID_INDEX;
11039    emit->tcs.control_point_input_index = INVALID_INDEX;
11040    emit->tcs.control_point_addr_index = INVALID_INDEX;
11041    emit->tcs.control_point_out_index = INVALID_INDEX;
11042    emit->tcs.control_point_tmp_index = INVALID_INDEX;
11043    emit->tcs.control_point_out_count = 0;
11044    emit->tcs.inner.out_index = INVALID_INDEX;
11045    emit->tcs.inner.out_index = INVALID_INDEX;
11046    emit->tcs.inner.temp_index = INVALID_INDEX;
11047    emit->tcs.inner.tgsi_index = INVALID_INDEX;
11048    emit->tcs.outer.out_index = INVALID_INDEX;
11049    emit->tcs.outer.temp_index = INVALID_INDEX;
11050    emit->tcs.outer.tgsi_index = INVALID_INDEX;
11051    emit->tcs.patch_generic_out_count = 0;
11052    emit->tcs.patch_generic_out_index = INVALID_INDEX;
11053    emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
11054    emit->tcs.prim_id_index = INVALID_INDEX;
11055 
11056    emit->tes.tesscoord_sys_index = INVALID_INDEX;
11057    emit->tes.inner.in_index = INVALID_INDEX;
11058    emit->tes.inner.temp_index = INVALID_INDEX;
11059    emit->tes.inner.tgsi_index = INVALID_INDEX;
11060    emit->tes.outer.in_index = INVALID_INDEX;
11061    emit->tes.outer.temp_index = INVALID_INDEX;
11062    emit->tes.outer.tgsi_index = INVALID_INDEX;
11063    emit->tes.prim_id_index = INVALID_INDEX;
11064 
11065    emit->clip_dist_out_index = INVALID_INDEX;
11066    emit->clip_dist_tmp_index = INVALID_INDEX;
11067    emit->clip_dist_so_index = INVALID_INDEX;
11068    emit->clip_vertex_out_index = INVALID_INDEX;
11069    emit->clip_vertex_tmp_index = INVALID_INDEX;
11070    emit->svga_debug_callback = svga->debug.callback;
11071 
11072    emit->index_range.start_index = INVALID_INDEX;
11073    emit->index_range.count = 0;
11074    emit->index_range.required = FALSE;
11075    emit->index_range.operandType = VGPU10_NUM_OPERANDS;
11076    emit->index_range.dim = 0;
11077    emit->index_range.size = 0;
11078 
11079    emit->current_loop_depth = 0;
11080 
11081    emit->initialize_temp_index = INVALID_INDEX;
11082 
11083    if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
11084       emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
11085    }
11086 
11087    if (unit == PIPE_SHADER_FRAGMENT) {
11088       if (key->fs.light_twoside) {
11089          tokens = transform_fs_twoside(tokens);
11090       }
11091       if (key->fs.pstipple) {
11092          const struct tgsi_token *new_tokens =
11093             transform_fs_pstipple(emit, tokens);
11094          if (tokens != shader->tokens) {
11095             /* free the two-sided shader tokens */
11096             tgsi_free_tokens(tokens);
11097          }
11098          tokens = new_tokens;
11099       }
11100       if (key->fs.aa_point) {
11101          tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
11102       }
11103    }
11104 
11105    if (SVGA_DEBUG & DEBUG_TGSI) {
11106       debug_printf("#####################################\n");
11107       debug_printf("### TGSI Shader %u\n", shader->id);
11108       tgsi_dump(tokens, 0);
11109    }
11110 
11111    /**
11112     * Rescan the header if the token string is different from the one
11113     * included in the shader; otherwise, the header info is already up-to-date
11114     */
11115    if (tokens != shader->tokens) {
11116       tgsi_scan_shader(tokens, &emit->info);
11117    } else {
11118       emit->info = shader->info;
11119    }
11120 
11121    emit->num_outputs = emit->info.num_outputs;
11122 
11123    /**
11124     * Compute input mapping to match the outputs from shader
11125     * in the previous stage
11126     */
11127    compute_input_mapping(svga, emit, unit);
11128 
11129    determine_clipping_mode(emit);
11130 
11131    if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
11132        unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
11133       if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
11134          /* if there is stream output declarations associated
11135           * with this shader or the shader writes to ClipDistance
11136           * then reserve extra registers for the non-adjusted vertex position
11137           * and the ClipDistance shadow copy.
11138           */
11139          emit->vposition.so_index = emit->num_outputs++;
11140 
11141          if (emit->clip_mode == CLIP_DISTANCE) {
11142             emit->clip_dist_so_index = emit->num_outputs++;
11143             if (emit->info.num_written_clipdistance > 4)
11144                emit->num_outputs++;
11145          }
11146       }
11147    }
11148 
11149    /*
11150     * Do actual shader translation.
11151     */
11152    if (!emit_vgpu10_header(emit)) {
11153       debug_printf("svga: emit VGPU10 header failed\n");
11154       goto cleanup;
11155    }
11156 
11157    if (!emit_vgpu10_instructions(emit, tokens)) {
11158       debug_printf("svga: emit VGPU10 instructions failed\n");
11159       goto cleanup;
11160    }
11161 
11162    if (!emit_vgpu10_tail(emit)) {
11163       debug_printf("svga: emit VGPU10 tail failed\n");
11164       goto cleanup;
11165    }
11166 
11167    if (emit->register_overflow) {
11168       goto cleanup;
11169    }
11170 
11171    /*
11172     * Create, initialize the 'variant' object.
11173     */
11174    variant = svga_new_shader_variant(svga, unit);
11175    if (!variant)
11176       goto cleanup;
11177 
11178    variant->shader = shader;
11179    variant->nr_tokens = emit_get_num_tokens(emit);
11180    variant->tokens = (const unsigned *)emit->buf;
11181 
11182    /* Copy shader signature info to the shader variant */
11183    if (svga_have_sm5(svga)) {
11184       copy_shader_signature(&emit->signature, variant);
11185    }
11186 
11187    emit->buf = NULL;  /* buffer is no longer owed by emitter context */
11188    memcpy(&variant->key, key, sizeof(*key));
11189    variant->id = UTIL_BITMASK_INVALID_INDEX;
11190 
11191    /* The extra constant starting offset starts with the number of
11192     * shader constants declared in the shader.
11193     */
11194    variant->extra_const_start = emit->num_shader_consts[0];
11195    if (key->gs.wide_point) {
11196       /**
11197        * The extra constant added in the transformed shader
11198        * for inverse viewport scale is to be supplied by the driver.
11199        * So the extra constant starting offset needs to be reduced by 1.
11200        */
11201       assert(variant->extra_const_start > 0);
11202       variant->extra_const_start--;
11203    }
11204 
11205    if (unit == PIPE_SHADER_FRAGMENT) {
11206       struct svga_fs_variant *fs_variant = svga_fs_variant(variant);
11207 
11208       fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;
11209 
11210       /* If there was exactly one write to a fragment shader output register
11211        * and it came from a constant buffer, we know all fragments will have
11212        * the same color (except for blending).
11213        */
11214       fs_variant->constant_color_output =
11215          emit->constant_color_output && emit->num_output_writes == 1;
11216 
11217       /** keep track in the variant if flat interpolation is used
11218        *  for any of the varyings.
11219        */
11220       fs_variant->uses_flat_interp = emit->uses_flat_interp;
11221 
11222       fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
11223    }
11224    else if (unit == PIPE_SHADER_TESS_EVAL) {
11225       struct svga_tes_variant *tes_variant = svga_tes_variant(variant);
11226 
11227       /* Keep track in the tes variant some of the layout parameters.
11228        * These parameters will be referenced by the tcs to emit
11229        * the necessary declarations for the hull shader.
11230        */
11231       tes_variant->prim_mode = emit->tes.prim_mode;
11232       tes_variant->spacing = emit->tes.spacing;
11233       tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
11234       tes_variant->point_mode = emit->tes.point_mode;
11235    }
11236 
11237 
11238    if (tokens != shader->tokens) {
11239       tgsi_free_tokens(tokens);
11240    }
11241 
11242 cleanup:
11243    free_emitter(emit);
11244 
11245 done:
11246    SVGA_STATS_TIME_POP(svga_sws(svga));
11247    return variant;
11248 }
11249