1 /*
2  * Copyright © 2014-2015 Broadcom
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "nir_to_rc.h"
7 #include "compiler/nir/nir.h"
8 #include "compiler/nir/nir_deref.h"
9 #include "compiler/nir/nir_legacy.h"
10 #include "compiler/nir/nir_worklist.h"
11 #include "pipe/p_screen.h"
12 #include "pipe/p_state.h"
13 #include "tgsi/tgsi_dump.h"
14 #include "tgsi/tgsi_from_mesa.h"
15 #include "tgsi/tgsi_info.h"
16 #include "tgsi/tgsi_parse.h"
17 #include "tgsi/tgsi_ureg.h"
18 #include "tgsi/tgsi_util.h"
19 #include "util/u_debug.h"
20 #include "util/u_dynarray.h"
21 #include "util/u_math.h"
22 #include "util/u_memory.h"
23 #include "r300_nir.h"
24 #include "r300_screen.h"
25 
26 struct ntr_insn {
27    enum tgsi_opcode opcode;
28    struct ureg_dst dst[2];
29    struct ureg_src src[4];
30    enum tgsi_texture_type tex_target;
31    enum tgsi_return_type tex_return_type;
32    struct tgsi_texture_offset tex_offset[4];
33 
34    unsigned mem_qualifier;
35    enum pipe_format mem_format;
36 
37    bool is_tex : 1;
38    bool precise : 1;
39 };
40 
41 struct ntr_block {
42    /* Array of struct ntr_insn */
43    struct util_dynarray insns;
44    int start_ip;
45    int end_ip;
46 };
47 
48 struct ntr_reg_interval {
49    uint32_t start, end;
50 };
51 
52 struct ntr_compile {
53    nir_shader *s;
54    nir_function_impl *impl;
55    struct pipe_screen *screen;
56    struct ureg_program *ureg;
57 
58    /* Options */
59    bool lower_fabs;
60 
61    bool addr_declared[3];
62    struct ureg_dst addr_reg[3];
63 
64    /* if condition set up at the end of a block, for ntr_emit_if(). */
65    struct ureg_src if_cond;
66 
67    /* TGSI temps for our NIR SSA and register values. */
68    struct ureg_dst *reg_temp;
69    struct ureg_src *ssa_temp;
70 
71    struct ntr_reg_interval *liveness;
72 
73    /* Map from nir_block to ntr_block */
74    struct hash_table *blocks;
75    struct ntr_block *cur_block;
76    unsigned current_if_else;
77    unsigned cf_label;
78 
79    /* Whether we're currently emitting instructions for a precise NIR instruction. */
80    bool precise;
81 
82    unsigned num_temps;
83    unsigned first_non_array_temp;
84 
85    /* Mappings from driver_location to TGSI input/output number.
86     *
87     * We'll be declaring TGSI input/outputs in an arbitrary order, and they get
88     * their numbers assigned incrementally, unlike inputs or constants.
89     */
90    struct ureg_src *input_index_map;
91    uint64_t centroid_inputs;
92 
93    uint32_t first_ubo;
94 };
95 
96 static struct ureg_dst
97 ntr_temp(struct ntr_compile *c)
98 {
99    return ureg_dst_register(TGSI_FILE_TEMPORARY, c->num_temps++);
100 }
101 
102 static struct ntr_block *
103 ntr_block_from_nir(struct ntr_compile *c, struct nir_block *block)
104 {
105    struct hash_entry *entry = _mesa_hash_table_search(c->blocks, block);
106    return entry->data;
107 }
108 
109 static void ntr_emit_cf_list(struct ntr_compile *c, struct exec_list *list);
110 static void ntr_emit_cf_list_ureg(struct ntr_compile *c, struct exec_list *list);
111 
112 static struct ntr_insn *
113 ntr_insn(struct ntr_compile *c, enum tgsi_opcode opcode, struct ureg_dst dst, struct ureg_src src0,
114          struct ureg_src src1, struct ureg_src src2, struct ureg_src src3)
115 {
116    struct ntr_insn insn = {
117       .opcode = opcode,
118       .dst = {dst, ureg_dst_undef()},
119       .src = {src0, src1, src2, src3},
120       .precise = c->precise,
121    };
122    util_dynarray_append(&c->cur_block->insns, struct ntr_insn, insn);
123    return util_dynarray_top_ptr(&c->cur_block->insns, struct ntr_insn);
124 }
125 
126 #define OP00(op)                                                                                   \
127    static inline void ntr_##op(struct ntr_compile *c)                                              \
128    {                                                                                               \
129       ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), ureg_src_undef(), ureg_src_undef(),          \
130                ureg_src_undef(), ureg_src_undef());                                                \
131    }
132 
133 #define OP01(op)                                                                                   \
134    static inline void ntr_##op(struct ntr_compile *c, struct ureg_src src0)                        \
135    {                                                                                               \
136       ntr_insn(c, TGSI_OPCODE_##op, ureg_dst_undef(), src0, ureg_src_undef(), ureg_src_undef(),    \
137                ureg_src_undef());                                                                  \
138    }
139 
140 #define OP10(op)                                                                                   \
141    static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst)                         \
142    {                                                                                               \
143       ntr_insn(c, TGSI_OPCODE_##op, dst, ureg_src_undef(), ureg_src_undef(), ureg_src_undef(),     \
144                ureg_src_undef());                                                                  \
145    }
146 
147 #define OP11(op)                                                                                   \
148    static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0)   \
149    {                                                                                               \
150       ntr_insn(c, TGSI_OPCODE_##op, dst, src0, ureg_src_undef(), ureg_src_undef(),                 \
151                ureg_src_undef());                                                                  \
152    }
153 
154 #define OP12(op)                                                                                   \
155    static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0,   \
156                                struct ureg_src src1)                                               \
157    {                                                                                               \
158       ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, ureg_src_undef(), ureg_src_undef());          \
159    }
160 
161 #define OP13(op)                                                                                   \
162    static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0,   \
163                                struct ureg_src src1, struct ureg_src src2)                         \
164    {                                                                                               \
165       ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, ureg_src_undef());                      \
166    }
167 
168 #define OP14(op)                                                                                   \
169    static inline void ntr_##op(struct ntr_compile *c, struct ureg_dst dst, struct ureg_src src0,   \
170                                struct ureg_src src1, struct ureg_src src2, struct ureg_src src3)   \
171    {                                                                                               \
172       ntr_insn(c, TGSI_OPCODE_##op, dst, src0, src1, src2, src3);                                  \
173    }
174 
175 /* We hand-craft our tex instructions */
176 #define OP12_TEX(op)
177 #define OP14_TEX(op)
178 
179 /* Use a template include to generate a correctly-typed ntr_OP()
180  * function for each TGSI opcode:
181  */
182 #include "gallium/auxiliary/tgsi/tgsi_opcode_tmp.h"
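/* As an illustration (not a definitive expansion), the include above
 * instantiates each OPnm() macro defined here for every TGSI opcode listed in
 * tgsi_opcode_tmp.h.  For example, its OP12(ADD) entry combined with the
 * OP12() definition above produces roughly:
 *
 *    static inline void ntr_ADD(struct ntr_compile *c, struct ureg_dst dst,
 *                               struct ureg_src src0, struct ureg_src src1)
 *    {
 *       ntr_insn(c, TGSI_OPCODE_ADD, dst, src0, src1, ureg_src_undef(),
 *                ureg_src_undef());
 *    }
 *
 * which is why the rest of this file can call ntr_MOV()/ntr_ADD()/etc.
 */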
183 
184 /**
185  * Interprets a nir_load_const used as a NIR src as a uint.
186  *
187  * For non-native-integer drivers, nir_load_const_instrs used by an integer ALU
188  * instruction (or in a phi-web used by an integer ALU instruction) were
189  * converted to floats and the ALU instruction swapped to the float equivalent.
190  * However, this means that integer load_consts used by intrinsics (which don't
191  * normally get that conversion) may have been reformatted to be floats.  Given
192  * that all of our intrinsic nir_src_as_uint() calls are expected to be small,
193  * we can just look and see if they look like floats and convert them back to
194  * ints.
195  */
196 static uint32_t
197 ntr_src_as_uint(struct ntr_compile *c, nir_src src)
198 {
199    uint32_t val = nir_src_as_uint(src);
200    if (val >= fui(1.0))
201       val = (uint32_t)uif(val);
202    return val;
203 }
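/* Worked example of the heuristic above (values are illustrative): a constant
 * 3 that was rewritten to 3.0f by the int-to-float lowering shows up here as
 * 0x40400000.  Since fui(1.0) == 0x3f800000, the comparison triggers and
 * uif(0x40400000) == 3.0f, which truncates back to the integer 3.  Genuinely
 * small integers (e.g. a binding index of 2 == 0x00000002) stay below
 * fui(1.0) and are returned unchanged.
 */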
204 
205 /* Per-channel masks of def/use within the block, and the per-channel
206  * livein/liveout for the block as a whole.
207  */
208 struct ntr_live_reg_block_state {
209    uint8_t *def, *use, *livein, *liveout, *defin, *defout;
210 };
211 
212 struct ntr_live_reg_state {
213    unsigned bitset_words;
214 
215    struct ntr_reg_interval *regs;
216 
217    /* Used in propagate_across_edge() */
218    BITSET_WORD *tmp_live;
219 
220    struct ntr_live_reg_block_state *blocks;
221 
222    nir_block_worklist worklist;
223 };
224 
225 static void
226 ntr_live_reg_mark_use(struct ntr_compile *c, struct ntr_live_reg_block_state *bs, int ip,
227                       unsigned index, unsigned used_mask)
228 {
229    bs->use[index] |= used_mask & ~bs->def[index];
230 
231    c->liveness[index].start = MIN2(c->liveness[index].start, ip);
232    c->liveness[index].end = MAX2(c->liveness[index].end, ip);
233 }
234 static void
235 ntr_live_reg_setup_def_use(struct ntr_compile *c, nir_function_impl *impl,
236                            struct ntr_live_reg_state *state)
237 {
238    for (int i = 0; i < impl->num_blocks; i++) {
239       state->blocks[i].def = rzalloc_array(state->blocks, uint8_t, c->num_temps);
240       state->blocks[i].defin = rzalloc_array(state->blocks, uint8_t, c->num_temps);
241       state->blocks[i].defout = rzalloc_array(state->blocks, uint8_t, c->num_temps);
242       state->blocks[i].use = rzalloc_array(state->blocks, uint8_t, c->num_temps);
243       state->blocks[i].livein = rzalloc_array(state->blocks, uint8_t, c->num_temps);
244       state->blocks[i].liveout = rzalloc_array(state->blocks, uint8_t, c->num_temps);
245    }
246 
247    int ip = 0;
248    nir_foreach_block (block, impl) {
249       struct ntr_live_reg_block_state *bs = &state->blocks[block->index];
250       struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
251 
252       ntr_block->start_ip = ip;
253 
254       util_dynarray_foreach (&ntr_block->insns, struct ntr_insn, insn) {
255          const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(insn->opcode);
256 
257          /* Set up use[] for the srcs.
258           *
259           * Uses are the channels of the reg read in the block that don't have a
260           * preceding def to screen them off.  Note that we don't do per-element
261           * tracking of array regs, so they're never screened off.
262           */
263          for (int i = 0; i < opcode_info->num_src; i++) {
264             if (insn->src[i].File != TGSI_FILE_TEMPORARY)
265                continue;
266             int index = insn->src[i].Index;
267 
268             uint32_t used_mask = tgsi_util_get_src_usage_mask(
269                insn->opcode, i, insn->dst->WriteMask, insn->src[i].SwizzleX, insn->src[i].SwizzleY,
270                insn->src[i].SwizzleZ, insn->src[i].SwizzleW, insn->tex_target, insn->tex_target);
271 
272             assert(!insn->src[i].Indirect || index < c->first_non_array_temp);
273             ntr_live_reg_mark_use(c, bs, ip, index, used_mask);
274          }
275 
276          if (insn->is_tex) {
277             for (int i = 0; i < ARRAY_SIZE(insn->tex_offset); i++) {
278                if (insn->tex_offset[i].File == TGSI_FILE_TEMPORARY)
279                   ntr_live_reg_mark_use(c, bs, ip, insn->tex_offset[i].Index, 0xf);
280             }
281          }
282 
283          /* Set up def[] for the dsts.
284           *
285           * Defs are the unconditionally-written (not R/M/W) channels of the reg in
286           * the block that don't have a preceding use.
287           */
288          for (int i = 0; i < opcode_info->num_dst; i++) {
289             if (insn->dst[i].File != TGSI_FILE_TEMPORARY)
290                continue;
291             int index = insn->dst[i].Index;
292             uint32_t writemask = insn->dst[i].WriteMask;
293 
294             bs->def[index] |= writemask & ~bs->use[index];
295             bs->defout[index] |= writemask;
296 
297             assert(!insn->dst[i].Indirect || index < c->first_non_array_temp);
298             c->liveness[index].start = MIN2(c->liveness[index].start, ip);
299             c->liveness[index].end = MAX2(c->liveness[index].end, ip);
300          }
301          ip++;
302       }
303 
304       ntr_block->end_ip = ip;
305    }
306 }
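/* Illustrative sketch of the per-channel def/use rules above, for one block
 * (TEMP[5]/TEMP[6] are made-up registers):
 *
 *    MOV TEMP[5].xy, IN[0]          -> def[5]    |= 0x3  (xy written, no prior use)
 *    ADD TEMP[6], TEMP[5].zwzw, ... -> use[5]    |= 0xc  (zw read, not screened
 *                                                         off by the .xy def)
 *    MOV TEMP[5].z, ...             -> defout[5] |= 0x4  (z already used, so it
 *                                                         does not become a def)
 *
 * so .xy is locally defined, .zw must be live into the block, and TEMP[5]'s
 * interval covers at least these instruction IPs.
 */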
307 
308 static void
309 ntr_live_regs(struct ntr_compile *c, nir_function_impl *impl)
310 {
311    nir_metadata_require(impl, nir_metadata_block_index);
312 
313    c->liveness = rzalloc_array(c, struct ntr_reg_interval, c->num_temps);
314 
315    struct ntr_live_reg_state state = {
316       .blocks = rzalloc_array(impl, struct ntr_live_reg_block_state, impl->num_blocks),
317    };
318 
319    /* The intervals start out with start > end (indicating unused) */
320    for (int i = 0; i < c->num_temps; i++)
321       c->liveness[i].start = ~0;
322 
323    ntr_live_reg_setup_def_use(c, impl, &state);
324 
325    /* Make a forward-order worklist of all the blocks. */
326    nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL);
327    nir_foreach_block (block, impl) {
328       nir_block_worklist_push_tail(&state.worklist, block);
329    }
330 
331    /* Propagate defin/defout down the CFG to calculate the live variables
332     * potentially defined along any possible control flow path.  We'll use this
333     * to keep things like conditional defs of the reg (or array regs where we
334     * don't track defs!) from making the reg's live range extend back to the
335     * start of the program.
336     */
337    while (!nir_block_worklist_is_empty(&state.worklist)) {
338       nir_block *block = nir_block_worklist_pop_head(&state.worklist);
339       for (int j = 0; j < ARRAY_SIZE(block->successors); j++) {
340          nir_block *succ = block->successors[j];
341          if (!succ || succ->index == impl->num_blocks)
342             continue;
343 
344          for (int i = 0; i < c->num_temps; i++) {
345             uint8_t new_def =
346                state.blocks[block->index].defout[i] & ~state.blocks[succ->index].defin[i];
347 
348             if (new_def) {
349                state.blocks[succ->index].defin[i] |= new_def;
350                state.blocks[succ->index].defout[i] |= new_def;
351                nir_block_worklist_push_tail(&state.worklist, succ);
352             }
353          }
354       }
355    }
356 
357    /* Make a reverse-order worklist of all the blocks. */
358    nir_foreach_block (block, impl) {
359       nir_block_worklist_push_head(&state.worklist, block);
360    }
361 
362    /* We're now ready to work through the worklist and update the liveness sets
363     * of each of the blocks.  As long as we keep the worklist up-to-date as we
364     * go, everything will get covered.
365     */
366    while (!nir_block_worklist_is_empty(&state.worklist)) {
367       /* We pop them off in the reverse order we pushed them on.  This way
368        * the first walk of the instructions is backwards so we only walk
369        * once in the case of no control flow.
370        */
371       nir_block *block = nir_block_worklist_pop_head(&state.worklist);
372       struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
373       struct ntr_live_reg_block_state *bs = &state.blocks[block->index];
374 
375       for (int i = 0; i < c->num_temps; i++) {
376          /* Collect livein from our successors to include in our liveout. */
377          for (int j = 0; j < ARRAY_SIZE(block->successors); j++) {
378             nir_block *succ = block->successors[j];
379             if (!succ || succ->index == impl->num_blocks)
380                continue;
381             struct ntr_live_reg_block_state *sbs = &state.blocks[succ->index];
382 
383             uint8_t new_liveout = sbs->livein[i] & ~bs->liveout[i];
384             if (new_liveout) {
385                if (state.blocks[block->index].defout[i])
386                   c->liveness[i].end = MAX2(c->liveness[i].end, ntr_block->end_ip);
387                bs->liveout[i] |= sbs->livein[i];
388             }
389          }
390 
391          /* Propagate use requests from either our block's uses or our
392           * non-screened-off liveout up to our predecessors.
393           */
394          uint8_t new_livein = ((bs->use[i] | (bs->liveout[i] & ~bs->def[i])) & ~bs->livein[i]);
395          if (new_livein) {
396             bs->livein[i] |= new_livein;
397             set_foreach (block->predecessors, entry) {
398                nir_block *pred = (void *)entry->key;
399                nir_block_worklist_push_tail(&state.worklist, pred);
400             }
401 
402             if (new_livein & state.blocks[block->index].defin[i])
403                c->liveness[i].start = MIN2(c->liveness[i].start, ntr_block->start_ip);
404          }
405       }
406    }
407 
408    ralloc_free(state.blocks);
409    nir_block_worklist_fini(&state.worklist);
410 }
411 
412 static void
413 ntr_ra_check(struct ntr_compile *c, unsigned *ra_map, BITSET_WORD *released, int ip, unsigned index)
414 {
415    if (index < c->first_non_array_temp)
416       return;
417 
418    if (c->liveness[index].start == ip && ra_map[index] == ~0)
419       ra_map[index] = ureg_DECL_temporary(c->ureg).Index;
420 
421    if (c->liveness[index].end == ip && !BITSET_TEST(released, index)) {
422       ureg_release_temporary(c->ureg, ureg_dst_register(TGSI_FILE_TEMPORARY, ra_map[index]));
423       BITSET_SET(released, index);
424    }
425 }
426 
427 static void
428 ntr_allocate_regs(struct ntr_compile *c, nir_function_impl *impl)
429 {
430    ntr_live_regs(c, impl);
431 
432    unsigned *ra_map = ralloc_array(c, unsigned, c->num_temps);
433    unsigned *released = rzalloc_array(c, BITSET_WORD, BITSET_WORDS(c->num_temps));
434 
435    /* No RA on NIR array regs */
436    for (int i = 0; i < c->first_non_array_temp; i++)
437       ra_map[i] = i;
438 
439    for (int i = c->first_non_array_temp; i < c->num_temps; i++)
440       ra_map[i] = ~0;
441 
442    int ip = 0;
443    nir_foreach_block (block, impl) {
444       struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
445 
446       for (int i = 0; i < c->num_temps; i++)
447          ntr_ra_check(c, ra_map, released, ip, i);
448 
449       util_dynarray_foreach (&ntr_block->insns, struct ntr_insn, insn) {
450          const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(insn->opcode);
451 
452          for (int i = 0; i < opcode_info->num_src; i++) {
453             if (insn->src[i].File == TGSI_FILE_TEMPORARY) {
454                ntr_ra_check(c, ra_map, released, ip, insn->src[i].Index);
455                insn->src[i].Index = ra_map[insn->src[i].Index];
456             }
457          }
458 
459          if (insn->is_tex) {
460             for (int i = 0; i < ARRAY_SIZE(insn->tex_offset); i++) {
461                if (insn->tex_offset[i].File == TGSI_FILE_TEMPORARY) {
462                   ntr_ra_check(c, ra_map, released, ip, insn->tex_offset[i].Index);
463                   insn->tex_offset[i].Index = ra_map[insn->tex_offset[i].Index];
464                }
465             }
466          }
467 
468          for (int i = 0; i < opcode_info->num_dst; i++) {
469             if (insn->dst[i].File == TGSI_FILE_TEMPORARY) {
470                ntr_ra_check(c, ra_map, released, ip, insn->dst[i].Index);
471                insn->dst[i].Index = ra_map[insn->dst[i].Index];
472             }
473          }
474          ip++;
475       }
476 
477       for (int i = 0; i < c->num_temps; i++)
478          ntr_ra_check(c, ra_map, released, ip, i);
479    }
480 }
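/* Sketch of the allocator's effect (indices are illustrative): virtual temps
 * whose live intervals do not overlap can end up sharing one ureg temporary.
 * With liveness[4] = [2,5] and liveness[7] = [8,12], TEMP[4] is released at
 * ip 5, and the ureg_DECL_temporary() call for TEMP[7] at ip 8 may hand back
 * the same hardware index, so ra_map[4] == ra_map[7] after this pass.
 */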
481 
482 static void
483 ntr_allocate_regs_unoptimized(struct ntr_compile *c, nir_function_impl *impl)
484 {
485    for (int i = c->first_non_array_temp; i < c->num_temps; i++)
486       ureg_DECL_temporary(c->ureg);
487 }
488 
489 /* TGSI varying declarations have a component usage mask associated (used by
490  * r600 and svga).
491  */
492 static uint32_t
493 ntr_tgsi_var_usage_mask(const struct nir_variable *var)
494 {
495    const struct glsl_type *type_without_array = glsl_without_array(var->type);
496    unsigned num_components = glsl_get_vector_elements(type_without_array);
497    if (num_components == 0) /* structs */
498       num_components = 4;
499 
500    return u_bit_consecutive(var->data.location_frac, num_components);
501 }
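/* For example (hypothetical variable): a vec2 varying packed at
 * location_frac == 2 yields u_bit_consecutive(2, 2) == 0xc, i.e. a .zw usage
 * mask, while a struct falls back to num_components == 4 and gets 0xf.
 */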
502 
503 static struct ureg_dst
504 ntr_output_decl(struct ntr_compile *c, nir_intrinsic_instr *instr, uint32_t *frac)
505 {
506    nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
507    int base = nir_intrinsic_base(instr);
508    *frac = nir_intrinsic_component(instr);
509 
510    struct ureg_dst out;
511    if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
512       unsigned semantic_name, semantic_index;
513       tgsi_get_gl_frag_result_semantic(semantics.location, &semantic_name, &semantic_index);
514       semantic_index += semantics.dual_source_blend_index;
515 
516       switch (semantics.location) {
517       case FRAG_RESULT_DEPTH:
518          *frac = 2; /* the z write goes to the .z channel in TGSI */
519          break;
520       case FRAG_RESULT_STENCIL:
521          *frac = 1;
522          break;
523       default:
524          break;
525       }
526 
527       out = ureg_DECL_output(c->ureg, semantic_name, semantic_index);
528    } else {
529       unsigned semantic_name, semantic_index;
530 
531       tgsi_get_gl_varying_semantic(semantics.location, true, &semantic_name, &semantic_index);
532 
533       uint32_t usage_mask = u_bit_consecutive(*frac, instr->num_components);
534       uint32_t gs_streams = semantics.gs_streams;
535       for (int i = 0; i < 4; i++) {
536          if (!(usage_mask & (1 << i)))
537             gs_streams &= ~(0x3 << 2 * i);
538       }
539 
540       /* No driver appears to use array_id of outputs. */
541       unsigned array_id = 0;
542 
543       /* This bit is lost in the i/o semantics, but it's unused in in-tree
544        * drivers.
545        */
546       bool invariant = semantics.invariant;
547 
548       out = ureg_DECL_output_layout(c->ureg, semantic_name, semantic_index, gs_streams, base,
549                                     usage_mask, array_id, semantics.num_slots, invariant);
550    }
551 
552    unsigned write_mask;
553    if (nir_intrinsic_has_write_mask(instr))
554       write_mask = nir_intrinsic_write_mask(instr);
555    else
556       write_mask = ((1 << instr->num_components) - 1) << *frac;
557 
558    write_mask = write_mask << *frac;
559    return ureg_writemask(out, write_mask);
560 }
561 
562 static bool
563 ntr_try_store_in_tgsi_output_with_use(struct ntr_compile *c, struct ureg_dst *dst, nir_src *src)
564 {
565    *dst = ureg_dst_undef();
566 
567    if (nir_src_is_if(src))
568       return false;
569 
570    if (nir_src_parent_instr(src)->type != nir_instr_type_intrinsic)
571       return false;
572 
573    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(nir_src_parent_instr(src));
574    if (intr->intrinsic != nir_intrinsic_store_output || !nir_src_is_const(intr->src[1])) {
575       return false;
576    }
577 
578    uint32_t frac;
579    *dst = ntr_output_decl(c, intr, &frac);
580    dst->Index += ntr_src_as_uint(c, intr->src[1]);
581 
582    return frac == 0;
583 }
584 
585 /* If this reg is used only for storing an output, then in the simple
586  * cases we can write directly to the TGSI output instead of having
587  * store_output emit its own MOV.
588  */
589 static bool
590 ntr_try_store_reg_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst,
591                                  nir_intrinsic_instr *reg_decl)
592 {
593    assert(reg_decl->intrinsic == nir_intrinsic_decl_reg);
594 
595    *dst = ureg_dst_undef();
596 
597    /* Look for a single use for try_store_in_tgsi_output */
598    nir_src *use = NULL;
599    nir_foreach_reg_load (src, reg_decl) {
600       nir_intrinsic_instr *load = nir_instr_as_intrinsic(nir_src_parent_instr(src));
601       nir_foreach_use_including_if (load_use, &load->def) {
602          /* We can only have one use */
603          if (use != NULL)
604             return false;
605 
606          use = load_use;
607       }
608    }
609 
610    if (use == NULL)
611       return false;
612 
613    return ntr_try_store_in_tgsi_output_with_use(c, dst, use);
614 }
615 
616 /* If this SSA def is used only for storing an output, then in the simple
617  * cases we can write directly to the TGSI output instead of having
618  * store_output emit its own MOV.
619  */
620 static bool
621 ntr_try_store_ssa_in_tgsi_output(struct ntr_compile *c, struct ureg_dst *dst, nir_def *def)
622 {
623    *dst = ureg_dst_undef();
624 
625    if (!list_is_singular(&def->uses))
626       return false;
627 
628    nir_foreach_use_including_if (use, def) {
629       return ntr_try_store_in_tgsi_output_with_use(c, dst, use);
630    }
631    unreachable("We have one use");
632 }
633 
634 static void
635 ntr_setup_inputs(struct ntr_compile *c)
636 {
637    if (c->s->info.stage != MESA_SHADER_FRAGMENT)
638       return;
639 
640    unsigned num_inputs = 0;
641    int num_input_arrays = 0;
642 
643    nir_foreach_shader_in_variable (var, c->s) {
644       const struct glsl_type *type = var->type;
645       unsigned array_len = glsl_count_attribute_slots(type, false);
646 
647       num_inputs = MAX2(num_inputs, var->data.driver_location + array_len);
648    }
649 
650    c->input_index_map = ralloc_array(c, struct ureg_src, num_inputs);
651 
652    nir_foreach_shader_in_variable (var, c->s) {
653       const struct glsl_type *type = var->type;
654       unsigned array_len = glsl_count_attribute_slots(type, false);
655 
656       unsigned interpolation = TGSI_INTERPOLATE_CONSTANT;
657       unsigned sample_loc;
658       struct ureg_src decl;
659 
660       if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
661          interpolation = tgsi_get_interp_mode(
662             var->data.interpolation,
663             var->data.location == VARYING_SLOT_COL0 || var->data.location == VARYING_SLOT_COL1);
664 
665          if (var->data.location == VARYING_SLOT_POS)
666             interpolation = TGSI_INTERPOLATE_LINEAR;
667       }
668 
669       unsigned semantic_name, semantic_index;
670       tgsi_get_gl_varying_semantic(var->data.location, true, &semantic_name, &semantic_index);
671 
672       if (var->data.sample) {
673          sample_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
674       } else if (var->data.centroid) {
675          sample_loc = TGSI_INTERPOLATE_LOC_CENTROID;
676          c->centroid_inputs |= (BITSET_MASK(array_len) << var->data.driver_location);
677       } else {
678          sample_loc = TGSI_INTERPOLATE_LOC_CENTER;
679       }
680 
681       unsigned array_id = 0;
682       if (glsl_type_is_array(type))
683          array_id = ++num_input_arrays;
684 
685       uint32_t usage_mask = ntr_tgsi_var_usage_mask(var);
686 
687       decl = ureg_DECL_fs_input_centroid_layout(
688          c->ureg, semantic_name, semantic_index, interpolation, sample_loc,
689          var->data.driver_location, usage_mask, array_id, array_len);
690 
691       if (semantic_name == TGSI_SEMANTIC_FACE) {
692          struct ureg_dst temp = ntr_temp(c);
693          /* tgsi docs say that floating point FACE will be positive for
694           * frontface and negative for backface, but realistically
695           * GLSL-to-TGSI had been doing MOV_SAT to turn it into 0.0 vs 1.0.
696           * Copy that behavior, since some drivers (r300) have been doing a
697           * 0.0 vs 1.0 backface (and I don't think anybody has a non-1.0
698           * front face).
699           */
700          temp.Saturate = true;
701          ntr_MOV(c, temp, decl);
702          decl = ureg_src(temp);
703       }
704 
705       for (unsigned i = 0; i < array_len; i++) {
706          c->input_index_map[var->data.driver_location + i] = decl;
707          c->input_index_map[var->data.driver_location + i].Index += i;
708       }
709    }
710 }
711 
712 static int
713 ntr_sort_by_location(const nir_variable *a, const nir_variable *b)
714 {
715    return a->data.location - b->data.location;
716 }
717 
718 /**
719  * Workaround for virglrenderer requiring that TGSI FS output color variables
720  * are declared in order.  Besides, it's a lot nicer to read the TGSI this way.
721  */
722 static void
723 ntr_setup_outputs(struct ntr_compile *c)
724 {
725    if (c->s->info.stage != MESA_SHADER_FRAGMENT)
726       return;
727 
728    nir_sort_variables_with_modes(c->s, ntr_sort_by_location, nir_var_shader_out);
729 
730    nir_foreach_shader_out_variable (var, c->s) {
731       if (var->data.location == FRAG_RESULT_COLOR)
732          ureg_property(c->ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
733 
734       unsigned semantic_name, semantic_index;
735       tgsi_get_gl_frag_result_semantic(var->data.location, &semantic_name, &semantic_index);
736 
737       (void)ureg_DECL_output(c->ureg, semantic_name, semantic_index);
738    }
739 }
740 
741 static enum tgsi_texture_type
742 tgsi_texture_type_from_sampler_dim(enum glsl_sampler_dim dim, bool is_array, bool is_shadow)
743 {
744    switch (dim) {
745    case GLSL_SAMPLER_DIM_1D:
746       if (is_shadow)
747          return is_array ? TGSI_TEXTURE_SHADOW1D_ARRAY : TGSI_TEXTURE_SHADOW1D;
748       else
749          return is_array ? TGSI_TEXTURE_1D_ARRAY : TGSI_TEXTURE_1D;
750    case GLSL_SAMPLER_DIM_2D:
751    case GLSL_SAMPLER_DIM_EXTERNAL:
752       if (is_shadow)
753          return is_array ? TGSI_TEXTURE_SHADOW2D_ARRAY : TGSI_TEXTURE_SHADOW2D;
754       else
755          return is_array ? TGSI_TEXTURE_2D_ARRAY : TGSI_TEXTURE_2D;
756    case GLSL_SAMPLER_DIM_3D:
757       return TGSI_TEXTURE_3D;
758    case GLSL_SAMPLER_DIM_CUBE:
759       if (is_shadow)
760          return is_array ? TGSI_TEXTURE_SHADOWCUBE_ARRAY : TGSI_TEXTURE_SHADOWCUBE;
761       else
762          return is_array ? TGSI_TEXTURE_CUBE_ARRAY : TGSI_TEXTURE_CUBE;
763    case GLSL_SAMPLER_DIM_RECT:
764       if (is_shadow)
765          return TGSI_TEXTURE_SHADOWRECT;
766       else
767          return TGSI_TEXTURE_RECT;
768    case GLSL_SAMPLER_DIM_MS:
769       return is_array ? TGSI_TEXTURE_2D_ARRAY_MSAA : TGSI_TEXTURE_2D_MSAA;
770    case GLSL_SAMPLER_DIM_BUF:
771       return TGSI_TEXTURE_BUFFER;
772    default:
773       unreachable("unknown sampler dim");
774    }
775 }
776 
777 static enum tgsi_return_type
778 tgsi_return_type_from_base_type(enum glsl_base_type type)
779 {
780    switch (type) {
781    case GLSL_TYPE_INT:
782       return TGSI_RETURN_TYPE_SINT;
783    case GLSL_TYPE_UINT:
784       return TGSI_RETURN_TYPE_UINT;
785    case GLSL_TYPE_FLOAT:
786       return TGSI_RETURN_TYPE_FLOAT;
787    default:
788       unreachable("unexpected texture type");
789    }
790 }
791 
792 static void
793 ntr_setup_uniforms(struct ntr_compile *c)
794 {
795    nir_foreach_uniform_variable (var, c->s) {
796       if (glsl_type_is_sampler(glsl_without_array(var->type)) ||
797           glsl_type_is_texture(glsl_without_array(var->type))) {
798          /* Don't use this size for the check for samplers -- arrays of structs
799           * containing samplers should be ignored, and just the separate lowered
800           * sampler uniform decl used.
801           */
802          int size = glsl_type_get_sampler_count(var->type) + glsl_type_get_texture_count(var->type);
803 
804          const struct glsl_type *stype = glsl_without_array(var->type);
805          enum tgsi_texture_type target = tgsi_texture_type_from_sampler_dim(
806             glsl_get_sampler_dim(stype), glsl_sampler_type_is_array(stype),
807             glsl_sampler_type_is_shadow(stype));
808          enum tgsi_return_type ret_type =
809             tgsi_return_type_from_base_type(glsl_get_sampler_result_type(stype));
810          for (int i = 0; i < size; i++) {
811             ureg_DECL_sampler_view(c->ureg, var->data.binding + i, target, ret_type, ret_type,
812                                    ret_type, ret_type);
813             ureg_DECL_sampler(c->ureg, var->data.binding + i);
814          }
815 
816          /* lower_uniforms_to_ubo lowered non-sampler uniforms to UBOs, so CB0
817           * size declaration happens with other UBOs below.
818           */
819       }
820    }
821 
822    c->first_ubo = ~0;
823 
824    unsigned ubo_sizes[PIPE_MAX_CONSTANT_BUFFERS] = {0};
825    nir_foreach_variable_with_modes (var, c->s, nir_var_mem_ubo) {
826       int ubo = var->data.driver_location;
827       if (ubo == -1)
828          continue;
829 
830       if (!(ubo == 0 && c->s->info.first_ubo_is_default_ubo))
831          c->first_ubo = MIN2(c->first_ubo, ubo);
832 
833       unsigned size = glsl_get_explicit_size(var->interface_type, false);
834       ubo_sizes[ubo] = size;
835    }
836 
837    for (int i = 0; i < ARRAY_SIZE(ubo_sizes); i++) {
838       if (ubo_sizes[i])
839          ureg_DECL_constant2D(c->ureg, 0, DIV_ROUND_UP(ubo_sizes[i], 16) - 1, i);
840    }
841 }
842 
843 static void
844 ntr_setup_registers(struct ntr_compile *c)
845 {
846    assert(c->num_temps == 0);
847 
848    nir_foreach_reg_decl_safe (nir_reg, nir_shader_get_entrypoint(c->s)) {
849       /* Permanently allocate all the array regs at the start. */
850       unsigned num_array_elems = nir_intrinsic_num_array_elems(nir_reg);
851       unsigned index = nir_reg->def.index;
852 
853       if (num_array_elems != 0) {
854          struct ureg_dst decl = ureg_DECL_array_temporary(c->ureg, num_array_elems, true);
855          c->reg_temp[index] = decl;
856          assert(c->num_temps == decl.Index);
857          c->num_temps += num_array_elems;
858       }
859    }
860    c->first_non_array_temp = c->num_temps;
861 
862    /* After that, allocate non-array regs in our virtual space that we'll
863     * register-allocate before ureg emit.
864     */
865    nir_foreach_reg_decl_safe (nir_reg, nir_shader_get_entrypoint(c->s)) {
866       unsigned num_array_elems = nir_intrinsic_num_array_elems(nir_reg);
867       unsigned num_components = nir_intrinsic_num_components(nir_reg);
868       unsigned index = nir_reg->def.index;
869 
870       /* We already handled arrays */
871       if (num_array_elems == 0) {
872          struct ureg_dst decl;
873          uint32_t write_mask = BITFIELD_MASK(num_components);
874 
875          if (!ntr_try_store_reg_in_tgsi_output(c, &decl, nir_reg)) {
876             decl = ureg_writemask(ntr_temp(c), write_mask);
877          }
878          c->reg_temp[index] = decl;
879       }
880    }
881 }
882 
883 static struct ureg_src
884 ntr_get_load_const_src(struct ntr_compile *c, nir_load_const_instr *instr)
885 {
886    int num_components = instr->def.num_components;
887 
888    float values[4];
889    assert(instr->def.bit_size == 32);
890    for (int i = 0; i < num_components; i++)
891       values[i] = uif(instr->value[i].u32);
892 
893    return ureg_DECL_immediate(c->ureg, values, num_components);
894 }
895 
896 static struct ureg_src
897 ntr_reladdr(struct ntr_compile *c, struct ureg_src addr, int addr_index)
898 {
899    assert(addr_index < ARRAY_SIZE(c->addr_reg));
900 
901    for (int i = 0; i <= addr_index; i++) {
902       if (!c->addr_declared[i]) {
903          c->addr_reg[i] = ureg_writemask(ureg_DECL_address(c->ureg), TGSI_WRITEMASK_X);
904          c->addr_declared[i] = true;
905       }
906    }
907 
908    ntr_ARL(c, c->addr_reg[addr_index], addr);
909    return ureg_scalar(ureg_src(c->addr_reg[addr_index]), 0);
910 }
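/* Sketch of how the result is used (register numbers are illustrative): the
 * ARL above loads the integer index into ADDR[n].x, and callers then wrap a
 * register-file access with ureg_src_indirect()/ureg_dst_indirect(), which
 * ends up dumped as TGSI along the lines of:
 *
 *    ARL  ADDR[0].x, TEMP[3].xxxx
 *    MOV  TEMP[4], CONST[ADDR[0].x+1]
 */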
911 
912 /* Forward declare for recursion with indirects */
913 static struct ureg_src ntr_get_src(struct ntr_compile *c, nir_src src);
914 
915 static struct ureg_src
916 ntr_get_chased_src(struct ntr_compile *c, nir_legacy_src *src)
917 {
918    if (src->is_ssa) {
919       if (src->ssa->parent_instr->type == nir_instr_type_load_const)
920          return ntr_get_load_const_src(c, nir_instr_as_load_const(src->ssa->parent_instr));
921 
922       return c->ssa_temp[src->ssa->index];
923    } else {
924       struct ureg_dst reg_temp = c->reg_temp[src->reg.handle->index];
925       reg_temp.Index += src->reg.base_offset;
926 
927       if (src->reg.indirect) {
928          struct ureg_src offset = ntr_get_src(c, nir_src_for_ssa(src->reg.indirect));
929          return ureg_src_indirect(ureg_src(reg_temp), ntr_reladdr(c, offset, 0));
930       } else {
931          return ureg_src(reg_temp);
932       }
933    }
934 }
935 
936 static struct ureg_src
937 ntr_get_src(struct ntr_compile *c, nir_src src)
938 {
939    nir_legacy_src chased = nir_legacy_chase_src(&src);
940    return ntr_get_chased_src(c, &chased);
941 }
942 
943 static struct ureg_src
944 ntr_get_alu_src(struct ntr_compile *c, nir_alu_instr *instr, int i)
945 {
946    /* We only support 32-bit float modifiers.  The only other modifier type
947     * officially supported by TGSI is 32-bit integer negates, but even those are
948     * broken on virglrenderer, so skip lowering all integer and f64 float mods.
949     *
950     * The lower_fabs requests that we not have native source modifiers
951     * for fabs, and instead emit MAX(a,-a) for nir_op_fabs.
952     */
953    nir_legacy_alu_src src = nir_legacy_chase_alu_src(&instr->src[i], !c->lower_fabs);
954    struct ureg_src usrc = ntr_get_chased_src(c, &src.src);
955 
956    usrc = ureg_swizzle(usrc, src.swizzle[0], src.swizzle[1], src.swizzle[2], src.swizzle[3]);
957 
958    if (src.fabs)
959       usrc = ureg_abs(usrc);
960    if (src.fneg)
961       usrc = ureg_negate(usrc);
962 
963    return usrc;
964 }
965 
966 /* Reswizzles a source so that the unset channels in the write mask still refer
967  * to one of the channels present in the write mask.
968  */
969 static struct ureg_src
970 ntr_swizzle_for_write_mask(struct ureg_src src, uint32_t write_mask)
971 {
972    assert(write_mask);
973    int first_chan = ffs(write_mask) - 1;
974    return ureg_swizzle(src, (write_mask & TGSI_WRITEMASK_X) ? TGSI_SWIZZLE_X : first_chan,
975                        (write_mask & TGSI_WRITEMASK_Y) ? TGSI_SWIZZLE_Y : first_chan,
976                        (write_mask & TGSI_WRITEMASK_Z) ? TGSI_SWIZZLE_Z : first_chan,
977                        (write_mask & TGSI_WRITEMASK_W) ? TGSI_SWIZZLE_W : first_chan);
978 }
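/* Worked example: for write_mask == TGSI_WRITEMASK_XZ, first_chan is X, so
 * the returned swizzle is (X, X, Z, X) -- the masked-out .y and .w lanes are
 * redirected to a channel that is actually written, so later readers of the
 * reswizzled source never pick up an undefined channel.
 */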
979 
980 static struct ureg_dst
981 ntr_get_ssa_def_decl(struct ntr_compile *c, nir_def *ssa)
982 {
983    uint32_t writemask;
984    /* Fix writemask for nir_intrinsic_load_ubo_vec4 according to uses. */
985    if (ssa->parent_instr->type == nir_instr_type_intrinsic &&
986        nir_instr_as_intrinsic(ssa->parent_instr)->intrinsic == nir_intrinsic_load_ubo_vec4)
987       writemask = nir_def_components_read(ssa);
988    else
989       writemask = BITSET_MASK(ssa->num_components);
990 
991    struct ureg_dst dst;
992    if (!ntr_try_store_ssa_in_tgsi_output(c, &dst, ssa))
993       dst = ntr_temp(c);
994 
995    c->ssa_temp[ssa->index] = ntr_swizzle_for_write_mask(ureg_src(dst), writemask);
996 
997    return ureg_writemask(dst, writemask);
998 }
999 
1000 static struct ureg_dst
1001 ntr_get_chased_dest_decl(struct ntr_compile *c, nir_legacy_dest *dest)
1002 {
1003    if (dest->is_ssa)
1004       return ntr_get_ssa_def_decl(c, dest->ssa);
1005    else
1006       return c->reg_temp[dest->reg.handle->index];
1007 }
1008 
1009 static struct ureg_dst
1010 ntr_get_chased_dest(struct ntr_compile *c, nir_legacy_dest *dest)
1011 {
1012    struct ureg_dst dst = ntr_get_chased_dest_decl(c, dest);
1013 
1014    if (!dest->is_ssa) {
1015       dst.Index += dest->reg.base_offset;
1016 
1017       if (dest->reg.indirect) {
1018          struct ureg_src offset = ntr_get_src(c, nir_src_for_ssa(dest->reg.indirect));
1019          dst = ureg_dst_indirect(dst, ntr_reladdr(c, offset, 0));
1020       }
1021    }
1022 
1023    return dst;
1024 }
1025 
1026 static struct ureg_dst
1027 ntr_get_dest(struct ntr_compile *c, nir_def *def)
1028 {
1029    nir_legacy_dest chased = nir_legacy_chase_dest(def);
1030    return ntr_get_chased_dest(c, &chased);
1031 }
1032 
1033 static struct ureg_dst
1034 ntr_get_alu_dest(struct ntr_compile *c, nir_def *def)
1035 {
1036    nir_legacy_alu_dest chased = nir_legacy_chase_alu_dest(def);
1037    struct ureg_dst dst = ntr_get_chased_dest(c, &chased.dest);
1038 
1039    if (chased.fsat)
1040       dst.Saturate = true;
1041 
1042    /* Only registers get write masks */
1043    if (chased.dest.is_ssa)
1044       return dst;
1045 
1046    return ureg_writemask(dst, chased.write_mask);
1047 }
1048 
1049 /* For an SSA dest being populated by a constant src, replace the storage with
1050  * a copy of the ureg_src.
1051  */
1052 static void
1053 ntr_store_def(struct ntr_compile *c, nir_def *def, struct ureg_src src)
1054 {
1055    if (!src.Indirect && !src.DimIndirect) {
1056       switch (src.File) {
1057       case TGSI_FILE_IMMEDIATE:
1058       case TGSI_FILE_INPUT:
1059       case TGSI_FILE_CONSTANT:
1060       case TGSI_FILE_SYSTEM_VALUE:
1061          c->ssa_temp[def->index] = src;
1062          return;
1063       }
1064    }
1065 
1066    ntr_MOV(c, ntr_get_ssa_def_decl(c, def), src);
1067 }
1068 
1069 static void
1070 ntr_store(struct ntr_compile *c, nir_def *def, struct ureg_src src)
1071 {
1072    nir_legacy_dest chased = nir_legacy_chase_dest(def);
1073 
1074    if (chased.is_ssa)
1075       ntr_store_def(c, chased.ssa, src);
1076    else {
1077       struct ureg_dst dst = ntr_get_chased_dest(c, &chased);
1078       ntr_MOV(c, dst, src);
1079    }
1080 }
1081 
1082 static void
1083 ntr_emit_scalar(struct ntr_compile *c, unsigned tgsi_op, struct ureg_dst dst, struct ureg_src src0,
1084                 struct ureg_src src1)
1085 {
1086    unsigned i;
1087 
1088    /* POW is the only 2-operand scalar op. */
1089    if (tgsi_op != TGSI_OPCODE_POW)
1090       src1 = src0;
1091 
1092    for (i = 0; i < 4; i++) {
1093       if (dst.WriteMask & (1 << i)) {
1094          ntr_insn(c, tgsi_op, ureg_writemask(dst, 1 << i), ureg_scalar(src0, i),
1095                   ureg_scalar(src1, i), ureg_src_undef(), ureg_src_undef());
1096       }
1097    }
1098 }
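/* Sketch of the expansion above (register names are illustrative): a single
 * nir frcp with a .xyz destination becomes three TGSI instructions,
 *
 *    RCP TEMP[n].x, TEMP[m].xxxx
 *    RCP TEMP[n].y, TEMP[m].yyyy
 *    RCP TEMP[n].z, TEMP[m].zzzz
 *
 * because TGSI scalar ops replicate one source channel to every written
 * channel, so each destination channel needs its own instruction.
 */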
1099 
1100 static void
1101 ntr_emit_alu(struct ntr_compile *c, nir_alu_instr *instr)
1102 {
1103    struct ureg_src src[4];
1104    struct ureg_dst dst;
1105    unsigned i;
1106    int num_srcs = nir_op_infos[instr->op].num_inputs;
1107 
1108    /* Don't try to translate folded fsat since their source won't be valid */
1109    if (instr->op == nir_op_fsat && nir_legacy_fsat_folds(instr))
1110       return;
1111 
1112    c->precise = instr->exact;
1113 
1114    assert(num_srcs <= ARRAY_SIZE(src));
1115    for (i = 0; i < num_srcs; i++)
1116       src[i] = ntr_get_alu_src(c, instr, i);
1117    for (; i < ARRAY_SIZE(src); i++)
1118       src[i] = ureg_src_undef();
1119 
1120    dst = ntr_get_alu_dest(c, &instr->def);
1121 
1122    static enum tgsi_opcode op_map[] = {
1123       [nir_op_mov] = TGSI_OPCODE_MOV,
1124 
1125       [nir_op_fdot2_replicated] = TGSI_OPCODE_DP2,
1126       [nir_op_fdot3_replicated] = TGSI_OPCODE_DP3,
1127       [nir_op_fdot4_replicated] = TGSI_OPCODE_DP4,
1128       [nir_op_ffloor] = TGSI_OPCODE_FLR,
1129       [nir_op_ffract] = TGSI_OPCODE_FRC,
1130       [nir_op_fceil] = TGSI_OPCODE_CEIL,
1131       [nir_op_fround_even] = TGSI_OPCODE_ROUND,
1132 
1133       [nir_op_slt] = TGSI_OPCODE_SLT,
1134       [nir_op_sge] = TGSI_OPCODE_SGE,
1135       [nir_op_seq] = TGSI_OPCODE_SEQ,
1136       [nir_op_sne] = TGSI_OPCODE_SNE,
1137 
1138       [nir_op_ftrunc] = TGSI_OPCODE_TRUNC,
1139       [nir_op_fadd] = TGSI_OPCODE_ADD,
1140       [nir_op_fmul] = TGSI_OPCODE_MUL,
1141 
1142       [nir_op_fmin] = TGSI_OPCODE_MIN,
1143       [nir_op_fmax] = TGSI_OPCODE_MAX,
1144       [nir_op_ffma] = TGSI_OPCODE_MAD,
1145    };
1146 
1147    if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op] > 0) {
1148       /* The normal path for NIR to TGSI ALU op translation */
1149       ntr_insn(c, op_map[instr->op], dst, src[0], src[1], src[2], src[3]);
1150    } else {
1151       /* Special cases for NIR to TGSI ALU op translation. */
1152 
1153       /* TODO: Use something like the ntr_store() path for the MOV calls so we
1154        * don't emit extra MOVs for swizzles/srcmods of inputs/const/imm.
1155        */
1156 
1157       switch (instr->op) {
1158       case nir_op_fabs:
1159          /* Try to eliminate */
1160          if (!c->lower_fabs && nir_legacy_float_mod_folds(instr))
1161             break;
1162 
1163          if (c->lower_fabs)
1164             ntr_MAX(c, dst, src[0], ureg_negate(src[0]));
1165          else
1166             ntr_MOV(c, dst, ureg_abs(src[0]));
1167          break;
1168 
1169       case nir_op_fsat:
1170          ntr_MOV(c, ureg_saturate(dst), src[0]);
1171          break;
1172 
1173       case nir_op_fneg:
1174          /* Try to eliminate */
1175          if (nir_legacy_float_mod_folds(instr))
1176             break;
1177 
1178          ntr_MOV(c, dst, ureg_negate(src[0]));
1179          break;
1180 
1181          /* NOTE: TGSI 32-bit math ops have the old "one source channel
1182           * replicated to all dst channels" behavior, while 64 is normal mapping
1183           * of src channels to dst.
1184           */
1185       case nir_op_frcp:
1186          ntr_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], ureg_src_undef());
1187          break;
1188 
1189       case nir_op_frsq:
1190          ntr_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], ureg_src_undef());
1191          break;
1192 
1193       case nir_op_fexp2:
1194          ntr_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], ureg_src_undef());
1195          break;
1196 
1197       case nir_op_flog2:
1198          ntr_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], ureg_src_undef());
1199          break;
1200 
1201       case nir_op_fsin:
1202          ntr_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], ureg_src_undef());
1203          break;
1204 
1205       case nir_op_fcos:
1206          ntr_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], ureg_src_undef());
1207          break;
1208 
1209       case nir_op_fsub:
1210          ntr_ADD(c, dst, src[0], ureg_negate(src[1]));
1211          break;
1212 
1213       case nir_op_fmod:
1214          unreachable("should be handled by .lower_fmod = true");
1215          break;
1216 
1217       case nir_op_fpow:
1218          ntr_emit_scalar(c, TGSI_OPCODE_POW, dst, src[0], src[1]);
1219          break;
1220 
1221       case nir_op_flrp:
1222          ntr_LRP(c, dst, src[2], src[1], src[0]);
1223          break;
1224 
1225       case nir_op_fcsel:
1226          /* Implement this as CMP(-abs(src0), src1, src2). */
1227          ntr_CMP(c, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]);
1228          break;
1229 
1230       case nir_op_fcsel_gt:
1231          ntr_CMP(c, dst, ureg_negate(src[0]), src[1], src[2]);
1232          break;
1233 
1234       case nir_op_fcsel_ge:
1235          /* Implement this as if !(src0 < 0.0) was identical to src0 >= 0.0. */
1236          ntr_CMP(c, dst, src[0], src[2], src[1]);
1237          break;
1238 
1239       case nir_op_vec4:
1240       case nir_op_vec3:
1241       case nir_op_vec2:
1242          unreachable("covered by nir_lower_vec_to_movs()");
1243 
1244       default:
1245          fprintf(stderr, "Unknown NIR opcode: %s\n", nir_op_infos[instr->op].name);
1246          unreachable("Unknown NIR opcode");
1247       }
1248    }
1249 
1250    c->precise = false;
1251 }
1252 
1253 static struct ureg_src
1254 ntr_ureg_src_indirect(struct ntr_compile *c, struct ureg_src usrc, nir_src src, int addr_reg)
1255 {
1256    if (nir_src_is_const(src)) {
1257       usrc.Index += ntr_src_as_uint(c, src);
1258       return usrc;
1259    } else {
1260       return ureg_src_indirect(usrc, ntr_reladdr(c, ntr_get_src(c, src), addr_reg));
1261    }
1262 }
1263 
1264 static struct ureg_dst
1265 ntr_ureg_dst_indirect(struct ntr_compile *c, struct ureg_dst dst, nir_src src)
1266 {
1267    if (nir_src_is_const(src)) {
1268       dst.Index += ntr_src_as_uint(c, src);
1269       return dst;
1270    } else {
1271       return ureg_dst_indirect(dst, ntr_reladdr(c, ntr_get_src(c, src), 0));
1272    }
1273 }
1274 
1275 static struct ureg_dst
1276 ntr_ureg_dst_dimension_indirect(struct ntr_compile *c, struct ureg_dst udst, nir_src src)
1277 {
1278    if (nir_src_is_const(src)) {
1279       return ureg_dst_dimension(udst, ntr_src_as_uint(c, src));
1280    } else {
1281       return ureg_dst_dimension_indirect(udst, ntr_reladdr(c, ntr_get_src(c, src), 1), 0);
1282    }
1283 }
1284 /* Some load operations in NIR will have a fractional offset that we need to
1285  * swizzle down before storing to the result register.
1286  */
1287 static struct ureg_src
1288 ntr_shift_by_frac(struct ureg_src src, unsigned frac, unsigned num_components)
1289 {
1290    return ureg_swizzle(src, frac, frac + MIN2(num_components - 1, 1),
1291                        frac + MIN2(num_components - 1, 2), frac + MIN2(num_components - 1, 3));
1292 }
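/* Worked example: frac == 2 with num_components == 2 (say, reading the .zw
 * half of a constant vec4) produces ureg_swizzle(src, 2, 3, 3, 3), i.e.
 * src.zwww, so component 0 of the NIR def reads .z and component 1 reads .w;
 * the trailing channels just repeat the last valid one.
 */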
1293 
1294 static void
1295 ntr_emit_load_ubo(struct ntr_compile *c, nir_intrinsic_instr *instr)
1296 {
1297    struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
1298 
1299    struct ureg_dst addr_temp = ureg_dst_undef();
1300 
1301    if (nir_src_is_const(instr->src[0])) {
1302       src = ureg_src_dimension(src, ntr_src_as_uint(c, instr->src[0]));
1303    } else {
1304       /* virglrenderer requires that indirect UBO references have the UBO
1305        * array's base index in the Index field, not added to the indirect
1306        * address.
1307        *
1308        * Many nir intrinsics have a base address const value for the start of
1309        * their array indirection, but load_ubo doesn't.  We fake it by
1310        * subtracting it off here.
1311        */
1312       addr_temp = ntr_temp(c);
1313       ntr_UADD(c, addr_temp, ntr_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, -c->first_ubo));
1314       src = ureg_src_dimension_indirect(src, ntr_reladdr(c, ureg_src(addr_temp), 1), c->first_ubo);
1315    }
1316 
1317    /* !pipe_caps.load_constbuf: Just emit it as a vec4 reference to the const
1318     * file.
1319     */
1320    src.Index = nir_intrinsic_base(instr);
1321 
1322    if (nir_src_is_const(instr->src[1])) {
1323       src.Index += ntr_src_as_uint(c, instr->src[1]);
1324    } else {
1325       src = ureg_src_indirect(src, ntr_reladdr(c, ntr_get_src(c, instr->src[1]), 0));
1326    }
1327 
1328    int start_component = nir_intrinsic_component(instr);
1329 
1330    src = ntr_shift_by_frac(src, start_component, instr->num_components);
1331 
1332    ntr_store(c, &instr->def, src);
1333 }
1334 
1335 static void
1336 ntr_emit_load_input(struct ntr_compile *c, nir_intrinsic_instr *instr)
1337 {
1338    uint32_t frac = nir_intrinsic_component(instr);
1339    uint32_t num_components = instr->num_components;
1340    unsigned base = nir_intrinsic_base(instr);
1341    struct ureg_src input;
1342    nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
1343 
1344    if (c->s->info.stage == MESA_SHADER_VERTEX) {
1345       input = ureg_DECL_vs_input(c->ureg, base);
1346       for (int i = 1; i < semantics.num_slots; i++)
1347          ureg_DECL_vs_input(c->ureg, base + i);
1348    } else {
1349       input = c->input_index_map[base];
1350    }
1351 
1352    input = ntr_shift_by_frac(input, frac, num_components);
1353 
1354    switch (instr->intrinsic) {
1355    case nir_intrinsic_load_input:
1356       input = ntr_ureg_src_indirect(c, input, instr->src[0], 0);
1357       ntr_store(c, &instr->def, input);
1358       break;
1359 
1360    case nir_intrinsic_load_interpolated_input: {
1361       input = ntr_ureg_src_indirect(c, input, instr->src[1], 0);
1362 
1363       nir_intrinsic_instr *bary_instr = nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
1364 
1365       switch (bary_instr->intrinsic) {
1366       case nir_intrinsic_load_barycentric_pixel:
1367       case nir_intrinsic_load_barycentric_sample:
1368          /* For these, we know that the barycentric load matches the
1369           * interpolation on the input declaration, so we can use it directly.
1370           */
1371          ntr_store(c, &instr->def, input);
1372          break;
1373 
1374       case nir_intrinsic_load_barycentric_centroid:
1375          /* If the input was declared centroid, then there's no need to
1376           * emit the extra TGSI interp instruction, we can just read the
1377           * input.
1378           */
1379          if (c->centroid_inputs & (1ull << nir_intrinsic_base(instr))) {
1380             ntr_store(c, &instr->def, input);
1381          } else {
1382             ntr_INTERP_CENTROID(c, ntr_get_dest(c, &instr->def), input);
1383          }
1384          break;
1385 
1386       case nir_intrinsic_load_barycentric_at_sample:
1387          /* We stored the sample in the fake "bary" dest. */
1388          ntr_INTERP_SAMPLE(c, ntr_get_dest(c, &instr->def), input, ntr_get_src(c, instr->src[0]));
1389          break;
1390 
1391       case nir_intrinsic_load_barycentric_at_offset:
1392          /* We stored the offset in the fake "bary" dest. */
1393          ntr_INTERP_OFFSET(c, ntr_get_dest(c, &instr->def), input, ntr_get_src(c, instr->src[0]));
1394          break;
1395 
1396       default:
1397          unreachable("bad barycentric interp intrinsic\n");
1398       }
1399       break;
1400    }
1401 
1402    default:
1403       unreachable("bad load input intrinsic\n");
1404    }
1405 }
1406 
1407 static void
1408 ntr_emit_store_output(struct ntr_compile *c, nir_intrinsic_instr *instr)
1409 {
1410    struct ureg_src src = ntr_get_src(c, instr->src[0]);
1411 
1412    if (src.File == TGSI_FILE_OUTPUT) {
1413       /* If our src is the output file, that's an indication that we were able
1414        * to emit the output stores in the generating instructions and we have
1415        * nothing to do here.
1416        */
1417       return;
1418    }
1419 
1420    uint32_t frac;
1421    struct ureg_dst out = ntr_output_decl(c, instr, &frac);
1422 
1423    if (instr->intrinsic == nir_intrinsic_store_per_vertex_output) {
1424       out = ntr_ureg_dst_indirect(c, out, instr->src[2]);
1425       out = ntr_ureg_dst_dimension_indirect(c, out, instr->src[1]);
1426    } else {
1427       out = ntr_ureg_dst_indirect(c, out, instr->src[1]);
1428    }
1429 
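   /* Build a swizzle that shifts the source down by the output's starting
    * component (frac), so the first written channel of the output reads src.x,
    * the next reads src.y, and so on.
    */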
1430    uint8_t swizzle[4] = {0, 0, 0, 0};
1431    for (int i = frac; i < 4; i++) {
1432       if (out.WriteMask & (1 << i))
1433          swizzle[i] = i - frac;
1434    }
1435 
1436    src = ureg_swizzle(src, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1437 
1438    ntr_MOV(c, out, src);
1439 }
1440 
1441 static void
1442 ntr_emit_load_output(struct ntr_compile *c, nir_intrinsic_instr *instr)
1443 {
1444    nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
1445 
1446    /* ntr_try_store_in_tgsi_output() optimization is not valid if normal
1447     * load_output is present.
1448     */
1449    assert(c->s->info.stage != MESA_SHADER_VERTEX &&
1450           (c->s->info.stage != MESA_SHADER_FRAGMENT || semantics.fb_fetch_output));
1451 
1452    uint32_t frac;
1453    struct ureg_dst out = ntr_output_decl(c, instr, &frac);
1454 
1455    if (instr->intrinsic == nir_intrinsic_load_per_vertex_output) {
1456       out = ntr_ureg_dst_indirect(c, out, instr->src[1]);
1457       out = ntr_ureg_dst_dimension_indirect(c, out, instr->src[0]);
1458    } else {
1459       out = ntr_ureg_dst_indirect(c, out, instr->src[0]);
1460    }
1461 
1462    struct ureg_dst dst = ntr_get_dest(c, &instr->def);
1463    struct ureg_src out_src = ureg_src(out);
1464 
1465    /* Don't swizzle in unavailable channels of the output for the writemasked-out
1466     * components. Avoids compile failures in virglrenderer with
1467     * TESS_LEVEL_INNER.
1468     */
1469    int fill_channel = ffs(dst.WriteMask) - 1;
1470    uint8_t swizzles[4] = {0, 1, 2, 3};
1471    for (int i = 0; i < 4; i++)
1472       if (!(dst.WriteMask & (1 << i)))
1473          swizzles[i] = fill_channel;
1474    out_src = ureg_swizzle(out_src, swizzles[0], swizzles[1], swizzles[2], swizzles[3]);
1475 
1476    if (semantics.fb_fetch_output)
1477       ntr_FBFETCH(c, dst, out_src);
1478    else
1479       ntr_MOV(c, dst, out_src);
1480 }
1481 
1482 static void
1483 ntr_emit_load_sysval(struct ntr_compile *c, nir_intrinsic_instr *instr)
1484 {
1485    gl_system_value sysval = nir_system_value_from_intrinsic(instr->intrinsic);
1486    enum tgsi_semantic semantic = tgsi_get_sysval_semantic(sysval);
1487    struct ureg_src sv = ureg_DECL_system_value(c->ureg, semantic, 0);
1488 
1489    /* virglrenderer doesn't like references to channels of the sysval that
1490     * aren't defined, even if they aren't really read.  (GLSL compile fails on
1491     * gl_NumWorkGroups.w, for example).
1492     */
1493    uint32_t write_mask = BITSET_MASK(instr->def.num_components);
1494    sv = ntr_swizzle_for_write_mask(sv, write_mask);
1495 
1496    /* TGSI and NIR define these intrinsics as always loading ints, but they can
1497     * still appear on hardware with non-native-integers fragment shaders using
1498     * the draw path (i915g).  In that case, having called nir_lower_int_to_float
1499     * means that we actually want floats instead.
1500     */
1501    switch (instr->intrinsic) {
1502    case nir_intrinsic_load_vertex_id:
1503    case nir_intrinsic_load_instance_id:
1504       ntr_U2F(c, ntr_get_dest(c, &instr->def), sv);
1505       return;
1506 
1507    default:
1508       break;
1509    }
1510 
1511    ntr_store(c, &instr->def, sv);
1512 }
1513 
1514 static void
1515 ntr_emit_intrinsic(struct ntr_compile *c, nir_intrinsic_instr *instr)
1516 {
1517    switch (instr->intrinsic) {
1518    case nir_intrinsic_load_ubo:
1519    case nir_intrinsic_load_ubo_vec4:
1520       ntr_emit_load_ubo(c, instr);
1521       break;
1522 
1523       /* System values */
1524    case nir_intrinsic_load_draw_id:
1525    case nir_intrinsic_load_invocation_id:
1526    case nir_intrinsic_load_frag_coord:
1527    case nir_intrinsic_load_point_coord:
1528    case nir_intrinsic_load_front_face:
1529       ntr_emit_load_sysval(c, instr);
1530       break;
1531 
1532    case nir_intrinsic_load_input:
1533    case nir_intrinsic_load_per_vertex_input:
1534    case nir_intrinsic_load_interpolated_input:
1535       ntr_emit_load_input(c, instr);
1536       break;
1537 
1538    case nir_intrinsic_store_output:
1539    case nir_intrinsic_store_per_vertex_output:
1540       ntr_emit_store_output(c, instr);
1541       break;
1542 
1543    case nir_intrinsic_load_output:
1544    case nir_intrinsic_load_per_vertex_output:
1545       ntr_emit_load_output(c, instr);
1546       break;
1547 
1548    case nir_intrinsic_terminate:
1549       ntr_KILL(c);
1550       break;
1551 
1552    case nir_intrinsic_terminate_if: {
1553       struct ureg_src cond = ureg_scalar(ntr_get_src(c, instr->src[0]), 0);
1554       /* For !native_integers, the bool got lowered to 1.0 or 0.0. */
1555       ntr_KILL_IF(c, ureg_negate(cond));
1556       break;
1557    }
1558       /* In TGSI we don't actually generate the barycentric coords, but instead
1559        * emit the interp instructions later.  However, we do need to store the
1560        * load_barycentric_at_* argument so that we can use it at that point.
1561        */
1562    case nir_intrinsic_load_barycentric_pixel:
1563    case nir_intrinsic_load_barycentric_centroid:
1564    case nir_intrinsic_load_barycentric_sample:
1565       break;
1566    case nir_intrinsic_load_barycentric_at_sample:
1567    case nir_intrinsic_load_barycentric_at_offset:
1568       ntr_store(c, &instr->def, ntr_get_src(c, instr->src[0]));
1569       break;
1570 
1571    case nir_intrinsic_ddx:
1572    case nir_intrinsic_ddx_coarse:
1573       ntr_DDX(c, ntr_get_dest(c, &instr->def), ntr_get_src(c, instr->src[0]));
1574       return;
1575    case nir_intrinsic_ddy:
1576    case nir_intrinsic_ddy_coarse:
1577       ntr_DDY(c, ntr_get_dest(c, &instr->def), ntr_get_src(c, instr->src[0]));
1578       return;
1579 
1580    case nir_intrinsic_decl_reg:
1581    case nir_intrinsic_load_reg:
1582    case nir_intrinsic_load_reg_indirect:
1583    case nir_intrinsic_store_reg:
1584    case nir_intrinsic_store_reg_indirect:
1585       /* fully consumed */
1586       break;
1587 
1588    default:
1589       fprintf(stderr, "Unknown intrinsic: ");
1590       nir_print_instr(&instr->instr, stderr);
1591       fprintf(stderr, "\n");
1592       break;
1593    }
1594 }
1595 
1596 struct ntr_tex_operand_state {
1597    struct ureg_src srcs[4];
1598    unsigned i;
1599 };
1600 
1601 static void
1602 ntr_push_tex_arg(struct ntr_compile *c, nir_tex_instr *instr, nir_tex_src_type tex_src_type,
1603                  struct ntr_tex_operand_state *s)
1604 {
1605    int tex_src = nir_tex_instr_src_index(instr, tex_src_type);
1606    if (tex_src < 0)
1607       return;
1608 
1609    nir_src *src = &instr->src[tex_src].src;
1610    s->srcs[s->i++] = ntr_get_src(c, *src);
1611 }
1612 
1613 static void
1614 ntr_emit_texture(struct ntr_compile *c, nir_tex_instr *instr)
1615 {
1616    struct ureg_dst dst = ntr_get_dest(c, &instr->def);
1617    enum tgsi_texture_type target =
1618       tgsi_texture_type_from_sampler_dim(instr->sampler_dim, instr->is_array, instr->is_shadow);
1619    unsigned tex_opcode;
1620 
1621    int tex_handle_src = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
1622    int sampler_handle_src = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
1623 
1624    struct ureg_src sampler;
1625    if (tex_handle_src >= 0 && sampler_handle_src >= 0) {
1626       /* It seems we can't get separate tex/sampler on GL, so just use one of the handles */
1627       sampler = ntr_get_src(c, instr->src[tex_handle_src].src);
1628       assert(nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset) == -1);
1629    } else {
1630       assert(tex_handle_src == -1 && sampler_handle_src == -1);
1631       sampler = ureg_DECL_sampler(c->ureg, instr->sampler_index);
1632       int sampler_src = nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset);
1633       if (sampler_src >= 0) {
1634          struct ureg_src reladdr = ntr_get_src(c, instr->src[sampler_src].src);
1635          sampler = ureg_src_indirect(sampler, ntr_reladdr(c, reladdr, 2));
1636       }
1637    }
1638 
1639    switch (instr->op) {
1640    case nir_texop_tex:
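      /* nir_to_rc_lower_tex() packs any projector into the backend1 vec; if
       * that vec is wider than the coordinate (plus comparator), a projector
       * is present and we can use TXP instead of TEX.
       */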
1641       if (nir_tex_instr_src_size(instr, nir_tex_instr_src_index(instr, nir_tex_src_backend1)) >
1642           MAX2(instr->coord_components, 2) + instr->is_shadow)
1643          tex_opcode = TGSI_OPCODE_TXP;
1644       else
1645          tex_opcode = TGSI_OPCODE_TEX;
1646       break;
1647    case nir_texop_txl:
1648       tex_opcode = TGSI_OPCODE_TXL;
1649       break;
1650    case nir_texop_txb:
1651       tex_opcode = TGSI_OPCODE_TXB;
1652       break;
1653    case nir_texop_txd:
1654       tex_opcode = TGSI_OPCODE_TXD;
1655       break;
1656    case nir_texop_txs:
1657       tex_opcode = TGSI_OPCODE_TXQ;
1658       break;
1659    case nir_texop_tg4:
1660       tex_opcode = TGSI_OPCODE_TG4;
1661       break;
1662    case nir_texop_query_levels:
1663       tex_opcode = TGSI_OPCODE_TXQ;
1664       break;
1665    case nir_texop_lod:
1666       tex_opcode = TGSI_OPCODE_LODQ;
1667       break;
1668    case nir_texop_texture_samples:
1669       tex_opcode = TGSI_OPCODE_TXQS;
1670       break;
1671    default:
1672       unreachable("unsupported tex op");
1673    }
1674 
1675    struct ntr_tex_operand_state s = {.i = 0};
1676    ntr_push_tex_arg(c, instr, nir_tex_src_backend1, &s);
1677    ntr_push_tex_arg(c, instr, nir_tex_src_backend2, &s);
1678 
1679    /* non-coord arg for TXQ */
1680    if (tex_opcode == TGSI_OPCODE_TXQ) {
1681       ntr_push_tex_arg(c, instr, nir_tex_src_lod, &s);
1682       /* virglrenderer mistakenly looks at .w instead of .x, so make sure it's
1683        * scalar
1684        */
1685       s.srcs[s.i - 1] = ureg_scalar(s.srcs[s.i - 1], 0);
1686    }
1687 
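   /* If the packed arguments spilled into a second vec4 (backend2), switch to
    * the two-source variants of the opcode.
    */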
1688    if (s.i > 1) {
1689       if (tex_opcode == TGSI_OPCODE_TEX)
1690          tex_opcode = TGSI_OPCODE_TEX2;
1691       if (tex_opcode == TGSI_OPCODE_TXB)
1692          tex_opcode = TGSI_OPCODE_TXB2;
1693       if (tex_opcode == TGSI_OPCODE_TXL)
1694          tex_opcode = TGSI_OPCODE_TXL2;
1695    }
1696 
1697    if (instr->op == nir_texop_txd) {
1698       /* Derivs appear in their own src args */
1699       int ddx = nir_tex_instr_src_index(instr, nir_tex_src_ddx);
1700       int ddy = nir_tex_instr_src_index(instr, nir_tex_src_ddy);
1701       s.srcs[s.i++] = ntr_get_src(c, instr->src[ddx].src);
1702       s.srcs[s.i++] = ntr_get_src(c, instr->src[ddy].src);
1703    }
1704 
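   /* For TG4, the gather component goes either in the sampler swizzle or as an
    * extra immediate source, depending on the tgsi_tg4_component_in_swizzle cap.
    */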
1705    if (instr->op == nir_texop_tg4 && target != TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
1706       if (c->screen->caps.tgsi_tg4_component_in_swizzle) {
1707          sampler = ureg_scalar(sampler, instr->component);
1708          s.srcs[s.i++] = ureg_src_undef();
1709       } else {
1710          s.srcs[s.i++] = ureg_imm1u(c->ureg, instr->component);
1711       }
1712    }
1713 
1714    s.srcs[s.i++] = sampler;
1715 
1716    enum tgsi_return_type tex_type;
1717    switch (instr->dest_type) {
1718    case nir_type_float32:
1719       tex_type = TGSI_RETURN_TYPE_FLOAT;
1720       break;
1721    case nir_type_int32:
1722       tex_type = TGSI_RETURN_TYPE_SINT;
1723       break;
1724    case nir_type_uint32:
1725       tex_type = TGSI_RETURN_TYPE_UINT;
1726       break;
1727    default:
1728       unreachable("unknown texture type");
1729    }
1730 
1731    struct ureg_dst tex_dst;
1732    if (instr->op == nir_texop_query_levels)
1733       tex_dst = ureg_writemask(ntr_temp(c), TGSI_WRITEMASK_W);
1734    else
1735       tex_dst = dst;
1736 
1737    while (s.i < 4)
1738       s.srcs[s.i++] = ureg_src_undef();
1739 
1740    struct ntr_insn *insn =
1741       ntr_insn(c, tex_opcode, tex_dst, s.srcs[0], s.srcs[1], s.srcs[2], s.srcs[3]);
1742    insn->tex_target = target;
1743    insn->tex_return_type = tex_type;
1744    insn->is_tex = true;
1745 
1746    int tex_offset_src = nir_tex_instr_src_index(instr, nir_tex_src_offset);
1747    if (tex_offset_src >= 0) {
1748       struct ureg_src offset = ntr_get_src(c, instr->src[tex_offset_src].src);
1749 
1750       insn->tex_offset[0].File = offset.File;
1751       insn->tex_offset[0].Index = offset.Index;
1752       insn->tex_offset[0].SwizzleX = offset.SwizzleX;
1753       insn->tex_offset[0].SwizzleY = offset.SwizzleY;
1754       insn->tex_offset[0].SwizzleZ = offset.SwizzleZ;
1755       insn->tex_offset[0].Padding = 0;
1756    }
1757 
1758    if (nir_tex_instr_has_explicit_tg4_offsets(instr)) {
1759       for (uint8_t i = 0; i < 4; ++i) {
1760          struct ureg_src imm =
1761             ureg_imm2i(c->ureg, instr->tg4_offsets[i][0], instr->tg4_offsets[i][1]);
1762          insn->tex_offset[i].File = imm.File;
1763          insn->tex_offset[i].Index = imm.Index;
1764          insn->tex_offset[i].SwizzleX = imm.SwizzleX;
1765          insn->tex_offset[i].SwizzleY = imm.SwizzleY;
1766          insn->tex_offset[i].SwizzleZ = imm.SwizzleZ;
1767       }
1768    }
1769 
1770    if (instr->op == nir_texop_query_levels)
1771       ntr_MOV(c, dst, ureg_scalar(ureg_src(tex_dst), 3));
1772 }
1773 
1774 static void
1775 ntr_emit_jump(struct ntr_compile *c, nir_jump_instr *jump)
1776 {
1777    switch (jump->type) {
1778    case nir_jump_break:
1779       ntr_BRK(c);
1780       break;
1781 
1782    case nir_jump_continue:
1783       ntr_CONT(c);
1784       break;
1785 
1786    default:
1787       fprintf(stderr, "Unknown jump instruction: ");
1788       nir_print_instr(&jump->instr, stderr);
1789       fprintf(stderr, "\n");
1790       abort();
1791    }
1792 }
1793 
1794 static void
1795 ntr_emit_ssa_undef(struct ntr_compile *c, nir_undef_instr *instr)
1796 {
1797    /* Nothing to do but make sure that we have some storage to deref. */
1798    (void)ntr_get_ssa_def_decl(c, &instr->def);
1799 }
1800 
1801 static void
1802 ntr_emit_instr(struct ntr_compile *c, nir_instr *instr)
1803 {
1804    switch (instr->type) {
1805    case nir_instr_type_deref:
1806       /* ignored, will be walked by nir_intrinsic_image_*_deref. */
1807       break;
1808 
1809    case nir_instr_type_alu:
1810       ntr_emit_alu(c, nir_instr_as_alu(instr));
1811       break;
1812 
1813    case nir_instr_type_intrinsic:
1814       ntr_emit_intrinsic(c, nir_instr_as_intrinsic(instr));
1815       break;
1816 
1817    case nir_instr_type_load_const:
1818       /* Nothing to do here, as load consts are done directly from
1819        * ntr_get_src() (since many constant NIR srcs will often get folded
1820        * directly into a register file index instead of as a TGSI src).
1821        */
1822       break;
1823 
1824    case nir_instr_type_tex:
1825       ntr_emit_texture(c, nir_instr_as_tex(instr));
1826       break;
1827 
1828    case nir_instr_type_jump:
1829       ntr_emit_jump(c, nir_instr_as_jump(instr));
1830       break;
1831 
1832    case nir_instr_type_undef:
1833       ntr_emit_ssa_undef(c, nir_instr_as_undef(instr));
1834       break;
1835 
1836    default:
1837       fprintf(stderr, "Unknown NIR instr type: ");
1838       nir_print_instr(instr, stderr);
1839       fprintf(stderr, "\n");
1840       abort();
1841    }
1842 }
1843 
1844 static void
1845 ntr_emit_if(struct ntr_compile *c, nir_if *if_stmt)
1846 {
1847    ntr_IF(c, c->if_cond);
1848 
1849    ntr_emit_cf_list(c, &if_stmt->then_list);
1850 
1851    if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
1852       ntr_ELSE(c);
1853       ntr_emit_cf_list(c, &if_stmt->else_list);
1854    }
1855 
1856    ntr_ENDIF(c);
1857 }
1858 
1859 static void
1860 ntr_emit_loop(struct ntr_compile *c, nir_loop *loop)
1861 {
1862    assert(!nir_loop_has_continue_construct(loop));
1863    ntr_BGNLOOP(c);
1864    ntr_emit_cf_list(c, &loop->body);
1865    ntr_ENDLOOP(c);
1866 }
1867 
1868 static void
1869 ntr_emit_block(struct ntr_compile *c, nir_block *block)
1870 {
1871    struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
1872    c->cur_block = ntr_block;
1873 
1874    nir_foreach_instr (instr, block) {
1875       ntr_emit_instr(c, instr);
1876 
1877       /* Sanity check that we didn't accidentally ureg_OPCODE() instead of ntr_OPCODE(). */
1878       if (ureg_get_instruction_number(c->ureg) != 0) {
1879          fprintf(stderr, "Emitted ureg insn during: ");
1880          nir_print_instr(instr, stderr);
1881          fprintf(stderr, "\n");
1882          unreachable("emitted ureg insn");
1883       }
1884    }
1885 
1886    /* Set up the if condition for ntr_emit_if(), which we have to do before
1887     * freeing up the temps (the "if" is treated as inside the block for liveness
1888     * purposes, despite not being an instruction)
1889     *
1890     * Note that, while IF and UIF are supposed to look at only .x, virglrenderer
1891     * looks at all of .xyzw.  No harm in working around the bug.
1892     */
1893    nir_if *nif = nir_block_get_following_if(block);
1894    if (nif)
1895       c->if_cond = ureg_scalar(ntr_get_src(c, nif->condition), TGSI_SWIZZLE_X);
1896 }
1897 
1898 static void
1899 ntr_emit_cf_list(struct ntr_compile *c, struct exec_list *list)
1900 {
1901    foreach_list_typed (nir_cf_node, node, node, list) {
1902       switch (node->type) {
1903       case nir_cf_node_block:
1904          ntr_emit_block(c, nir_cf_node_as_block(node));
1905          break;
1906 
1907       case nir_cf_node_if:
1908          ntr_emit_if(c, nir_cf_node_as_if(node));
1909          break;
1910 
1911       case nir_cf_node_loop:
1912          ntr_emit_loop(c, nir_cf_node_as_loop(node));
1913          break;
1914 
1915       default:
1916          unreachable("unknown CF type");
1917       }
1918    }
1919 }
1920 
1921 static void
1922 ntr_emit_block_ureg(struct ntr_compile *c, struct nir_block *block)
1923 {
1924    struct ntr_block *ntr_block = ntr_block_from_nir(c, block);
1925 
1926    /* Emit the ntr insns to tgsi_ureg. */
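   /* Control-flow opcodes go through their dedicated ureg helpers so branch
    * target labels can be fixed up now that real TGSI instruction numbers are
    * known; everything else is replayed through ureg_insn()/ureg_tex_insn().
    */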
1927    util_dynarray_foreach (&ntr_block->insns, struct ntr_insn, insn) {
1928       const struct tgsi_opcode_info *opcode_info = tgsi_get_opcode_info(insn->opcode);
1929 
1930       switch (insn->opcode) {
1931       case TGSI_OPCODE_IF:
1932          ureg_IF(c->ureg, insn->src[0], &c->cf_label);
1933          break;
1934 
1935       case TGSI_OPCODE_ELSE:
1936          ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
1937          ureg_ELSE(c->ureg, &c->cf_label);
1938          c->current_if_else = c->cf_label;
1939          break;
1940 
1941       case TGSI_OPCODE_ENDIF:
1942          ureg_fixup_label(c->ureg, c->current_if_else, ureg_get_instruction_number(c->ureg));
1943          ureg_ENDIF(c->ureg);
1944          break;
1945 
1946       case TGSI_OPCODE_BGNLOOP:
1947          /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
1948           * does reference BGNLOOP's.  Follow the former behavior unless something comes up
1949           * with a need.
1950           */
1951          ureg_BGNLOOP(c->ureg, &c->cf_label);
1952          break;
1953 
1954       case TGSI_OPCODE_ENDLOOP:
1955          ureg_ENDLOOP(c->ureg, &c->cf_label);
1956          break;
1957 
1958       default:
1959          if (insn->is_tex) {
1960             int num_offsets = 0;
1961             for (int i = 0; i < ARRAY_SIZE(insn->tex_offset); i++) {
1962                if (insn->tex_offset[i].File != TGSI_FILE_NULL)
1963                   num_offsets = i + 1;
1964             }
1965             ureg_tex_insn(c->ureg, insn->opcode, insn->dst, opcode_info->num_dst, insn->tex_target,
1966                           insn->tex_return_type, insn->tex_offset, num_offsets, insn->src,
1967                           opcode_info->num_src);
1968          } else {
1969             ureg_insn(c->ureg, insn->opcode, insn->dst, opcode_info->num_dst, insn->src,
1970                       opcode_info->num_src, insn->precise);
1971          }
1972       }
1973    }
1974 }
1975 
1976 static void
1977 ntr_emit_if_ureg(struct ntr_compile *c, nir_if *if_stmt)
1978 {
1979    /* Note: the last block emitted our IF opcode. */
1980 
1981    int if_stack = c->current_if_else;
1982    c->current_if_else = c->cf_label;
1983 
1984    /* Either the then or else block includes the ENDIF, which will fix up the
1985     * IF(/ELSE)'s label for jumping
1986     */
1987    ntr_emit_cf_list_ureg(c, &if_stmt->then_list);
1988    ntr_emit_cf_list_ureg(c, &if_stmt->else_list);
1989 
1990    c->current_if_else = if_stack;
1991 }
1992 
1993 static void
1994 ntr_emit_cf_list_ureg(struct ntr_compile *c, struct exec_list *list)
1995 {
1996    foreach_list_typed (nir_cf_node, node, node, list) {
1997       switch (node->type) {
1998       case nir_cf_node_block:
1999          ntr_emit_block_ureg(c, nir_cf_node_as_block(node));
2000          break;
2001 
2002       case nir_cf_node_if:
2003          ntr_emit_if_ureg(c, nir_cf_node_as_if(node));
2004          break;
2005 
2006       case nir_cf_node_loop:
2007          /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
2008           * does reference BGNLOOP's.  Follow the former behavior unless something comes up
2009           * with a need.
2010           */
2011          ntr_emit_cf_list_ureg(c, &nir_cf_node_as_loop(node)->body);
2012          break;
2013 
2014       default:
2015          unreachable("unknown CF type");
2016       }
2017    }
2018 }
2019 
2020 static void
2021 ntr_emit_impl(struct ntr_compile *c, nir_function_impl *impl)
2022 {
2023    c->impl = impl;
2024 
2025    c->ssa_temp = rzalloc_array(c, struct ureg_src, impl->ssa_alloc);
2026    c->reg_temp = rzalloc_array(c, struct ureg_dst, impl->ssa_alloc);
2027 
2028    /* Set up the struct ntr_blocks to put insns in */
2029    c->blocks = _mesa_pointer_hash_table_create(c);
2030    nir_foreach_block (block, impl) {
2031       struct ntr_block *ntr_block = rzalloc(c->blocks, struct ntr_block);
2032       util_dynarray_init(&ntr_block->insns, ntr_block);
2033       _mesa_hash_table_insert(c->blocks, block, ntr_block);
2034    }
2035 
2036    ntr_setup_registers(c);
2037 
2038    c->cur_block = ntr_block_from_nir(c, nir_start_block(impl));
2039    ntr_setup_inputs(c);
2040    ntr_setup_outputs(c);
2041    ntr_setup_uniforms(c);
2042 
2043    /* Emit the ntr insns */
2044    ntr_emit_cf_list(c, &impl->body);
2045 
2046    if (c->s->info.stage == MESA_SHADER_FRAGMENT)
2047       ntr_allocate_regs(c, impl);
2048    else
2049       ntr_allocate_regs_unoptimized(c, impl);
2050 
2051    /* Turn the ntr insns into actual TGSI tokens */
2052    ntr_emit_cf_list_ureg(c, &impl->body);
2053 
2054    ralloc_free(c->liveness);
2055    c->liveness = NULL;
2056 }
2057 
2058 static int
2059 type_size(const struct glsl_type *type, bool bindless)
2060 {
2061    return glsl_count_attribute_slots(type, false);
2062 }
2063 
2064 /* Allow vectorizing of ALU instructions.
2065  */
2066 static uint8_t
2067 ntr_should_vectorize_instr(const nir_instr *instr, const void *data)
2068 {
2069    if (instr->type != nir_instr_type_alu)
2070       return 0;
2071 
2072    return 4;
2073 }
2074 
2075 static bool
2076 ntr_should_vectorize_io(unsigned align, unsigned bit_size, unsigned num_components,
2077                         unsigned high_offset, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
2078                         void *data)
2079 {
2080    if (bit_size != 32)
2081       return false;
2082 
2083    /* Our offset alignment should always be at least 4 bytes */
2084    if (align < 4)
2085       return false;
2086 
2087    /* No wrapping off the end of a TGSI reg.  We could do a bit better by
2088     * looking at low's actual offset.  XXX: With LOAD_CONSTBUF maybe we don't
2089     * need this restriction.
2090     */
2091    unsigned worst_start_component = align == 4 ? 3 : align / 4;
2092    if (worst_start_component + num_components > 4)
2093       return false;
2094 
2095    return true;
2096 }
2097 
2098 static nir_variable_mode
2099 ntr_no_indirects_mask(nir_shader *s, struct pipe_screen *screen)
2100 {
2101    unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
2102    unsigned indirect_mask = nir_var_shader_in | nir_var_shader_out;
2103 
2104    if (!screen->get_shader_param(screen, pipe_stage, PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR)) {
2105       indirect_mask |= nir_var_function_temp;
2106    }
2107 
2108    return indirect_mask;
2109 }
2110 
2111 struct ntr_lower_tex_state {
2112    nir_scalar channels[8];
2113    unsigned i;
2114 };
2115 
2116 static void
2117 nir_to_rc_lower_tex_instr_arg(nir_builder *b, nir_tex_instr *instr, nir_tex_src_type tex_src_type,
2118                               struct ntr_lower_tex_state *s)
2119 {
2120    int tex_src = nir_tex_instr_src_index(instr, tex_src_type);
2121    if (tex_src < 0)
2122       return;
2123 
2124    nir_def *def = instr->src[tex_src].src.ssa;
2125    for (int i = 0; i < def->num_components; i++) {
2126       s->channels[s->i++] = nir_get_scalar(def, i);
2127    }
2128 
2129    nir_tex_instr_remove_src(instr, tex_src);
2130 }
2131 
2132 /**
2133  * Merges together a vec4 of tex coordinate/compare/bias/lod into a backend tex
2134  * src.  This lets NIR handle the coalescing of the vec4 rather than trying to
2135  * manage it on our own, and may lead to more vectorization.
2136  */
2137 static bool
2138 nir_to_rc_lower_tex_instr(nir_builder *b, nir_instr *instr, void *data)
2139 {
2140    if (instr->type != nir_instr_type_tex)
2141       return false;
2142 
2143    nir_tex_instr *tex = nir_instr_as_tex(instr);
2144 
2145    if (nir_tex_instr_src_index(tex, nir_tex_src_coord) < 0)
2146       return false;
2147 
2148    b->cursor = nir_before_instr(instr);
2149 
2150    struct ntr_lower_tex_state s = {0};
2151 
2152    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_coord, &s);
2153    /* We always have at least two slots for the coordinate, even on 1D. */
2154    s.i = MAX2(s.i, 2);
2155 
2156    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_comparator, &s);
2157    s.i = MAX2(s.i, 3);
2158 
2159    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_bias, &s);
2160 
2161    /* XXX: LZ */
2162    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_lod, &s);
2163    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_projector, &s);
2164    nir_to_rc_lower_tex_instr_arg(b, tex, nir_tex_src_ms_index, &s);
2165 
2166    /* No need to pack undefs in unused channels of the tex instr */
2167    while (!s.channels[s.i - 1].def)
2168       s.i--;
2169 
2170    /* Instead of putting undefs in the unused slots of the vecs, just put in
2171     * another used channel.  Otherwise, we'll get unnecessary moves into
2172     * registers.
2173     */
2174    assert(s.channels[0].def != NULL);
2175    for (int i = 1; i < s.i; i++) {
2176       if (!s.channels[i].def)
2177          s.channels[i] = s.channels[0];
2178    }
2179 
2180    nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_vec_scalars(b, s.channels, MIN2(s.i, 4)));
2181    if (s.i > 4)
2182       nir_tex_instr_add_src(tex, nir_tex_src_backend2, nir_vec_scalars(b, &s.channels[4], s.i - 4));
2183 
2184    return true;
2185 }
2186 
2187 static bool
2188 nir_to_rc_lower_tex(nir_shader *s)
2189 {
2190    return nir_shader_instructions_pass(s, nir_to_rc_lower_tex_instr, nir_metadata_control_flow,
2191                                        NULL);
2192 }
2193 
2194 /* Lowers texture projectors if we can't do them as TGSI_OPCODE_TXP. */
2195 static void
2196 nir_to_rc_lower_txp(nir_shader *s)
2197 {
2198    nir_lower_tex_options lower_tex_options = {
2199       .lower_txp = 0,
2200    };
2201 
2202    nir_foreach_block (block, nir_shader_get_entrypoint(s)) {
2203       nir_foreach_instr (instr, block) {
2204          if (instr->type != nir_instr_type_tex)
2205             continue;
2206          nir_tex_instr *tex = nir_instr_as_tex(instr);
2207 
2208          if (nir_tex_instr_src_index(tex, nir_tex_src_projector) < 0)
2209             continue;
2210 
2211          bool has_compare = nir_tex_instr_src_index(tex, nir_tex_src_comparator) >= 0;
2212          bool has_lod = nir_tex_instr_src_index(tex, nir_tex_src_lod) >= 0 ||
2213                         s->info.stage != MESA_SHADER_FRAGMENT;
2214          bool has_offset = nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
2215 
2216          /* We can do TXP for any tex (not txg) where we can fit all the
2217           * coordinates and comparator and projector in one vec4 without any
2218           * other modifiers to add on.
2219           *
2220           * nir_lower_tex() only handles the lowering on a sampler-dim basis, so
2221           * if we get any funny projectors then we just blow them all away.
2222           */
2223          if (tex->op != nir_texop_tex || has_lod || has_offset ||
2224              (tex->coord_components >= 3 && has_compare))
2225             lower_tex_options.lower_txp |= 1 << tex->sampler_dim;
2226       }
2227    }
2228 
2229    /* nir_lower_tex must be run even if no options are set, because we need the
2230     * LOD to be set for query_levels and for non-fragment shaders.
2231     */
2232    NIR_PASS_V(s, nir_lower_tex, &lower_tex_options);
2233 }
2234 
2235 /**
2236  * Translates the NIR shader to TGSI.
2237  *
2238  * This requires some lowering of the NIR shader to prepare it for translation.
2239  * We take ownership of the NIR shader passed, returning a reference to the new
2240  * TGSI tokens instead.  If you need to keep the NIR, then pass us a clone.
2241  */
2242 const void *
2243 nir_to_rc(struct nir_shader *s, struct pipe_screen *screen)
2244 {
2245    struct ntr_compile *c;
2246    const void *tgsi_tokens;
2247    bool is_r500 = r300_screen(screen)->caps.is_r500;
2248    c = rzalloc(NULL, struct ntr_compile);
2249    c->screen = screen;
2250    c->lower_fabs = !is_r500 && s->info.stage == MESA_SHADER_VERTEX;
2251 
2252    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2253       if (is_r500) {
2254          NIR_PASS_V(s, r300_transform_fs_trig_input);
2255       }
2256    } else if (r300_screen(screen)->caps.has_tcl) {
2257       if (is_r500) {
2258          /* Only nine should set both NTT shader name and
2259           * use_legacy_math_rules and D3D9 already mandates
2260           * the proper range for the trigonometric inputs.
2261           */
2262          if (!s->info.use_legacy_math_rules || !(s->info.name && !strcmp("TTN", s->info.name))) {
2263             NIR_PASS_V(s, r300_transform_vs_trig_input);
2264          }
2265       } else {
2266          if (r300_screen(screen)->caps.is_r400) {
2267             NIR_PASS_V(s, r300_transform_vs_trig_input);
2268          }
2269       }
2270    }
2271 
2272    /* Lower array indexing on FS inputs.  Since we don't set
2273     * ureg->supports_any_inout_decl_range, the TGSI input decls will be split to
2274     * elements by ureg, and so dynamically indexing them would be invalid.
2275     * Ideally we would set that ureg flag based on
2276     * PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE, but can't due to mesa/st
2277     * splitting NIR VS outputs to elements even if the FS doesn't get the
2278     * corresponding splitting, and virgl depends on TGSI across link boundaries
2279     * having matching declarations.
2280     */
2281    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2282       NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);
2283       NIR_PASS_V(s, nir_remove_dead_variables, nir_var_shader_in, NULL);
2284    }
2285 
2286    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size,
2287               nir_lower_io_use_interpolated_input_intrinsics);
2288 
2289    nir_to_rc_lower_txp(s);
2290    NIR_PASS_V(s, nir_to_rc_lower_tex);
2291 
2292    bool progress;
2293    do {
2294       progress = false;
2295       NIR_PASS(progress, s, nir_opt_algebraic);
2296       NIR_PASS(progress, s, nir_opt_constant_folding);
2297    } while (progress);
2298 
2299    do {
2300       progress = false;
2301       NIR_PASS(progress, s, nir_opt_algebraic_late);
2302       if (progress) {
2303          NIR_PASS_V(s, nir_copy_prop);
2304          NIR_PASS_V(s, nir_opt_dce);
2305          NIR_PASS_V(s, nir_opt_cse);
2306       }
2307    } while (progress);
2308 
2309    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2310       NIR_PASS_V(s, r300_nir_prepare_presubtract);
2311    }
2312 
2313    NIR_PASS_V(s, nir_lower_int_to_float);
2314    NIR_PASS_V(s, nir_copy_prop);
2315    NIR_PASS_V(s, r300_nir_post_integer_lowering);
2316    NIR_PASS_V(s, nir_lower_bool_to_float, is_r500 || s->info.stage == MESA_SHADER_FRAGMENT);
2317    /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
2318    NIR_PASS_V(s, nir_copy_prop);
2319    /* CSE cleanup after late ftrunc lowering. */
2320    NIR_PASS_V(s, nir_opt_cse);
2321    /* At this point we need to clean up:
2322     *  a) fcsel_gt that come from the ftrunc lowering on R300,
2323     *  b) all flavours of fcsels that read three different temp sources on R500.
2324     */
2325    if (s->info.stage == MESA_SHADER_VERTEX) {
2326       if (is_r500)
2327          NIR_PASS_V(s, r300_nir_lower_fcsel_r500);
2328       else
2329          NIR_PASS_V(s, r300_nir_lower_fcsel_r300);
2330       NIR_PASS_V(s, r300_nir_lower_flrp);
2331    } else {
2332       NIR_PASS_V(s, r300_nir_lower_comparison_fs);
2333    }
2334    NIR_PASS_V(s, r300_nir_opt_algebraic_late);
2335    NIR_PASS_V(s, nir_opt_dce);
2336 
2337    nir_move_options move_all = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
2338                                nir_move_comparisons | nir_move_copies | nir_move_load_ssbo;
2339 
2340    NIR_PASS_V(s, nir_opt_move, move_all);
2341    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, true);
2342    /* Late vectorizing after nir_move_vec_src_uses_to_dest reduces instruction count but
2343     * increases register usage. Testing shows this is beneficial only in VS.
2344     */
2345    if (s->info.stage == MESA_SHADER_VERTEX)
2346       NIR_PASS_V(s, nir_opt_vectorize, ntr_should_vectorize_instr, NULL);
2347 
2348    NIR_PASS_V(s, nir_convert_from_ssa, true);
2349    NIR_PASS_V(s, nir_lower_vec_to_regs, NULL, NULL);
2350 
2351    /* locals_to_reg_intrinsics will leave dead derefs that are good to clean up.
2352     */
2353    NIR_PASS_V(s, nir_lower_locals_to_regs, 32);
2354    NIR_PASS_V(s, nir_opt_dce);
2355 
2356    /* See comment in ntr_get_alu_src for supported modifiers */
2357    NIR_PASS_V(s, nir_legacy_trivialize, !c->lower_fabs);
2358 
2359    if (NIR_DEBUG(TGSI)) {
2360       fprintf(stderr, "NIR before translation to TGSI:\n");
2361       nir_print_shader(s, stderr);
2362    }
2363 
2364    c->s = s;
2365    c->ureg = ureg_create(pipe_shader_type_from_mesa(s->info.stage));
2366    ureg_setup_shader_info(c->ureg, &s->info);
2367    if (s->info.use_legacy_math_rules && screen->caps.legacy_math_rules)
2368       ureg_property(c->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1);
2369 
2370    if (s->info.stage == MESA_SHADER_FRAGMENT) {
2371       /* The draw module's polygon stipple layer doesn't respect the chosen
2372        * coordinate mode, so leave it as unspecified unless we're actually
2373        * reading the position in the shader already.  See
2374        * gl-2.1-polygon-stipple-fs on softpipe.
2375        */
2376       if ((s->info.inputs_read & VARYING_BIT_POS) ||
2377           BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
2378          ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
2379                        s->info.fs.origin_upper_left ? TGSI_FS_COORD_ORIGIN_UPPER_LEFT
2380                                                     : TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
2381 
2382          ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
2383                        s->info.fs.pixel_center_integer ? TGSI_FS_COORD_PIXEL_CENTER_INTEGER
2384                                                        : TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
2385       }
2386    }
2387    /* Emit the main function */
2388    nir_function_impl *impl = nir_shader_get_entrypoint(c->s);
2389    ntr_emit_impl(c, impl);
2390    ureg_END(c->ureg);
2391 
2392    tgsi_tokens = ureg_get_tokens(c->ureg, NULL);
2393 
2394    if (NIR_DEBUG(TGSI)) {
2395       fprintf(stderr, "TGSI after translation from NIR:\n");
2396       tgsi_dump(tgsi_tokens, 0);
2397    }
2398 
2399    ureg_destroy(c->ureg);
2400 
2401    ralloc_free(c);
2402    ralloc_free(s);
2403 
2404    return tgsi_tokens;
2405 }
2406