/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "ir2_private.h"

#include "fd2_program.h"
#include "freedreno_util.h"
#include "nir_legacy.h"

static const nir_shader_compiler_options options = {
   .lower_fpow = true,
   .lower_flrp32 = true,
   .lower_fmod = true,
   .lower_fdiv = true,
   .lower_fceil = true,
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,
   /* .fdot_replicates = true, it is replicated, but it makes things worse */
   .lower_all_io_to_temps = true,
   .vertex_id_zero_based = true, /* it's not implemented anyway */
   .lower_bitops = true,
   .lower_vector_cmp = true,
   .lower_fdph = true,
   .has_fsub = true,
   .has_isub = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .force_indirect_unrolling = nir_var_all,
   .force_indirect_unrolling_sampler = true,
   .max_unroll_iterations = 32,
};

const nir_shader_compiler_options *
ir2_get_compiler_options(void)
{
   return &options;
}

#define OPT(nir, pass, ...)                                                    \
   ({                                                                          \
      bool this_progress = false;                                              \
      NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);                       \
      this_progress;                                                           \
   })
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

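/* standard NIR optimization loop: keep running the passes below until none
 * of them makes further progress
 */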
static void
ir2_optimize_loop(nir_shader *s)
{
   bool progress;
   do {
      progress = false;

      OPT_V(s, nir_lower_vars_to_ssa);
      progress |= OPT(s, nir_opt_copy_prop_vars);
      progress |= OPT(s, nir_copy_prop);
      progress |= OPT(s, nir_opt_dce);
      progress |= OPT(s, nir_opt_cse);
      /* progress |= OPT(s, nir_opt_gcm, true); */
      progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
      progress |= OPT(s, nir_opt_intrinsics);
      progress |= OPT(s, nir_opt_algebraic);
      progress |= OPT(s, nir_opt_constant_folding);
      progress |= OPT(s, nir_opt_dead_cf);
      if (OPT(s, nir_opt_loop)) {
         progress |= true;
         /* If nir_opt_loop makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(s, nir_copy_prop);
         OPT(s, nir_opt_dce);
      }
      progress |= OPT(s, nir_opt_loop_unroll);
      progress |= OPT(s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      progress |= OPT(s, nir_opt_remove_phis);
      progress |= OPT(s, nir_opt_undef);

   } while (progress);
}

/* the trig workarounds are the same as ir3's, but we don't want to include ir3 */
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);

int
ir2_optimize_nir(nir_shader *s, bool lower)
{
   struct nir_lower_tex_options tex_options = {
      .lower_txp = ~0u,
      .lower_rect = 0,
      .lower_invalid_implicit_lod = true,
   };

   if (FD_DBG(DISASM)) {
      debug_printf("----------------------\n");
      nir_print_shader(s, stdout);
      debug_printf("----------------------\n");
   }

   OPT_V(s, nir_lower_vars_to_ssa);
   OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out,
         UINT32_MAX);

   if (lower) {
      OPT_V(s, ir3_nir_apply_trig_workarounds);
      OPT_V(s, nir_lower_tex, &tex_options);
   }

   ir2_optimize_loop(s);

   OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
   OPT_V(s, nir_opt_sink, nir_move_const_undef);

   /* TODO we don't want shaders writing to depth for depth textures */
   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_shader_out_variable (var, s) {
         if (var->data.location == FRAG_RESULT_DEPTH)
            return -1;
      }
   }

   return 0;
}

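/* load a float vector as an immediate constant: reuse components of an
 * existing immediate via swizzling when possible, otherwise allocate a new
 * entry in the shader state's immediate pool
 */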
static struct ir2_src
load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp)
{
   struct fd2_shader_stateobj *so = ctx->so;
   unsigned imm_ncomp, swiz, idx, i, j;
   uint32_t *value = (uint32_t *)value_f;

   /* try to merge with existing immediate (TODO: try with neg) */
   for (idx = 0; idx < so->num_immediates; idx++) {
      swiz = 0;
      imm_ncomp = so->immediates[idx].ncomp;
      for (i = 0; i < ncomp; i++) {
         for (j = 0; j < imm_ncomp; j++) {
            if (value[i] == so->immediates[idx].val[j])
               break;
         }
         if (j == imm_ncomp) {
            if (j == 4)
               break;
            so->immediates[idx].val[imm_ncomp++] = value[i];
         }
         swiz |= swiz_set(j, i);
      }
      /* matched all components */
      if (i == ncomp)
         break;
   }

   /* need to allocate new immediate */
   if (idx == so->num_immediates) {
      swiz = 0;
      imm_ncomp = 0;
      for (i = 0; i < ncomp; i++) {
         for (j = 0; j < imm_ncomp; j++) {
            if (value[i] == ctx->so->immediates[idx].val[j])
               break;
         }
         if (j == imm_ncomp) {
            so->immediates[idx].val[imm_ncomp++] = value[i];
         }
         swiz |= swiz_set(j, i);
      }
      so->num_immediates++;
   }
   so->immediates[idx].ncomp = imm_ncomp;

   if (ncomp == 1)
      swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX);

   return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST);
}

struct ir2_src
ir2_zero(struct ir2_context *ctx)
{
   return load_const(ctx, (float[]){0.0f}, 1);
}

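/* track where a value is used relative to the current loop nesting: a value
 * defined outside a loop and used inside it cannot be freed until the end of
 * that loop
 */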
static void
update_range(struct ir2_context *ctx, struct ir2_reg *reg)
{
   if (!reg->initialized) {
      reg->initialized = true;
      reg->loop_depth = ctx->loop_depth;
   }

   if (ctx->loop_depth > reg->loop_depth) {
      reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1];
   } else {
      reg->loop_depth = ctx->loop_depth;
      reg->block_idx_free = -1;
   }

   /* for regs we want to free at the end of the loop in any case
    * XXX don't do this for SSA
    */
   if (reg->loop_depth)
      reg->block_idx_free = ctx->loop_last_block[reg->loop_depth];
}

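/* convert a nir_legacy_src into an ir2_src: constants become immediates,
 * everything else maps to an SSA value or a register
 */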
static struct ir2_src
make_legacy_src(struct ir2_context *ctx, nir_legacy_src src)
{
   struct ir2_src res = {};
   struct ir2_reg *reg;

   /* Handle constants specially */
   if (src.is_ssa) {
      nir_const_value *const_value =
         nir_src_as_const_value(nir_src_for_ssa(src.ssa));

      if (const_value) {
         float c[src.ssa->num_components];
         nir_const_value_to_array(c, const_value, src.ssa->num_components, f32);
         return load_const(ctx, c, src.ssa->num_components);
      }
   }

   /* Otherwise translate the SSA def or register */
   if (!src.is_ssa) {
      res.num = src.reg.handle->index;
      res.type = IR2_SRC_REG;
      reg = &ctx->reg[res.num];
   } else {
      assert(ctx->ssa_map[src.ssa->index] >= 0);
      res.num = ctx->ssa_map[src.ssa->index];
      res.type = IR2_SRC_SSA;
      reg = &ctx->instr[res.num].ssa;
   }

   update_range(ctx, reg);
   return res;
}

static struct ir2_src
make_src(struct ir2_context *ctx, nir_src src)
{
   return make_legacy_src(ctx, nir_legacy_chase_src(&src));
}

static void
set_legacy_index(struct ir2_context *ctx, nir_legacy_dest dst,
                 struct ir2_instr *instr)
{
   struct ir2_reg *reg = &instr->ssa;

   if (dst.is_ssa) {
      ctx->ssa_map[dst.ssa->index] = instr->idx;
   } else {
      reg = &ctx->reg[dst.reg.handle->index];

      instr->is_ssa = false;
      instr->reg = reg;
   }
   update_range(ctx, reg);
}

static void
set_index(struct ir2_context *ctx, nir_def *def, struct ir2_instr *instr)
{
   set_legacy_index(ctx, nir_legacy_chase_dest(def), instr);
}

static struct ir2_instr *
ir2_instr_create(struct ir2_context *ctx, int type)
{
   struct ir2_instr *instr;

   instr = &ctx->instr[ctx->instr_count++];
   instr->idx = ctx->instr_count - 1;
   instr->type = type;
   instr->block_idx = ctx->block_idx;
   instr->pred = ctx->pred;
   instr->is_ssa = true;
   return instr;
}

static struct ir2_instr *
instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
{
   /* emit_alu will fix up instrs that don't map directly */
   static const struct ir2_opc {
      int8_t scalar, vector;
   } nir_ir2_opc[nir_num_opcodes + 1] = {
      [0 ... nir_num_opcodes - 1] = {-1, -1},

      [nir_op_mov] = {MAXs, MAXv},
      [nir_op_fneg] = {MAXs, MAXv},
      [nir_op_fabs] = {MAXs, MAXv},
      [nir_op_fsat] = {MAXs, MAXv},
      [nir_op_fsign] = {-1, CNDGTEv},
      [nir_op_fadd] = {ADDs, ADDv},
      [nir_op_fsub] = {ADDs, ADDv},
      [nir_op_fmul] = {MULs, MULv},
      [nir_op_ffma] = {-1, MULADDv},
      [nir_op_fmax] = {MAXs, MAXv},
      [nir_op_fmin] = {MINs, MINv},
      [nir_op_ffloor] = {FLOORs, FLOORv},
      [nir_op_ffract] = {FRACs, FRACv},
      [nir_op_ftrunc] = {TRUNCs, TRUNCv},
      [nir_op_fdot2] = {-1, DOT2ADDv},
      [nir_op_fdot3] = {-1, DOT3v},
      [nir_op_fdot4] = {-1, DOT4v},
      [nir_op_sge] = {-1, SETGTEv},
      [nir_op_slt] = {-1, SETGTv},
      [nir_op_sne] = {-1, SETNEv},
      [nir_op_seq] = {-1, SETEv},
      [nir_op_fcsel] = {-1, CNDEv},
      [nir_op_frsq] = {RECIPSQ_IEEE, -1},
      [nir_op_frcp] = {RECIP_IEEE, -1},
      [nir_op_flog2] = {LOG_IEEE, -1},
      [nir_op_fexp2] = {EXP_IEEE, -1},
      [nir_op_fsqrt] = {SQRT_IEEE, -1},
      [nir_op_fcos] = {COS, -1},
      [nir_op_fsin] = {SIN, -1},
   /* no fsat, fneg, fabs since source mods deal with those */

   /* so we can use this function with a non-NIR op */
#define ir2_op_cube nir_num_opcodes
      [ir2_op_cube] = {-1, CUBEv},
   };

   struct ir2_opc op = nir_ir2_opc[opcode];
   assert(op.vector >= 0 || op.scalar >= 0);

   struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU);
   instr->alu.vector_opc = op.vector;
   instr->alu.scalar_opc = op.scalar;
   instr->alu.export = -1;
   instr->alu.write_mask = (1 << ncomp) - 1;
   instr->src_count =
      opcode == ir2_op_cube ? 2 : nir_op_infos[opcode].num_inputs;
   instr->ssa.ncomp = ncomp;
   return instr;
}

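/* create an ALU instruction with a register (non-SSA) destination, optionally
 * sharing the destination register with a previously created instruction
 */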
static struct ir2_instr *
instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode, uint8_t write_mask,
                     struct ir2_instr *share_reg)
{
   struct ir2_instr *instr;
   struct ir2_reg *reg;

   reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++];
   reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1);

   instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask));
   instr->alu.write_mask = write_mask;
   instr->reg = reg;
   instr->is_ssa = false;
   return instr;
}

static struct ir2_instr *
instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_def *def)
{
   struct ir2_instr *instr;
   instr = instr_create_alu(ctx, opcode, def->num_components);
   set_index(ctx, def, instr);
   return instr;
}

static struct ir2_instr *
ir2_instr_create_fetch(struct ir2_context *ctx, nir_def *def,
                       instr_fetch_opc_t opc)
{
   struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH);
   instr->fetch.opc = opc;
   instr->src_count = 1;
   instr->ssa.ncomp = def->num_components;
   set_index(ctx, def, instr);
   return instr;
}

static struct ir2_src
make_src_noconst(struct ir2_context *ctx, nir_src src)
{
   struct ir2_instr *instr;

   if (nir_src_as_const_value(src)) {
      instr = instr_create_alu(ctx, nir_op_mov, src.ssa->num_components);
      instr->src[0] = make_src(ctx, src);
      return ir2_src(instr->idx, 0, IR2_SRC_SSA);
   }

   return make_src(ctx, src);
}

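/* emit an ALU instruction: map the NIR opcode to a2xx scalar/vector opcodes
 * and apply fixups for ops that don't map directly
 */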
static void
emit_alu(struct ir2_context *ctx, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   nir_def *def = &alu->def;
   struct ir2_instr *instr;
   struct ir2_src tmp;
   unsigned ncomp;

   /* Don't emit modifiers that are totally folded */
   if (((alu->op == nir_op_fneg) || (alu->op == nir_op_fabs)) &&
       nir_legacy_float_mod_folds(alu))
      return;

   if ((alu->op == nir_op_fsat) && nir_legacy_fsat_folds(alu))
      return;

   /* get the number of dst components */
   ncomp = def->num_components;

   instr = instr_create_alu(ctx, alu->op, ncomp);

   nir_legacy_alu_dest legacy_dest =
      nir_legacy_chase_alu_dest(&alu->def);
   set_legacy_index(ctx, legacy_dest.dest, instr);
   instr->alu.saturate = legacy_dest.fsat;
   instr->alu.write_mask = legacy_dest.write_mask;

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *src = &alu->src[i];

      /* compress swizzle with writemask when applicable */
      unsigned swiz = 0, j = 0;
      for (int c = 0; c < 4; c++) {
         if (!(legacy_dest.write_mask & 1 << c) && !info->output_size)
            continue;
         swiz |= swiz_set(src->swizzle[c], j++);
      }

      nir_legacy_alu_src legacy_src =
         nir_legacy_chase_alu_src(src, true /* fuse_abs */);

      instr->src[i] = make_legacy_src(ctx, legacy_src.src);
      instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz);
      instr->src[i].negate = legacy_src.fneg;
      instr->src[i].abs = legacy_src.fabs;
   }

   /* workarounds for NIR ops that don't map directly to a2xx ops */
   switch (alu->op) {
   case nir_op_fneg:
      instr->src[0].negate = 1;
      break;
   case nir_op_fabs:
      instr->src[0].abs = 1;
      break;
   case nir_op_fsat:
      instr->alu.saturate = 1;
      break;
   case nir_op_slt:
      tmp = instr->src[0];
      instr->src[0] = instr->src[1];
      instr->src[1] = tmp;
      break;
   case nir_op_fcsel:
      tmp = instr->src[1];
      instr->src[1] = instr->src[2];
      instr->src[2] = tmp;
      break;
   case nir_op_fsub:
      instr->src[1].negate = !instr->src[1].negate;
      break;
   case nir_op_fdot2:
      instr->src_count = 3;
      instr->src[2] = ir2_zero(ctx);
      break;
   case nir_op_fsign: {
      /* we need an extra instruction to deal with the zero case */
      struct ir2_instr *tmp;

      /* tmp = x == 0 ? 0 : 1 */
      tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp);
      tmp->src[0] = instr->src[0];
      tmp->src[1] = ir2_zero(ctx);
      tmp->src[2] = load_const(ctx, (float[]){1.0f}, 1);

      /* result = x >= 0 ? tmp : -tmp */
      instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
      instr->src[2] = instr->src[1];
      instr->src[2].negate = true;
      instr->src_count = 3;
   } break;
   default:
      break;
   }
}

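/* load a shader input: a vertex fetch for the vertex shader, otherwise a move
 * from the input register (with extra work for gl_FragCoord)
 */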
static void
load_input(struct ir2_context *ctx, nir_def *def, unsigned idx)
{
   struct ir2_instr *instr;
   int slot = -1;

   if (ctx->so->type == MESA_SHADER_VERTEX) {
      instr = ir2_instr_create_fetch(ctx, def, 0);
      instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT);
      instr->fetch.vtx.const_idx = 20 + (idx / 3);
      instr->fetch.vtx.const_idx_sel = idx % 3;
      return;
   }

   /* get slot from idx */
   nir_foreach_shader_in_variable (var, ctx->nir) {
      if (var->data.driver_location == idx) {
         slot = var->data.location;
         break;
      }
   }
   assert(slot >= 0);

   switch (slot) {
   case VARYING_SLOT_POS:
      /* need to extract xy with abs and add the tile offset on a20x;
       * zw come from the fragcoord input (w is inverted in the fragment shader)
       * TODO: only emit the components required by the fragment shader
       */
      instr = instr_create_alu_reg(
         ctx, ctx->so->is_a20x ? nir_op_fadd : nir_op_mov, 3, NULL);
      instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
      instr->src[0].abs = true;
      /* on a20x, C64 contains the tile offset */
      instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST);

      instr = instr_create_alu_reg(ctx, nir_op_mov, 4, instr);
      instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT);

      instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr);
      instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT);

      unsigned reg_idx = instr->reg - ctx->reg; /* XXX */
      instr = instr_create_alu_dest(ctx, nir_op_mov, def);
      instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
      break;
   default:
      instr = instr_create_alu_dest(ctx, nir_op_mov, def);
      instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT);
      break;
   }
}

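/* map a store_output intrinsic's base (driver_location) back to the varying
 * or fragment-result slot it writes
 */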
static unsigned
output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
   int slot = -1;
   unsigned idx = nir_intrinsic_base(intr);
   nir_foreach_shader_out_variable (var, ctx->nir) {
      if (var->data.driver_location == idx) {
         slot = var->data.location;
         break;
      }
   }
   assert(slot != -1);
   return slot;
}

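/* emit an export for a shader output; for the vertex shader, varyings are
 * exported at the index of the matching fragment shader input (position and
 * point size use fixed export indices)
 */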
static void
store_output(struct ir2_context *ctx, nir_src src, unsigned slot,
             unsigned ncomp)
{
   struct ir2_instr *instr;
   unsigned idx = 0;

   if (ctx->so->type == MESA_SHADER_VERTEX) {
      switch (slot) {
      case VARYING_SLOT_POS:
         ctx->position = make_src(ctx, src);
         idx = 62;
         break;
      case VARYING_SLOT_PSIZ:
         ctx->so->writes_psize = true;
         idx = 63;
         break;
      default:
         /* find matching slot from fragment shader input */
         for (idx = 0; idx < ctx->f->inputs_count; idx++)
            if (ctx->f->inputs[idx].slot == slot)
               break;
         if (idx == ctx->f->inputs_count)
            return;
      }
   } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) {
      /* only color output is implemented */
      return;
   }

   instr = instr_create_alu(ctx, nir_op_mov, ncomp);
   instr->src[0] = make_src(ctx, src);
   instr->alu.export = idx;
}

static void
emit_intrinsic(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir2_instr *instr;
   ASSERTED nir_const_value *const_offset;
   unsigned idx;

   switch (intr->intrinsic) {
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_store_reg:
      /* Nothing to do for these */
      break;

   case nir_intrinsic_load_input:
      load_input(ctx, &intr->def, nir_intrinsic_base(intr));
      break;
   case nir_intrinsic_store_output:
      store_output(ctx, intr->src[0], output_slot(ctx, intr),
                   intr->num_components);
      break;
   case nir_intrinsic_load_uniform:
      const_offset = nir_src_as_const_value(intr->src[0]);
      assert(const_offset); /* TODO can be false in ES2? */
      idx = nir_intrinsic_base(intr);
      idx += (uint32_t)const_offset[0].f32;
      instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->def);
      instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST);
      break;
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      instr = ir2_instr_create(ctx, IR2_ALU);
      instr->alu.vector_opc = VECTOR_NONE;
      if (intr->intrinsic == nir_intrinsic_discard_if) {
         instr->alu.scalar_opc = KILLNEs;
         instr->src[0] = make_src(ctx, intr->src[0]);
      } else {
         instr->alu.scalar_opc = KILLEs;
         instr->src[0] = ir2_zero(ctx);
      }
      instr->alu.export = -1;
      instr->src_count = 1;
      ctx->so->has_kill = true;
      break;
   case nir_intrinsic_load_front_face:
      /* gl_FrontFacing is in the sign of param.x
       * rcp required because otherwise we can't differentiate -0.0 and +0.0
       */
      ctx->so->need_param = true;

      struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1);
      tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);

      instr = instr_create_alu_dest(ctx, nir_op_sge, &intr->def);
      instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
      instr->src[1] = ir2_zero(ctx);
      break;
   case nir_intrinsic_load_point_coord:
      /* param.zw (note: abs might be needed like fragcoord in param.xy?) */
      ctx->so->need_param = true;

      instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->def);
      instr->src[0] =
         ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT);
      break;
   default:
      compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic);
      break;
   }
}

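/* emit a texture fetch; cube maps need an extra ALU sequence to compute the
 * face coordinates before the fetch
 */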
static void
emit_tex(struct ir2_context *ctx, nir_tex_instr *tex)
{
   bool is_rect = false, is_cube = false;
   struct ir2_instr *instr;
   nir_src *coord, *lod_bias;

   coord = lod_bias = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      default:
         compile_error(ctx, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         return;
      }
   }

   switch (tex->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      compile_error(ctx, "unimplemented texop %d\n", tex->op);
      return;
   }

   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   case GLSL_SAMPLER_DIM_RECT:
      is_rect = true;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      is_cube = true;
      break;
   default:
      compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim);
      return;
   }

   struct ir2_src src_coord = make_src_noconst(ctx, *coord);

   /* for cube maps
    * tmp = cube(coord)
    * tmp.xy = tmp.xy / |tmp.z| + 1.5
    * coord = tmp.xyw
    */
   if (is_cube) {
      struct ir2_instr *rcp, *coord_xy;
      unsigned reg_idx;

      instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL);
      instr->src[0] = src_coord;
      instr->src[0].swizzle = IR2_SWIZZLE_ZZXY;
      instr->src[1] = src_coord;
      instr->src[1].swizzle = IR2_SWIZZLE_YXZZ;

      reg_idx = instr->reg - ctx->reg; /* hacky */

      rcp = instr_create_alu(ctx, nir_op_frcp, 1);
      rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
      rcp->src[0].abs = true;

      coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
      coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
      coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
      coord_xy->src[2] = load_const(ctx, (float[]){1.5f}, 1);

      src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG);
      /* TODO: lod/bias transformed by src_coord.z ? */
   }

   instr = ir2_instr_create_fetch(ctx, &tex->def, TEX_FETCH);
   instr->src[0] = src_coord;
   instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_YXW : 0;
   instr->fetch.tex.is_cube = is_cube;
   instr->fetch.tex.is_rect = is_rect;
   instr->fetch.tex.samp_id = tex->sampler_index;

   /* for lod/bias, we insert an extra src for the backend to deal with */
   if (lod_bias) {
      instr->src[1] = make_src_noconst(ctx, *lod_bias);
      /* backend will use 2-3 components so apply swizzle */
      swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX);
      instr->src_count = 2;
   }
}

static void
setup_input(struct ir2_context *ctx, nir_variable *in)
{
   struct fd2_shader_stateobj *so = ctx->so;
   unsigned n = in->data.driver_location;
   unsigned slot = in->data.location;

   assert(glsl_type_is_vector_or_scalar(in->type) ||
          glsl_type_is_unsized_array(in->type));

   /* handle later */
   if (ctx->so->type == MESA_SHADER_VERTEX)
      return;

   if (ctx->so->type != MESA_SHADER_FRAGMENT)
      compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);

   n = ctx->f->inputs_count++;

   /* half of fragcoord from param reg, half from a varying */
   if (slot == VARYING_SLOT_POS) {
      ctx->f->fragcoord = n;
      so->need_param = true;
   }

   ctx->f->inputs[n].slot = slot;
   ctx->f->inputs[n].ncomp = glsl_get_components(in->type);

   /* in->data.interpolation?
    * OpenGL ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD
    */
}

static void
emit_undef(struct ir2_context *ctx, nir_undef_instr *undef)
{
   /* TODO we don't want to emit anything for undefs */

   struct ir2_instr *instr;

   instr = instr_create_alu_dest(ctx, nir_op_mov, &undef->def);
   instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST);
}

static void
emit_instr(struct ir2_context *ctx, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      /* ignored, handled as part of the intrinsic they are src to */
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      /* dealt with when using nir_src */
      break;
   case nir_instr_type_tex:
      emit_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_jump:
      ctx->block_has_jump[ctx->block_idx] = true;
      break;
   case nir_instr_type_undef:
      emit_undef(ctx, nir_instr_as_undef(instr));
      break;
   default:
      break;
   }
}

/* fragcoord.zw and a20x hw binning outputs */
static void
extra_position_exports(struct ir2_context *ctx, bool binning)
{
   struct ir2_instr *instr, *rcp, *sc, *wincoord, *off;

   if (ctx->f->fragcoord < 0 && !binning)
      return;

   instr = instr_create_alu(ctx, nir_op_fmax, 1);
   instr->src[0] = ctx->position;
   instr->src[0].swizzle = IR2_SWIZZLE_W;
   instr->src[1] = ir2_zero(ctx);

   rcp = instr_create_alu(ctx, nir_op_frcp, 1);
   rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA);

   sc = instr_create_alu(ctx, nir_op_fmul, 4);
   sc->src[0] = ctx->position;
   sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);

   wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
   wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
   wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
   wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);

   /* fragcoord z/w */
   if (ctx->f->fragcoord >= 0 && !binning) {
      instr = instr_create_alu(ctx, nir_op_mov, 1);
      instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA);
      instr->alu.export = ctx->f->fragcoord;

      instr = instr_create_alu(ctx, nir_op_mov, 1);
      instr->src[0] = ctx->position;
      instr->src[0].swizzle = IR2_SWIZZLE_W;
      instr->alu.export = ctx->f->fragcoord;
      instr->alu.write_mask = 2;
   }

   if (!binning)
      return;

   off = instr_create_alu(ctx, nir_op_fadd, 1);
   off->src[0] = ir2_src(64, 0, IR2_SRC_CONST);
   off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT);

   /* 8 max set in freedreno_screen; unneeded instrs are patched out */
   for (int i = 0; i < 8; i++) {
      instr = instr_create_alu(ctx, nir_op_ffma, 4);
      instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
      instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
      instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
      instr->alu.export = 32;

      instr = instr_create_alu(ctx, nir_op_ffma, 4);
      instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
      instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
      instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
      instr->alu.export = 33;
   }
}

static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list);

static bool
emit_block(struct ir2_context *ctx, nir_block *block)
{
   struct ir2_instr *instr;
   nir_block *succs = block->successors[0];

   ctx->block_idx = block->index;

   nir_foreach_instr (instr, block)
      emit_instr(ctx, instr);

   if (!succs || !succs->index)
      return false;

   /* ideally we would always emit the jump and let the backend clean up,
    * but we don't, so there are two cases where a jump is needed:
    *  loops (successor index is lower than the current block's)
    *  jumps (a jump instruction was seen in the block)
    */
   if (succs->index > block->index && !ctx->block_has_jump[block->index])
      return false;

   assert(block->successors[1] == NULL);

   instr = ir2_instr_create(ctx, IR2_CF);
   instr->cf.block_idx = succs->index;
   /* XXX can't jump to a block with different predicate */
   return true;
}

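/* lower an if/else to predicated execution: set the predicate from the
 * condition, emit the then branch, invert the predicate for the else branch,
 * and pop/restore the predicate when leaving a nested if
 */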
static void
emit_if(struct ir2_context *ctx, nir_if *nif)
{
   unsigned pred = ctx->pred, pred_idx = ctx->pred_idx;
   struct ir2_instr *instr;

   /* XXX: blob seems to always use the same register for the condition */

   instr = ir2_instr_create(ctx, IR2_ALU);
   instr->src[0] = make_src(ctx, nif->condition);
   instr->src_count = 1;
   instr->ssa.ncomp = 1;
   instr->alu.vector_opc = VECTOR_NONE;
   instr->alu.scalar_opc = SCALAR_NONE;
   instr->alu.export = -1;
   instr->alu.write_mask = 1;
   instr->pred = 0;

   /* if nested, use PRED_SETNE_PUSHv */
   if (pred) {
      instr->alu.vector_opc = PRED_SETNE_PUSHv;
      instr->src[1] = instr->src[0];
      instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA);
      instr->src[0].swizzle = IR2_SWIZZLE_XXXX;
      instr->src[1].swizzle = IR2_SWIZZLE_XXXX;
      instr->src_count = 2;
   } else {
      instr->alu.scalar_opc = PRED_SETNEs;
   }

   ctx->pred_idx = instr->idx;
   ctx->pred = 3;

   emit_cf_list(ctx, &nif->then_list);

   /* TODO: if there is no else branch we don't need this,
    * and if the else branch is simple, we could just flip ctx->pred instead
    */
   instr = ir2_instr_create(ctx, IR2_ALU);
   instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
   instr->src_count = 1;
   instr->ssa.ncomp = 1;
   instr->alu.vector_opc = VECTOR_NONE;
   instr->alu.scalar_opc = PRED_SET_INVs;
   instr->alu.export = -1;
   instr->alu.write_mask = 1;
   instr->pred = 0;
   ctx->pred_idx = instr->idx;

   emit_cf_list(ctx, &nif->else_list);

   /* restore predicate for nested predicates */
   if (pred) {
      instr = ir2_instr_create(ctx, IR2_ALU);
      instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
      instr->src_count = 1;
      instr->ssa.ncomp = 1;
      instr->alu.vector_opc = VECTOR_NONE;
      instr->alu.scalar_opc = PRED_SET_POPs;
      instr->alu.export = -1;
      instr->alu.write_mask = 1;
      instr->pred = 0;
      ctx->pred_idx = instr->idx;
   }

   /* restore ctx->pred */
   ctx->pred = pred;
}

/* get the highest block idx in the loop, so we know when
 * we can free registers that are allocated outside the loop
 */
static unsigned
loop_last_block(struct exec_list *list)
{
   nir_cf_node *node =
      exec_node_data(nir_cf_node, exec_list_get_tail(list), node);
   switch (node->type) {
   case nir_cf_node_block:
      return nir_cf_node_as_block(node)->index;
   case nir_cf_node_if:
      assert(0); /* XXX could this ever happen? */
      return 0;
   case nir_cf_node_loop:
      return loop_last_block(&nir_cf_node_as_loop(node)->body);
   default:
      compile_error(ctx, "Not supported\n");
      return 0;
   }
}

static void
emit_loop(struct ir2_context *ctx, nir_loop *nloop)
{
   assert(!nir_loop_has_continue_construct(nloop));
   ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body);
   emit_cf_list(ctx, &nloop->body);
   ctx->loop_depth--;
}

static bool
emit_cf_list(struct ir2_context *ctx, struct exec_list *list)
{
   bool ret = false;
   foreach_list_typed (nir_cf_node, node, node, list) {
      ret = false;
      switch (node->type) {
      case nir_cf_node_block:
         ret = emit_block(ctx, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(ctx, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_loop(ctx, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         compile_error(ctx, "Not supported\n");
         break;
      }
   }
   return ret;
}

static void
cleanup_binning(struct ir2_context *ctx)
{
   assert(ctx->so->type == MESA_SHADER_VERTEX);

   /* kill non-position outputs for binning variant */
   nir_foreach_block (block, nir_shader_get_entrypoint(ctx->nir)) {
      nir_foreach_instr_safe (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_store_output)
            continue;

         if (output_slot(ctx, intr) != VARYING_SLOT_POS)
            nir_instr_remove(instr);
      }
   }

   ir2_optimize_nir(ctx->nir, false);
}

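/* only scalarize ops that exist only as scalar instructions on a2xx
 * (see the opcode table in instr_create_alu)
 */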
static bool
ir2_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_frsq:
   case nir_op_frcp:
   case nir_op_flog2:
   case nir_op_fexp2:
   case nir_op_fsqrt:
   case nir_op_fcos:
   case nir_op_fsin:
      return true;
   default:
      break;
   }

   return false;
}

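/* main entry point: lower and optimize the NIR, then translate it into IR2 */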
void
ir2_nir_compile(struct ir2_context *ctx, bool binning)
{
   struct fd2_shader_stateobj *so = ctx->so;

   memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map));

   ctx->nir = nir_shader_clone(NULL, so->nir);

   if (binning)
      cleanup_binning(ctx);

   OPT_V(ctx->nir, nir_copy_prop);
   OPT_V(ctx->nir, nir_opt_dce);
   OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);

   OPT_V(ctx->nir, nir_lower_int_to_float);
   OPT_V(ctx->nir, nir_lower_bool_to_float, true);
   while (OPT(ctx->nir, nir_opt_algebraic))
      ;
   OPT_V(ctx->nir, nir_opt_algebraic_late);
   OPT_V(ctx->nir, nir_lower_alu_to_scalar, ir2_alu_to_scalar_filter_cb, NULL);

   OPT_V(ctx->nir, nir_convert_from_ssa, true);

   OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest, false);
   OPT_V(ctx->nir, nir_lower_vec_to_regs, NULL, NULL);

   OPT_V(ctx->nir, nir_legacy_trivialize, true);

   OPT_V(ctx->nir, nir_opt_dce);

   nir_sweep(ctx->nir);

   if (FD_DBG(DISASM)) {
      debug_printf("----------------------\n");
      nir_print_shader(ctx->nir, stdout);
      debug_printf("----------------------\n");
   }

   /* fd2_shader_stateobj init */
   if (so->type == MESA_SHADER_FRAGMENT) {
      ctx->f->fragcoord = -1;
      ctx->f->inputs_count = 0;
      memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs));
   }

   /* Setup inputs: */
   nir_foreach_shader_in_variable (in, ctx->nir)
      setup_input(ctx, in);

   if (so->type == MESA_SHADER_FRAGMENT) {
      unsigned idx;
      for (idx = 0; idx < ctx->f->inputs_count; idx++) {
         ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp;
         update_range(ctx, &ctx->input[idx]);
      }
      /* assume we have param input and kill it later if not */
      ctx->input[idx].ncomp = 4;
      update_range(ctx, &ctx->input[idx]);
   } else {
      ctx->input[0].ncomp = 1;
      ctx->input[2].ncomp = 1;
      update_range(ctx, &ctx->input[0]);
      update_range(ctx, &ctx->input[2]);
   }

   /* And emit the body: */
   nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir);

   nir_foreach_reg_decl (decl, fxn) {
      assert(decl->def.index < ARRAY_SIZE(ctx->reg));
      ctx->reg[decl->def.index].ncomp = nir_intrinsic_num_components(decl);
      ctx->reg_count = MAX2(ctx->reg_count, decl->def.index + 1);
   }

   nir_metadata_require(fxn, nir_metadata_block_index);
   emit_cf_list(ctx, &fxn->body);
   /* TODO emit_block(ctx, fxn->end_block); */

   if (so->type == MESA_SHADER_VERTEX)
      extra_position_exports(ctx, binning);

   ralloc_free(ctx->nir);

   /* kill unused param input */
   if (so->type == MESA_SHADER_FRAGMENT && !so->need_param)
      ctx->input[ctx->f->inputs_count].initialized = false;
}