/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */

#include "ir2_private.h"

#include "fd2_program.h"
#include "freedreno_util.h"
#include "nir_legacy.h"

static const nir_shader_compiler_options options = {
   .lower_fpow = true,
   .lower_flrp32 = true,
   .lower_fmod = true,
   .lower_fdiv = true,
   .lower_fceil = true,
   .fuse_ffma16 = true,
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,
   /* .fdot_replicates = true, it is replicated, but it makes things worse */
   .lower_all_io_to_temps = true,
   .vertex_id_zero_based = true, /* it's not implemented anyway */
   .lower_bitops = true,
   .lower_vector_cmp = true,
   .lower_fdph = true,
   .has_fsub = true,
   .has_isub = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .force_indirect_unrolling = nir_var_all,
   .force_indirect_unrolling_sampler = true,
   .max_unroll_iterations = 32,
};

const nir_shader_compiler_options *
ir2_get_compiler_options(void)
{
   return &options;
}

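/* Convenience wrappers around NIR_PASS: OPT returns whether the pass made
 * progress, OPT_V just runs the pass unconditionally.
 */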
#define OPT(nir, pass, ...)                                                    \
   ({                                                                          \
      bool this_progress = false;                                              \
      NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);                       \
      this_progress;                                                           \
   })
#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

static void
ir2_optimize_loop(nir_shader *s)
{
   bool progress;
   do {
      progress = false;

      OPT_V(s, nir_lower_vars_to_ssa);
      progress |= OPT(s, nir_opt_copy_prop_vars);
      progress |= OPT(s, nir_copy_prop);
      progress |= OPT(s, nir_opt_dce);
      progress |= OPT(s, nir_opt_cse);
      /* progress |= OPT(s, nir_opt_gcm, true); */
      progress |= OPT(s, nir_opt_peephole_select, UINT_MAX, true, true);
      progress |= OPT(s, nir_opt_intrinsics);
      progress |= OPT(s, nir_opt_algebraic);
      progress |= OPT(s, nir_opt_constant_folding);
      progress |= OPT(s, nir_opt_dead_cf);
      if (OPT(s, nir_opt_loop)) {
         progress |= true;
         /* If nir_opt_loop makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(s, nir_copy_prop);
         OPT(s, nir_opt_dce);
      }
      progress |= OPT(s, nir_opt_loop_unroll);
      progress |= OPT(s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      progress |= OPT(s, nir_opt_remove_phis);
      progress |= OPT(s, nir_opt_undef);

   } while (progress);
}

/* the trig workarounds are the same as in ir3, but we don't want to include ir3 */
bool ir3_nir_apply_trig_workarounds(nir_shader *shader);

int
ir2_optimize_nir(nir_shader *s, bool lower)
{
   struct nir_lower_tex_options tex_options = {
      .lower_txp = ~0u,
      .lower_rect = 0,
      .lower_invalid_implicit_lod = true,
   };

   if (FD_DBG(DISASM)) {
      debug_printf("----------------------\n");
      nir_print_shader(s, stdout);
      debug_printf("----------------------\n");
   }

   OPT_V(s, nir_lower_vars_to_ssa);
   OPT_V(s, nir_lower_indirect_derefs, nir_var_shader_in | nir_var_shader_out,
         UINT32_MAX);

   if (lower) {
      OPT_V(s, ir3_nir_apply_trig_workarounds);
      OPT_V(s, nir_lower_tex, &tex_options);
   }

   ir2_optimize_loop(s);

   OPT_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
   OPT_V(s, nir_opt_sink, nir_move_const_undef);

   /* TODO we don't want to get shaders writing to depth for depth textures */
   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_shader_out_variable (var, s) {
         if (var->data.location == FRAG_RESULT_DEPTH)
            return -1;
      }
   }

   return 0;
}

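/* Allocate (or reuse) space in the immediate constants for the given float
 * values and return an ir2_src reading them. Components are merged into an
 * existing immediate vector when possible, and a swizzle is built so the
 * source reads the components in the requested order.
 */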
static struct ir2_src
load_const(struct ir2_context *ctx, float *value_f, unsigned ncomp)
{
   struct fd2_shader_stateobj *so = ctx->so;
   unsigned imm_ncomp, swiz, idx, i, j;
   uint32_t *value = (uint32_t *)value_f;

   /* try to merge with existing immediate (TODO: try with neg) */
   for (idx = 0; idx < so->num_immediates; idx++) {
      swiz = 0;
      imm_ncomp = so->immediates[idx].ncomp;
      for (i = 0; i < ncomp; i++) {
         for (j = 0; j < imm_ncomp; j++) {
            if (value[i] == so->immediates[idx].val[j])
               break;
         }
         if (j == imm_ncomp) {
            if (j == 4)
               break;
            so->immediates[idx].val[imm_ncomp++] = value[i];
         }
         swiz |= swiz_set(j, i);
      }
      /* matched all components */
      if (i == ncomp)
         break;
   }

   /* need to allocate new immediate */
   if (idx == so->num_immediates) {
      swiz = 0;
      imm_ncomp = 0;
      for (i = 0; i < ncomp; i++) {
         for (j = 0; j < imm_ncomp; j++) {
            if (value[i] == ctx->so->immediates[idx].val[j])
               break;
         }
         if (j == imm_ncomp) {
            so->immediates[idx].val[imm_ncomp++] = value[i];
         }
         swiz |= swiz_set(j, i);
      }
      so->num_immediates++;
   }
   so->immediates[idx].ncomp = imm_ncomp;

   if (ncomp == 1)
      swiz = swiz_merge(swiz, IR2_SWIZZLE_XXXX);

   return ir2_src(so->first_immediate + idx, swiz, IR2_SRC_CONST);
}

struct ir2_src
ir2_zero(struct ir2_context *ctx)
{
   return load_const(ctx, (float[]){0.0f}, 1);
}

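/* Track the liveness of a register: remember the loop depth at which it was
 * first written, and for registers used inside loops record the last block
 * of that loop in block_idx_free so the register isn't freed before the
 * loop is done with it.
 */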
static void
update_range(struct ir2_context *ctx, struct ir2_reg *reg)
{
   if (!reg->initialized) {
      reg->initialized = true;
      reg->loop_depth = ctx->loop_depth;
   }

   if (ctx->loop_depth > reg->loop_depth) {
      reg->block_idx_free = ctx->loop_last_block[reg->loop_depth + 1];
   } else {
      reg->loop_depth = ctx->loop_depth;
      reg->block_idx_free = -1;
   }

   /* for regs, we want to free at the end of the loop in any case
    * XXX don't do this for SSA
    */
   if (reg->loop_depth)
      reg->block_idx_free = ctx->loop_last_block[reg->loop_depth];
}

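/* Translate a nir_legacy_src into an ir2_src: constants become immediate
 * loads, registers map to IR2_SRC_REG, and SSA defs map (via ssa_map) to the
 * ir2 instruction that produced them.
 */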
static struct ir2_src
make_legacy_src(struct ir2_context *ctx, nir_legacy_src src)
{
   struct ir2_src res = {};
   struct ir2_reg *reg;

   /* Handle constants specially */
   if (src.is_ssa) {
      nir_const_value *const_value =
         nir_src_as_const_value(nir_src_for_ssa(src.ssa));

      if (const_value) {
         float c[src.ssa->num_components];
         nir_const_value_to_array(c, const_value, src.ssa->num_components, f32);
         return load_const(ctx, c, src.ssa->num_components);
      }
   }

   /* Otherwise translate the SSA def or register */
   if (!src.is_ssa) {
      res.num = src.reg.handle->index;
      res.type = IR2_SRC_REG;
      reg = &ctx->reg[res.num];
   } else {
      assert(ctx->ssa_map[src.ssa->index] >= 0);
      res.num = ctx->ssa_map[src.ssa->index];
      res.type = IR2_SRC_SSA;
      reg = &ctx->instr[res.num].ssa;
   }

   update_range(ctx, reg);
   return res;
}

static struct ir2_src
make_src(struct ir2_context *ctx, nir_src src)
{
   return make_legacy_src(ctx, nir_legacy_chase_src(&src));
}

static void
set_legacy_index(struct ir2_context *ctx, nir_legacy_dest dst,
                 struct ir2_instr *instr)
{
   struct ir2_reg *reg = &instr->ssa;

   if (dst.is_ssa) {
      ctx->ssa_map[dst.ssa->index] = instr->idx;
   } else {
      reg = &ctx->reg[dst.reg.handle->index];

      instr->is_ssa = false;
      instr->reg = reg;
   }
   update_range(ctx, reg);
}

static void
set_index(struct ir2_context *ctx, nir_def *def, struct ir2_instr *instr)
{
   set_legacy_index(ctx, nir_legacy_chase_dest(def), instr);
}

static struct ir2_instr *
ir2_instr_create(struct ir2_context *ctx, int type)
{
   struct ir2_instr *instr;

   instr = &ctx->instr[ctx->instr_count++];
   instr->idx = ctx->instr_count - 1;
   instr->type = type;
   instr->block_idx = ctx->block_idx;
   instr->pred = ctx->pred;
   instr->is_ssa = true;
   return instr;
}

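/* Create an ALU instruction for a NIR opcode, using the table below to pick
 * the matching a2xx scalar and/or vector opcodes (-1 means the op has no
 * direct mapping on that unit).
 */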
static struct ir2_instr *
instr_create_alu(struct ir2_context *ctx, nir_op opcode, unsigned ncomp)
{
   /* emit_alu will fix up instrs that don't map directly */
   static const struct ir2_opc {
      int8_t scalar, vector;
   } nir_ir2_opc[nir_num_opcodes + 1] = {
      [0 ... nir_num_opcodes - 1] = {-1, -1},

      [nir_op_mov] = {MAXs, MAXv},
      [nir_op_fneg] = {MAXs, MAXv},
      [nir_op_fabs] = {MAXs, MAXv},
      [nir_op_fsat] = {MAXs, MAXv},
      [nir_op_fsign] = {-1, CNDGTEv},
      [nir_op_fadd] = {ADDs, ADDv},
      [nir_op_fsub] = {ADDs, ADDv},
      [nir_op_fmul] = {MULs, MULv},
      [nir_op_ffma] = {-1, MULADDv},
      [nir_op_fmax] = {MAXs, MAXv},
      [nir_op_fmin] = {MINs, MINv},
      [nir_op_ffloor] = {FLOORs, FLOORv},
      [nir_op_ffract] = {FRACs, FRACv},
      [nir_op_ftrunc] = {TRUNCs, TRUNCv},
      [nir_op_fdot2] = {-1, DOT2ADDv},
      [nir_op_fdot3] = {-1, DOT3v},
      [nir_op_fdot4] = {-1, DOT4v},
      [nir_op_sge] = {-1, SETGTEv},
      [nir_op_slt] = {-1, SETGTv},
      [nir_op_sne] = {-1, SETNEv},
      [nir_op_seq] = {-1, SETEv},
      [nir_op_fcsel] = {-1, CNDEv},
      [nir_op_frsq] = {RECIPSQ_IEEE, -1},
      [nir_op_frcp] = {RECIP_IEEE, -1},
      [nir_op_flog2] = {LOG_IEEE, -1},
      [nir_op_fexp2] = {EXP_IEEE, -1},
      [nir_op_fsqrt] = {SQRT_IEEE, -1},
      [nir_op_fcos] = {COS, -1},
      [nir_op_fsin] = {SIN, -1},
      /* no fsat, fneg, fabs since source mods deal with those */

/* so we can use this function with a non-NIR op */
#define ir2_op_cube nir_num_opcodes
      [ir2_op_cube] = {-1, CUBEv},
   };

   struct ir2_opc op = nir_ir2_opc[opcode];
   assert(op.vector >= 0 || op.scalar >= 0);

   struct ir2_instr *instr = ir2_instr_create(ctx, IR2_ALU);
   instr->alu.vector_opc = op.vector;
   instr->alu.scalar_opc = op.scalar;
   instr->alu.export = -1;
   instr->alu.write_mask = (1 << ncomp) - 1;
   instr->src_count =
      opcode == ir2_op_cube ? 2 : nir_op_infos[opcode].num_inputs;
   instr->ssa.ncomp = ncomp;
   return instr;
}

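/* Like instr_create_alu, but the result is written to a (possibly shared)
 * register with an explicit write mask instead of an SSA value. Used for
 * multi-instruction sequences that build up a value piece by piece.
 */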
static struct ir2_instr *
instr_create_alu_reg(struct ir2_context *ctx, nir_op opcode, uint8_t write_mask,
                     struct ir2_instr *share_reg)
{
   struct ir2_instr *instr;
   struct ir2_reg *reg;

   reg = share_reg ? share_reg->reg : &ctx->reg[ctx->reg_count++];
   reg->ncomp = MAX2(reg->ncomp, util_logbase2(write_mask) + 1);

   instr = instr_create_alu(ctx, opcode, util_bitcount(write_mask));
   instr->alu.write_mask = write_mask;
   instr->reg = reg;
   instr->is_ssa = false;
   return instr;
}

static struct ir2_instr *
instr_create_alu_dest(struct ir2_context *ctx, nir_op opcode, nir_def *def)
{
   struct ir2_instr *instr;
   instr = instr_create_alu(ctx, opcode, def->num_components);
   set_index(ctx, def, instr);
   return instr;
}

static struct ir2_instr *
ir2_instr_create_fetch(struct ir2_context *ctx, nir_def *def,
                       instr_fetch_opc_t opc)
{
   struct ir2_instr *instr = ir2_instr_create(ctx, IR2_FETCH);
   instr->fetch.opc = opc;
   instr->src_count = 1;
   instr->ssa.ncomp = def->num_components;
   set_index(ctx, def, instr);
   return instr;
}

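/* Like make_src, but guarantees the result is not a constant by inserting a
 * mov when needed; used below for texture fetch sources.
 */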
static struct ir2_src
make_src_noconst(struct ir2_context *ctx, nir_src src)
{
   struct ir2_instr *instr;

   if (nir_src_as_const_value(src)) {
      instr = instr_create_alu(ctx, nir_op_mov, src.ssa->num_components);
      instr->src[0] = make_src(ctx, src);
      return ir2_src(instr->idx, 0, IR2_SRC_SSA);
   }

   return make_src(ctx, src);
}

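/* Translate a NIR ALU instruction into an ir2 ALU instruction: fold source
 * modifiers and fsat into the instruction where possible, compress source
 * swizzles against the write mask, and apply per-opcode fixups for NIR ops
 * that don't map 1:1 to a2xx ops (fsub, slt, fcsel, fdot2, fsign, ...).
 */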
static void
emit_alu(struct ir2_context *ctx, nir_alu_instr *alu)
{
   const nir_op_info *info = &nir_op_infos[alu->op];
   nir_def *def = &alu->def;
   struct ir2_instr *instr;
   struct ir2_src tmp;
   unsigned ncomp;

   /* Don't emit modifiers that are totally folded */
   if (((alu->op == nir_op_fneg) || (alu->op == nir_op_fabs)) &&
       nir_legacy_float_mod_folds(alu))
      return;

   if ((alu->op == nir_op_fsat) && nir_legacy_fsat_folds(alu))
      return;

   /* get the number of dst components */
   ncomp = def->num_components;

   instr = instr_create_alu(ctx, alu->op, ncomp);

   nir_legacy_alu_dest legacy_dest = nir_legacy_chase_alu_dest(&alu->def);
   set_legacy_index(ctx, legacy_dest.dest, instr);
   instr->alu.saturate = legacy_dest.fsat;
   instr->alu.write_mask = legacy_dest.write_mask;

   for (int i = 0; i < info->num_inputs; i++) {
      nir_alu_src *src = &alu->src[i];

      /* compress swizzle with writemask when applicable */
      unsigned swiz = 0, j = 0;
      for (int i = 0; i < 4; i++) {
         if (!(legacy_dest.write_mask & 1 << i) && !info->output_size)
            continue;
         swiz |= swiz_set(src->swizzle[i], j++);
      }

      nir_legacy_alu_src legacy_src =
         nir_legacy_chase_alu_src(src, true /* fuse_abs */);

      instr->src[i] = make_legacy_src(ctx, legacy_src.src);
      instr->src[i].swizzle = swiz_merge(instr->src[i].swizzle, swiz);
      instr->src[i].negate = legacy_src.fneg;
      instr->src[i].abs = legacy_src.fabs;
   }

   /* workarounds for NIR ops that don't map directly to a2xx ops */
   switch (alu->op) {
   case nir_op_fneg:
      instr->src[0].negate = 1;
      break;
   case nir_op_fabs:
      instr->src[0].abs = 1;
      break;
   case nir_op_fsat:
      instr->alu.saturate = 1;
      break;
   case nir_op_slt:
      tmp = instr->src[0];
      instr->src[0] = instr->src[1];
      instr->src[1] = tmp;
      break;
   case nir_op_fcsel:
      tmp = instr->src[1];
      instr->src[1] = instr->src[2];
      instr->src[2] = tmp;
      break;
   case nir_op_fsub:
      instr->src[1].negate = !instr->src[1].negate;
      break;
   case nir_op_fdot2:
      instr->src_count = 3;
      instr->src[2] = ir2_zero(ctx);
      break;
   case nir_op_fsign: {
      /* we need an extra instruction to deal with the zero case */
      struct ir2_instr *tmp;

      /* tmp = x == 0 ? 0 : 1 */
      tmp = instr_create_alu(ctx, nir_op_fcsel, ncomp);
      tmp->src[0] = instr->src[0];
      tmp->src[1] = ir2_zero(ctx);
      tmp->src[2] = load_const(ctx, (float[]){1.0f}, 1);

      /* result = x >= 0 ? tmp : -tmp */
      instr->src[1] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
      instr->src[2] = instr->src[1];
      instr->src[2].negate = true;
      instr->src_count = 3;
   } break;
   default:
      break;
   }
}

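/* Emit a load of shader input 'idx': a vertex fetch for vertex shaders
 * (with the fetch constant derived from idx), or a mov from the interpolated
 * input register for fragment shaders. VARYING_SLOT_POS needs extra work to
 * reconstruct gl_FragCoord from the param register and the position varying.
 */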
static void
load_input(struct ir2_context *ctx, nir_def *def, unsigned idx)
{
   struct ir2_instr *instr;
   int slot = -1;

   if (ctx->so->type == MESA_SHADER_VERTEX) {
      instr = ir2_instr_create_fetch(ctx, def, 0);
      instr->src[0] = ir2_src(0, 0, IR2_SRC_INPUT);
      instr->fetch.vtx.const_idx = 20 + (idx / 3);
      instr->fetch.vtx.const_idx_sel = idx % 3;
      return;
   }

   /* get slot from idx */
   nir_foreach_shader_in_variable (var, ctx->nir) {
      if (var->data.driver_location == idx) {
         slot = var->data.location;
         break;
      }
   }
   assert(slot >= 0);

   switch (slot) {
   case VARYING_SLOT_POS:
      /* need to extract xy with abs and add tile offset on a20x
       * zw from fragcoord input (w inverted in fragment shader)
       * TODO: only components that are required by fragment shader
       */
      instr = instr_create_alu_reg(
         ctx, ctx->so->is_a20x ? nir_op_fadd : nir_op_mov, 3, NULL);
      instr->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);
      instr->src[0].abs = true;
      /* on a20x, C64 contains the tile offset */
      instr->src[1] = ir2_src(64, 0, IR2_SRC_CONST);

      instr = instr_create_alu_reg(ctx, nir_op_mov, 4, instr);
      instr->src[0] = ir2_src(ctx->f->fragcoord, 0, IR2_SRC_INPUT);

      instr = instr_create_alu_reg(ctx, nir_op_frcp, 8, instr);
      instr->src[0] = ir2_src(ctx->f->fragcoord, IR2_SWIZZLE_Y, IR2_SRC_INPUT);

      unsigned reg_idx = instr->reg - ctx->reg; /* XXX */
      instr = instr_create_alu_dest(ctx, nir_op_mov, def);
      instr->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
      break;
   default:
      instr = instr_create_alu_dest(ctx, nir_op_mov, def);
      instr->src[0] = ir2_src(idx, 0, IR2_SRC_INPUT);
      break;
   }
}

static unsigned
output_slot(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
   int slot = -1;
   unsigned idx = nir_intrinsic_base(intr);
   nir_foreach_shader_out_variable (var, ctx->nir) {
      if (var->data.driver_location == idx) {
         slot = var->data.location;
         break;
      }
   }
   assert(slot != -1);
   return slot;
}

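/* Emit an output store. For vertex shaders, position/psize get fixed export
 * indices (62/63) and other varyings are matched against the fragment
 * shader's inputs; for fragment shaders only the color output is supported.
 */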
static void
store_output(struct ir2_context *ctx, nir_src src, unsigned slot,
             unsigned ncomp)
{
   struct ir2_instr *instr;
   unsigned idx = 0;

   if (ctx->so->type == MESA_SHADER_VERTEX) {
      switch (slot) {
      case VARYING_SLOT_POS:
         ctx->position = make_src(ctx, src);
         idx = 62;
         break;
      case VARYING_SLOT_PSIZ:
         ctx->so->writes_psize = true;
         idx = 63;
         break;
      default:
         /* find matching slot from fragment shader input */
         for (idx = 0; idx < ctx->f->inputs_count; idx++)
            if (ctx->f->inputs[idx].slot == slot)
               break;
         if (idx == ctx->f->inputs_count)
            return;
      }
   } else if (slot != FRAG_RESULT_COLOR && slot != FRAG_RESULT_DATA0) {
      /* only color output is implemented */
      return;
   }

   instr = instr_create_alu(ctx, nir_op_mov, ncomp);
   instr->src[0] = make_src(ctx, src);
   instr->alu.export = idx;
}

static void
emit_intrinsic(struct ir2_context *ctx, nir_intrinsic_instr *intr)
{
   struct ir2_instr *instr;
   ASSERTED nir_const_value *const_offset;
   unsigned idx;

   switch (intr->intrinsic) {
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_load_reg:
   case nir_intrinsic_store_reg:
      /* Nothing to do for these */
      break;

   case nir_intrinsic_load_input:
      load_input(ctx, &intr->def, nir_intrinsic_base(intr));
      break;
   case nir_intrinsic_store_output:
      store_output(ctx, intr->src[0], output_slot(ctx, intr),
                   intr->num_components);
      break;
   case nir_intrinsic_load_uniform:
      const_offset = nir_src_as_const_value(intr->src[0]);
      assert(const_offset); /* TODO can be false in ES2? */
      idx = nir_intrinsic_base(intr);
      idx += (uint32_t)const_offset[0].f32;
      instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->def);
      instr->src[0] = ir2_src(idx, 0, IR2_SRC_CONST);
      break;
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      instr = ir2_instr_create(ctx, IR2_ALU);
      instr->alu.vector_opc = VECTOR_NONE;
      if (intr->intrinsic == nir_intrinsic_discard_if) {
         instr->alu.scalar_opc = KILLNEs;
         instr->src[0] = make_src(ctx, intr->src[0]);
      } else {
         instr->alu.scalar_opc = KILLEs;
         instr->src[0] = ir2_zero(ctx);
      }
      instr->alu.export = -1;
      instr->src_count = 1;
      ctx->so->has_kill = true;
      break;
   case nir_intrinsic_load_front_face:
      /* gl_FrontFacing is in the sign of param.x
       * rcp required because otherwise we can't differentiate -0.0 and +0.0
       */
      ctx->so->need_param = true;

      struct ir2_instr *tmp = instr_create_alu(ctx, nir_op_frcp, 1);
      tmp->src[0] = ir2_src(ctx->f->inputs_count, 0, IR2_SRC_INPUT);

      instr = instr_create_alu_dest(ctx, nir_op_sge, &intr->def);
      instr->src[0] = ir2_src(tmp->idx, 0, IR2_SRC_SSA);
      instr->src[1] = ir2_zero(ctx);
      break;
   case nir_intrinsic_load_point_coord:
      /* param.zw (note: abs might be needed like fragcoord in param.xy?) */
      ctx->so->need_param = true;

      instr = instr_create_alu_dest(ctx, nir_op_mov, &intr->def);
      instr->src[0] =
         ir2_src(ctx->f->inputs_count, IR2_SWIZZLE_ZW, IR2_SRC_INPUT);
      break;
   default:
      compile_error(ctx, "unimplemented intr %d\n", intr->intrinsic);
      break;
   }
}

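/* Emit a texture sample as an a2xx TEX_FETCH. Cube maps are lowered first:
 * the CUBEv result's xy coordinates are scaled by 1/|z| (the major axis)
 * and biased by 1.5, and the fetch then reads the .yxw components.
 */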
static void
emit_tex(struct ir2_context *ctx, nir_tex_instr *tex)
{
   bool is_rect = false, is_cube = false;
   struct ir2_instr *instr;
   nir_src *coord, *lod_bias;

   coord = lod_bias = NULL;

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
         coord = &tex->src[i].src;
         break;
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         assert(!lod_bias);
         lod_bias = &tex->src[i].src;
         break;
      default:
         compile_error(ctx, "Unhandled NIR tex src type: %d\n",
                       tex->src[i].src_type);
         return;
      }
   }

   switch (tex->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      compile_error(ctx, "unimplemented texop %d\n", tex->op);
      return;
   }

   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   case GLSL_SAMPLER_DIM_RECT:
      is_rect = true;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      is_cube = true;
      break;
   default:
      compile_error(ctx, "unimplemented sampler %d\n", tex->sampler_dim);
      return;
   }

   struct ir2_src src_coord = make_src_noconst(ctx, *coord);

   /* for cube maps
    * tmp = cube(coord)
    * tmp.xy = tmp.xy / |tmp.z| + 1.5
    * coord = tmp.xyw
    */
   if (is_cube) {
      struct ir2_instr *rcp, *coord_xy;
      unsigned reg_idx;

      instr = instr_create_alu_reg(ctx, ir2_op_cube, 15, NULL);
      instr->src[0] = src_coord;
      instr->src[0].swizzle = IR2_SWIZZLE_ZZXY;
      instr->src[1] = src_coord;
      instr->src[1].swizzle = IR2_SWIZZLE_YXZZ;

      reg_idx = instr->reg - ctx->reg; /* hacky */

      rcp = instr_create_alu(ctx, nir_op_frcp, 1);
      rcp->src[0] = ir2_src(reg_idx, IR2_SWIZZLE_Z, IR2_SRC_REG);
      rcp->src[0].abs = true;

      coord_xy = instr_create_alu_reg(ctx, nir_op_ffma, 3, instr);
      coord_xy->src[0] = ir2_src(reg_idx, 0, IR2_SRC_REG);
      coord_xy->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
      coord_xy->src[2] = load_const(ctx, (float[]){1.5f}, 1);

      src_coord = ir2_src(reg_idx, 0, IR2_SRC_REG);
      /* TODO: lod/bias transformed by src_coord.z ? */
   }

   instr = ir2_instr_create_fetch(ctx, &tex->def, TEX_FETCH);
   instr->src[0] = src_coord;
   instr->src[0].swizzle = is_cube ? IR2_SWIZZLE_YXW : 0;
   instr->fetch.tex.is_cube = is_cube;
   instr->fetch.tex.is_rect = is_rect;
   instr->fetch.tex.samp_id = tex->sampler_index;

   /* for lod/bias, we insert an extra src for the backend to deal with */
   if (lod_bias) {
      instr->src[1] = make_src_noconst(ctx, *lod_bias);
      /* backend will use 2-3 components so apply swizzle */
      swiz_merge_p(&instr->src[1].swizzle, IR2_SWIZZLE_XXXX);
      instr->src_count = 2;
   }
}

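/* Record a fragment shader input in the fd2_shader_stateobj so the vertex
 * shader's outputs can be matched against it; vertex shader inputs are
 * handled later (in load_input, as vertex fetches).
 */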
static void
setup_input(struct ir2_context *ctx, nir_variable *in)
{
   struct fd2_shader_stateobj *so = ctx->so;
   unsigned n = in->data.driver_location;
   unsigned slot = in->data.location;

   assert(glsl_type_is_vector_or_scalar(in->type) ||
          glsl_type_is_unsized_array(in->type));

   /* handle later */
   if (ctx->so->type == MESA_SHADER_VERTEX)
      return;

   if (ctx->so->type != MESA_SHADER_FRAGMENT)
      compile_error(ctx, "unknown shader type: %d\n", ctx->so->type);

   n = ctx->f->inputs_count++;

   /* half of fragcoord from param reg, half from a varying */
   if (slot == VARYING_SLOT_POS) {
      ctx->f->fragcoord = n;
      so->need_param = true;
   }

   ctx->f->inputs[n].slot = slot;
   ctx->f->inputs[n].ncomp = glsl_get_components(in->type);

   /* in->data.interpolation?
    * OpenGL ES 2.0 can't do flat mode, but we still get it from GALLIUM_HUD
    */
}

static void
emit_undef(struct ir2_context *ctx, nir_undef_instr *undef)
{
   /* TODO we don't want to emit anything for undefs */

   struct ir2_instr *instr;

   instr = instr_create_alu_dest(ctx, nir_op_mov, &undef->def);
   instr->src[0] = ir2_src(0, 0, IR2_SRC_CONST);
}

static void
emit_instr(struct ir2_context *ctx, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      emit_alu(ctx, nir_instr_as_alu(instr));
      break;
   case nir_instr_type_deref:
      /* ignored, handled as part of the intrinsic they are src to */
      break;
   case nir_instr_type_intrinsic:
      emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
      break;
   case nir_instr_type_load_const:
      /* dealt with when using nir_src */
      break;
   case nir_instr_type_tex:
      emit_tex(ctx, nir_instr_as_tex(instr));
      break;
   case nir_instr_type_jump:
      ctx->block_has_jump[ctx->block_idx] = true;
      break;
   case nir_instr_type_undef:
      emit_undef(ctx, nir_instr_as_undef(instr));
      break;
   default:
      break;
   }
}

/* fragcoord.zw and a20x hw binning outputs */
static void
extra_position_exports(struct ir2_context *ctx, bool binning)
{
   struct ir2_instr *instr, *rcp, *sc, *wincoord, *off;

   if (ctx->f->fragcoord < 0 && !binning)
      return;

   instr = instr_create_alu(ctx, nir_op_fmax, 1);
   instr->src[0] = ctx->position;
   instr->src[0].swizzle = IR2_SWIZZLE_W;
   instr->src[1] = ir2_zero(ctx);

   rcp = instr_create_alu(ctx, nir_op_frcp, 1);
   rcp->src[0] = ir2_src(instr->idx, 0, IR2_SRC_SSA);

   sc = instr_create_alu(ctx, nir_op_fmul, 4);
   sc->src[0] = ctx->position;
   sc->src[1] = ir2_src(rcp->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);

   wincoord = instr_create_alu(ctx, nir_op_ffma, 4);
   wincoord->src[0] = ir2_src(66, 0, IR2_SRC_CONST);
   wincoord->src[1] = ir2_src(sc->idx, 0, IR2_SRC_SSA);
   wincoord->src[2] = ir2_src(65, 0, IR2_SRC_CONST);

   /* fragcoord z/w */
   if (ctx->f->fragcoord >= 0 && !binning) {
      instr = instr_create_alu(ctx, nir_op_mov, 1);
      instr->src[0] = ir2_src(wincoord->idx, IR2_SWIZZLE_Z, IR2_SRC_SSA);
      instr->alu.export = ctx->f->fragcoord;

      instr = instr_create_alu(ctx, nir_op_mov, 1);
      instr->src[0] = ctx->position;
      instr->src[0].swizzle = IR2_SWIZZLE_W;
      instr->alu.export = ctx->f->fragcoord;
      instr->alu.write_mask = 2;
   }

   if (!binning)
      return;

   off = instr_create_alu(ctx, nir_op_fadd, 1);
   off->src[0] = ir2_src(64, 0, IR2_SRC_CONST);
   off->src[1] = ir2_src(2, 0, IR2_SRC_INPUT);

   /* max of 8 set in freedreno_screen; unneeded instrs are patched out */
   for (int i = 0; i < 8; i++) {
      instr = instr_create_alu(ctx, nir_op_ffma, 4);
      instr->src[0] = ir2_src(1, IR2_SWIZZLE_WYWW, IR2_SRC_CONST);
      instr->src[1] = ir2_src(off->idx, IR2_SWIZZLE_XXXX, IR2_SRC_SSA);
      instr->src[2] = ir2_src(3 + i, 0, IR2_SRC_CONST);
      instr->alu.export = 32;

      instr = instr_create_alu(ctx, nir_op_ffma, 4);
      instr->src[0] = ir2_src(68 + i * 2, 0, IR2_SRC_CONST);
      instr->src[1] = ir2_src(wincoord->idx, 0, IR2_SRC_SSA);
      instr->src[2] = ir2_src(67 + i * 2, 0, IR2_SRC_CONST);
      instr->alu.export = 33;
   }
}

static bool emit_cf_list(struct ir2_context *ctx, struct exec_list *list);

static bool
emit_block(struct ir2_context *ctx, nir_block *block)
{
   struct ir2_instr *instr;
   nir_block *succs = block->successors[0];

   ctx->block_idx = block->index;

   nir_foreach_instr (instr, block)
      emit_instr(ctx, instr);

   if (!succs || !succs->index)
      return false;

   /* we want to be smart and always jump and have the backend clean up,
    * but we are not, so there are two cases where a jump is needed:
    *  loops (successor index is lower)
    *  jumps (a jump instruction was seen in the block)
    */
   if (succs->index > block->index && !ctx->block_has_jump[block->index])
      return false;

   assert(block->successors[1] == NULL);

   instr = ir2_instr_create(ctx, IR2_CF);
   instr->cf.block_idx = succs->index;
   /* XXX can't jump to a block with a different predicate */
   return true;
}

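/* If/else is handled with the predicate bit rather than jumps: the condition
 * sets the predicate (PRED_SETNEs, or PRED_SETNE_PUSHv when nested), the
 * then-block executes predicated, the predicate is inverted for the
 * else-block, and popped again when leaving a nested if.
 */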
static void
emit_if(struct ir2_context *ctx, nir_if *nif)
{
   unsigned pred = ctx->pred, pred_idx = ctx->pred_idx;
   struct ir2_instr *instr;

   /* XXX: the blob seems to always use the same register for the condition */

   instr = ir2_instr_create(ctx, IR2_ALU);
   instr->src[0] = make_src(ctx, nif->condition);
   instr->src_count = 1;
   instr->ssa.ncomp = 1;
   instr->alu.vector_opc = VECTOR_NONE;
   instr->alu.scalar_opc = SCALAR_NONE;
   instr->alu.export = -1;
   instr->alu.write_mask = 1;
   instr->pred = 0;

   /* if nested, use PRED_SETNE_PUSHv */
   if (pred) {
      instr->alu.vector_opc = PRED_SETNE_PUSHv;
      instr->src[1] = instr->src[0];
      instr->src[0] = ir2_src(pred_idx, 0, IR2_SRC_SSA);
      instr->src[0].swizzle = IR2_SWIZZLE_XXXX;
      instr->src[1].swizzle = IR2_SWIZZLE_XXXX;
      instr->src_count = 2;
   } else {
      instr->alu.scalar_opc = PRED_SETNEs;
   }

   ctx->pred_idx = instr->idx;
   ctx->pred = 3;

   emit_cf_list(ctx, &nif->then_list);

   /* TODO: if there is no else branch we don't need this,
    * and if the else branch is simple, we can just flip ctx->pred instead
    */
   instr = ir2_instr_create(ctx, IR2_ALU);
   instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
   instr->src_count = 1;
   instr->ssa.ncomp = 1;
   instr->alu.vector_opc = VECTOR_NONE;
   instr->alu.scalar_opc = PRED_SET_INVs;
   instr->alu.export = -1;
   instr->alu.write_mask = 1;
   instr->pred = 0;
   ctx->pred_idx = instr->idx;

   emit_cf_list(ctx, &nif->else_list);

   /* restore predicate for nested predicates */
   if (pred) {
      instr = ir2_instr_create(ctx, IR2_ALU);
      instr->src[0] = ir2_src(ctx->pred_idx, 0, IR2_SRC_SSA);
      instr->src_count = 1;
      instr->ssa.ncomp = 1;
      instr->alu.vector_opc = VECTOR_NONE;
      instr->alu.scalar_opc = PRED_SET_POPs;
      instr->alu.export = -1;
      instr->alu.write_mask = 1;
      instr->pred = 0;
      ctx->pred_idx = instr->idx;
   }

   /* restore ctx->pred */
   ctx->pred = pred;
}

/* get the highest block idx in the loop, so we know when
 * we can free registers that are allocated outside the loop
 */
static unsigned
loop_last_block(struct exec_list *list)
{
   nir_cf_node *node =
      exec_node_data(nir_cf_node, exec_list_get_tail(list), node);
   switch (node->type) {
   case nir_cf_node_block:
      return nir_cf_node_as_block(node)->index;
   case nir_cf_node_if:
      assert(0); /* XXX could this ever happen? */
      return 0;
   case nir_cf_node_loop:
      return loop_last_block(&nir_cf_node_as_loop(node)->body);
   default:
      compile_error(ctx, "Not supported\n");
      return 0;
   }
}

static void
emit_loop(struct ir2_context *ctx, nir_loop *nloop)
{
   assert(!nir_loop_has_continue_construct(nloop));
   ctx->loop_last_block[++ctx->loop_depth] = loop_last_block(&nloop->body);
   emit_cf_list(ctx, &nloop->body);
   ctx->loop_depth--;
}

static bool
emit_cf_list(struct ir2_context *ctx, struct exec_list *list)
{
   bool ret = false;
   foreach_list_typed (nir_cf_node, node, node, list) {
      ret = false;
      switch (node->type) {
      case nir_cf_node_block:
         ret = emit_block(ctx, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         emit_if(ctx, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         emit_loop(ctx, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         compile_error(ctx, "Not supported\n");
         break;
      }
   }
   return ret;
}

static void
cleanup_binning(struct ir2_context *ctx)
{
   assert(ctx->so->type == MESA_SHADER_VERTEX);

   /* kill non-position outputs for binning variant */
   nir_foreach_block (block, nir_shader_get_entrypoint(ctx->nir)) {
      nir_foreach_instr_safe (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_store_output)
            continue;

         if (output_slot(ctx, intr) != VARYING_SLOT_POS)
            nir_instr_remove(instr);
      }
   }

   ir2_optimize_nir(ctx->nir, false);
}

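/* Only scalarize the ops that exist purely as scalar (transcendental) ALU
 * instructions on a2xx; everything else stays vectorized.
 */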
static bool
ir2_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_frsq:
   case nir_op_frcp:
   case nir_op_flog2:
   case nir_op_fexp2:
   case nir_op_fsqrt:
   case nir_op_fcos:
   case nir_op_fsin:
      return true;
   default:
      break;
   }

   return false;
}

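/* Main entry point: clone the shader's NIR, run the late lowering passes
 * (int/bool to float, scalarizing the scalar-only ops, converting out of
 * SSA into the legacy register form), set up the inputs, and translate the
 * entrypoint's CF list into ir2 instructions.
 */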
void
ir2_nir_compile(struct ir2_context *ctx, bool binning)
{
   struct fd2_shader_stateobj *so = ctx->so;

   memset(ctx->ssa_map, 0xff, sizeof(ctx->ssa_map));

   ctx->nir = nir_shader_clone(NULL, so->nir);

   if (binning)
      cleanup_binning(ctx);

   OPT_V(ctx->nir, nir_copy_prop);
   OPT_V(ctx->nir, nir_opt_dce);
   OPT_V(ctx->nir, nir_opt_move, nir_move_comparisons);

   OPT_V(ctx->nir, nir_lower_int_to_float);
   OPT_V(ctx->nir, nir_lower_bool_to_float, true);
   while (OPT(ctx->nir, nir_opt_algebraic))
      ;
   OPT_V(ctx->nir, nir_opt_algebraic_late);
   OPT_V(ctx->nir, nir_lower_alu_to_scalar, ir2_alu_to_scalar_filter_cb, NULL);

   OPT_V(ctx->nir, nir_convert_from_ssa, true);

   OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest, false);
   OPT_V(ctx->nir, nir_lower_vec_to_regs, NULL, NULL);

   OPT_V(ctx->nir, nir_legacy_trivialize, true);

   OPT_V(ctx->nir, nir_opt_dce);

   nir_sweep(ctx->nir);

   if (FD_DBG(DISASM)) {
      debug_printf("----------------------\n");
      nir_print_shader(ctx->nir, stdout);
      debug_printf("----------------------\n");
   }

   /* fd2_shader_stateobj init */
   if (so->type == MESA_SHADER_FRAGMENT) {
      ctx->f->fragcoord = -1;
      ctx->f->inputs_count = 0;
      memset(ctx->f->inputs, 0, sizeof(ctx->f->inputs));
   }

   /* Setup inputs: */
   nir_foreach_shader_in_variable (in, ctx->nir)
      setup_input(ctx, in);

   if (so->type == MESA_SHADER_FRAGMENT) {
      unsigned idx;
      for (idx = 0; idx < ctx->f->inputs_count; idx++) {
         ctx->input[idx].ncomp = ctx->f->inputs[idx].ncomp;
         update_range(ctx, &ctx->input[idx]);
      }
      /* assume we have param input and kill it later if not */
      ctx->input[idx].ncomp = 4;
      update_range(ctx, &ctx->input[idx]);
   } else {
      ctx->input[0].ncomp = 1;
      ctx->input[2].ncomp = 1;
      update_range(ctx, &ctx->input[0]);
      update_range(ctx, &ctx->input[2]);
   }

   /* And emit the body: */
   nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->nir);

   nir_foreach_reg_decl (decl, fxn) {
      assert(decl->def.index < ARRAY_SIZE(ctx->reg));
      ctx->reg[decl->def.index].ncomp = nir_intrinsic_num_components(decl);
      ctx->reg_count = MAX2(ctx->reg_count, decl->def.index + 1);
   }

   nir_metadata_require(fxn, nir_metadata_block_index);
   emit_cf_list(ctx, &fxn->body);
   /* TODO emit_block(ctx, fxn->end_block); */

   if (so->type == MESA_SHADER_VERTEX)
      extra_position_exports(ctx, binning);

   ralloc_free(ctx->nir);

   /* kill unused param input */
   if (so->type == MESA_SHADER_FRAGMENT && !so->need_param)
      ctx->input[ctx->f->inputs_count].initialized = false;
}