1 /*
2  * Copyright (c) 2012-2019 Etnaviv Project
3  * Copyright (c) 2019 Zodiac Inflight Innovations
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sub license,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Jonathan Marek <jonathan@marek.ca>
26  *    Wladimir J. van der Laan <laanwj@gmail.com>
27  */
28 
29 #include "etna_core_info.h"
30 #include "etnaviv_compiler.h"
31 #include "etnaviv_compiler_nir.h"
32 #include "etnaviv_asm.h"
33 #include "etnaviv_context.h"
34 #include "etnaviv_debug.h"
35 #include "etnaviv_nir.h"
36 #include "etnaviv_uniforms.h"
37 #include "etnaviv_util.h"
38 #include "nir.h"
39 
40 #include <math.h>
41 #include "isa/enums.h"
42 #include "util/u_memory.h"
43 #include "util/register_allocate.h"
44 #include "compiler/nir/nir_builder.h"
45 
46 #include "util/compiler.h"
47 #include "util/half_float.h"
48 
49 static bool
50 etna_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
51 {
52    const struct etna_core_info *info = data;
53 
54    if (instr->type != nir_instr_type_alu)
55       return false;
56 
57    nir_alu_instr *alu = nir_instr_as_alu(instr);
58    switch (alu->op) {
59    case nir_op_frsq:
60    case nir_op_frcp:
61    case nir_op_flog2:
62    case nir_op_fexp2:
63    case nir_op_fsqrt:
64    case nir_op_fcos:
65    case nir_op_fsin:
66    case nir_op_fdiv:
67    case nir_op_imul:
68       return true;
69    /* TODO: can do better than alu_to_scalar for vector compares */
70    case nir_op_b32all_fequal2:
71    case nir_op_b32all_fequal3:
72    case nir_op_b32all_fequal4:
73    case nir_op_b32any_fnequal2:
74    case nir_op_b32any_fnequal3:
75    case nir_op_b32any_fnequal4:
76    case nir_op_b32all_iequal2:
77    case nir_op_b32all_iequal3:
78    case nir_op_b32all_iequal4:
79    case nir_op_b32any_inequal2:
80    case nir_op_b32any_inequal3:
81    case nir_op_b32any_inequal4:
82       return true;
83    case nir_op_fdot2:
84       if (!etna_core_has_feature(info, ETNA_FEATURE_HALTI2))
85          return true;
86       break;
87    default:
88       break;
89    }
90 
91    return false;
92 }
93 
94 static void
95 etna_emit_block_start(struct etna_compile *c, unsigned block)
96 {
97    c->block_ptr[block] = c->inst_ptr;
98 }
99 
100 static void
101 etna_emit_output(struct etna_compile *c, nir_variable *var, struct etna_inst_src src)
102 {
103    struct etna_shader_io_file *sf = &c->variant->outfile;
104 
105    if (is_fs(c)) {
106       switch (var->data.location) {
107       case FRAG_RESULT_DATA0:
108       case FRAG_RESULT_DATA1:
109       case FRAG_RESULT_DATA2:
110       case FRAG_RESULT_DATA3:
111       case FRAG_RESULT_DATA4:
112       case FRAG_RESULT_DATA5:
113       case FRAG_RESULT_DATA6:
114       case FRAG_RESULT_DATA7:
115          c->variant->ps_color_out_reg[var->data.location - FRAG_RESULT_DATA0] = src.reg;
116          break;
117       case FRAG_RESULT_DEPTH:
118          c->variant->ps_depth_out_reg = src.reg;
119          break;
120       default:
121          unreachable("Unsupported fs output");
122       }
123       return;
124    }
125 
126    switch (var->data.location) {
127    case VARYING_SLOT_POS:
128       c->variant->vs_pos_out_reg = src.reg;
129       break;
130    case VARYING_SLOT_PSIZ:
131       c->variant->vs_pointsize_out_reg = src.reg;
132       break;
133    default:
134       assert(sf->num_reg < ETNA_NUM_INPUTS);
135       sf->reg[sf->num_reg].reg = src.reg;
136       sf->reg[sf->num_reg].slot = var->data.location;
137       sf->reg[sf->num_reg].num_components = glsl_get_components(var->type);
138       sf->num_reg++;
139       break;
140    }
141 }
142 
143 #define OPT(nir, pass, ...) ({                             \
144    bool this_progress = false;                             \
145    NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
146    this_progress;                                          \
147 })
148 
149 static void
150 etna_optimize_loop(nir_shader *s)
151 {
152    bool progress;
153    do {
154       progress = false;
155 
156       NIR_PASS_V(s, nir_lower_vars_to_ssa);
157       progress |= OPT(s, nir_opt_copy_prop_vars);
158       progress |= OPT(s, nir_opt_shrink_stores, true);
159       progress |= OPT(s, nir_opt_shrink_vectors, false);
160       progress |= OPT(s, nir_copy_prop);
161       progress |= OPT(s, nir_opt_dce);
162       progress |= OPT(s, nir_opt_cse);
163       progress |= OPT(s, nir_opt_peephole_select, 16, true, true);
164       progress |= OPT(s, nir_opt_intrinsics);
165       progress |= OPT(s, nir_opt_algebraic);
166       progress |= OPT(s, nir_opt_constant_folding);
167       progress |= OPT(s, nir_opt_dead_cf);
168       if (OPT(s, nir_opt_loop)) {
169          progress = true;
170          /* If nir_opt_loop makes progress, then we need to clean
171           * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
172           * to make progress.
173           */
174          OPT(s, nir_copy_prop);
175          OPT(s, nir_opt_dce);
176       }
177       progress |= OPT(s, nir_opt_loop_unroll);
178       progress |= OPT(s, nir_opt_if, nir_opt_if_optimize_phi_true_false);
179       progress |= OPT(s, nir_opt_remove_phis);
180       progress |= OPT(s, nir_opt_undef);
181    }
182    while (progress);
183 }
184 
185 static int
186 etna_glsl_type_size(const struct glsl_type *type, bool bindless)
187 {
188    return glsl_count_attribute_slots(type, false);
189 }
190 
191 static void
192 copy_uniform_state_to_shader(struct etna_shader_variant *sobj, uint64_t *consts, unsigned count)
193 {
194    struct etna_shader_uniform_info *uinfo = &sobj->uniforms;
195 
196    uinfo->count = count * 4;
197    uinfo->data = MALLOC(uinfo->count * sizeof(*uinfo->data));
198    uinfo->contents = MALLOC(uinfo->count * sizeof(*uinfo->contents));
199 
200    for (unsigned i = 0; i < uinfo->count; i++) {
201       uinfo->data[i] = consts[i];
202       uinfo->contents[i] = consts[i] >> 32;
203    }
204 
205    etna_set_shader_uniforms_dirty_flags(sobj);
206 }
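/* Note (annotation): each 64-bit entry in consts[] packs a 32-bit value in the
 * low half and a 32-bit ETNA_UNIFORM_* type tag in the high half (see the
 * CONST_VAL macro below), so the loop above simply unpacks value ->
 * uinfo->data[i] and tag -> uinfo->contents[i].  uinfo->count is in scalar
 * components, i.e. four times the number of vec4 constant registers used.
 */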
207 
208 #define ALU_SWIZ(s) INST_SWIZ((s)->swizzle[0], (s)->swizzle[1], (s)->swizzle[2], (s)->swizzle[3])
209 #define SRC_DISABLE ((hw_src){})
210 #define SRC_CONST(idx, s) ((hw_src){.use=1, .rgroup = ISA_REG_GROUP_UNIFORM_0, .reg=idx, .swiz=s})
211 #define SRC_REG(idx, s) ((hw_src){.use=1, .rgroup = ISA_REG_GROUP_TEMP, .reg=idx, .swiz=s})
212 
213 typedef struct etna_inst_dst hw_dst;
214 typedef struct etna_inst_src hw_src;
215 
216 static inline hw_src
217 src_swizzle(hw_src src, unsigned swizzle)
218 {
219    if (src.rgroup != ISA_REG_GROUP_IMMED)
220       src.swiz = inst_swiz_compose(src.swiz, swizzle);
221 
222    return src;
223 }
224 
225 /* constants are represented as 64-bit ints
226  * 32-bit for the value and 32-bit for the type (imm, uniform, etc)
227  */
228 
229 #define CONST_VAL(a, b) (nir_const_value) {.u64 = (uint64_t)(a) << 32 | (uint64_t)(b)}
230 #define CONST(x) CONST_VAL(ETNA_UNIFORM_CONSTANT, x)
231 #define UNIFORM(x) CONST_VAL(ETNA_UNIFORM_UNIFORM, x)
232 #define TEXSCALE(x, i) CONST_VAL(ETNA_UNIFORM_TEXRECT_SCALE_X + (i), x)
233 #define TEXSIZE(x, i) CONST_VAL(ETNA_UNIFORM_TEXTURE_WIDTH + (i), x)
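/* Illustrative example of the packing (not part of the original source):
 *
 *   nir_const_value v = CONST(0x3f800000);      // 1.0f as a constant
 *   // v.u64 >> 32      == ETNA_UNIFORM_CONSTANT  ("type" half)
 *   // (uint32_t)v.u64  == 0x3f800000             ("value" half)
 *
 * UNIFORM(7) tags scalar component 7 of the uniform file the same way.
 */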
234 
235 static int
236 const_add(uint64_t *c, uint64_t value)
237 {
238    for (unsigned i = 0; i < 4; i++) {
239       if (c[i] == value || !c[i]) {
240          c[i] = value;
241          return i;
242       }
243    }
244    return -1;
245 }
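/* Annotation: const_add() places a packed constant into the first free (zero)
 * component of the vec4 slot c[0..3], deduplicating identical values, and
 * returns the component index (or -1 if all four components hold other
 * values), e.g.:
 *
 *   uint64_t slot[4] = {0};
 *   const_add(slot, CONST(0x3f800000).u64);   // -> 0
 *   const_add(slot, CONST(0x40000000).u64);   // -> 1
 *   const_add(slot, CONST(0x3f800000).u64);   // -> 0 again (deduplicated)
 */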
246 
247 static hw_src
248 const_src(struct etna_compile *c, nir_const_value *value, unsigned num_components)
249 {
250    /* use inline immediates if possible */
251    if (c->info->halti >= 2 && num_components == 1 &&
252        value[0].u64 >> 32 == ETNA_UNIFORM_CONSTANT) {
253       uint32_t bits = value[0].u32;
254 
255       /* "float" - shifted by 12 */
256       if ((bits & 0xfff) == 0)
257          return etna_immediate_src(0, bits >> 12);
258 
259       /* "unsigned" - raw 20 bit value */
260       if (bits < (1 << 20))
261          return etna_immediate_src(2, bits);
262 
263       /* "signed" - sign extended 20-bit (sign included) value */
264       if (bits >= 0xfff80000)
265          return etna_immediate_src(1, bits);
266    }
267 
268    unsigned i;
269    int swiz = -1;
270    for (i = 0; swiz < 0; i++) {
271       uint64_t *a = &c->consts[i*4];
272       uint64_t save[4];
273       memcpy(save, a, sizeof(save));
274       swiz = 0;
275       for (unsigned j = 0; j < num_components; j++) {
276          int c = const_add(a, value[j].u64);
277          if (c < 0) {
278             memcpy(a, save, sizeof(save));
279             swiz = -1;
280             break;
281          }
282          swiz |= c << j * 2;
283       }
284    }
285 
286    assert(i <= ETNA_MAX_IMM / 4);
287    c->const_count = MAX2(c->const_count, i);
288 
289    return SRC_CONST(i - 1, swiz);
290 }
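/* Illustrative inline-immediate cases for the HALTI2+ fast path above:
 *   0x3f800000 (1.0f): low 12 bits are zero -> float immediate 0x3f800
 *   7                : fits in 20 bits      -> unsigned immediate 7
 *   0xffffffff (-1)  : >= 0xfff80000        -> signed 20-bit immediate
 * Everything else is packed into the uniform constant file via const_add()
 * and returned as a SRC_CONST() with the matching swizzle.
 */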
291 
292 /* how to swizzle when used as a src */
293 static const uint8_t
294 reg_swiz[NUM_REG_TYPES] = {
295    [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
296    [REG_TYPE_VIRT_SCALAR_X] = SWIZZLE(X, X, X, X),
297    [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(Y, Y, Y, Y),
298    [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
299    [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
300    [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
301    [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(Z, Z, Z, Z),
302    [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, Z, X, Z),
303    [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(Y, Z, Y, Z),
304    [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(Y, Z, Y, Z),
305    [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
306    [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
307    [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(W, W, W, W),
308    [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, W, X, W),
309    [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(Y, W, Y, W),
310    [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, W, X),
311    [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(Z, W, Z, W),
312    [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(Z, W, Z, W),
313    [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(Z, W, Z, W),
314    [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Z, W, X),
315    [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(Y, Z, W, X),
316    [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(Y, Z, W, X),
317 };
318 
319 /* how to swizzle when used as a dest */
320 static const uint8_t
321 reg_dst_swiz[NUM_REG_TYPES] = {
322    [REG_TYPE_VEC4] = INST_SWIZ_IDENTITY,
323    [REG_TYPE_VIRT_SCALAR_X] = INST_SWIZ_IDENTITY,
324    [REG_TYPE_VIRT_SCALAR_Y] = SWIZZLE(X, X, X, X),
325    [REG_TYPE_VIRT_VEC2_XY] = INST_SWIZ_IDENTITY,
326    [REG_TYPE_VIRT_VEC2T_XY] = INST_SWIZ_IDENTITY,
327    [REG_TYPE_VIRT_VEC2C_XY] = INST_SWIZ_IDENTITY,
328    [REG_TYPE_VIRT_SCALAR_Z] = SWIZZLE(X, X, X, X),
329    [REG_TYPE_VIRT_VEC2_XZ] = SWIZZLE(X, X, Y, Y),
330    [REG_TYPE_VIRT_VEC2_YZ] = SWIZZLE(X, X, Y, Y),
331    [REG_TYPE_VIRT_VEC2C_YZ] = SWIZZLE(X, X, Y, Y),
332    [REG_TYPE_VIRT_VEC3_XYZ] = INST_SWIZ_IDENTITY,
333    [REG_TYPE_VIRT_VEC3C_XYZ] = INST_SWIZ_IDENTITY,
334    [REG_TYPE_VIRT_SCALAR_W] = SWIZZLE(X, X, X, X),
335    [REG_TYPE_VIRT_VEC2_XW] = SWIZZLE(X, X, Y, Y),
336    [REG_TYPE_VIRT_VEC2_YW] = SWIZZLE(X, X, Y, Y),
337    [REG_TYPE_VIRT_VEC3_XYW] = SWIZZLE(X, Y, Z, Z),
338    [REG_TYPE_VIRT_VEC2_ZW] = SWIZZLE(X, X, X, Y),
339    [REG_TYPE_VIRT_VEC2T_ZW] = SWIZZLE(X, X, X, Y),
340    [REG_TYPE_VIRT_VEC2C_ZW] = SWIZZLE(X, X, X, Y),
341    [REG_TYPE_VIRT_VEC3_XZW] = SWIZZLE(X, Y, Y, Z),
342    [REG_TYPE_VIRT_VEC3_YZW] = SWIZZLE(X, X, Y, Z),
343    [REG_TYPE_VIRT_VEC3C_YZW] = SWIZZLE(X, X, Y, Z),
344 };
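/* Annotation: a virtual register type records which components of the
 * underlying vec4 temp hold live data.  reg_swiz[] replicates/reorders those
 * components on reads (e.g. VIRT_VEC2_ZW reads as .zwzw), while reg_dst_swiz[]
 * together with reg_writemask[] routes writes of the low components into the
 * right place (for VIRT_VEC2_ZW, x/y of the result land in z/w).  See
 * ra_src()/ra_def() below for how these tables are composed with the
 * instruction's own swizzle and write mask.
 */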
345 
346 /* nir_src to allocated register */
347 static hw_src
348 ra_src(struct etna_compile *c, nir_src *src)
349 {
350    unsigned reg = ra_get_node_reg(c->g, c->live_map[src_index(c->impl, src)]);
351    return SRC_REG(reg_get_base(c, reg), reg_swiz[reg_get_type(reg)]);
352 }
353 
354 static hw_src
355 get_src(struct etna_compile *c, nir_src *src)
356 {
357    nir_instr *instr = src->ssa->parent_instr;
358 
359    if (instr->pass_flags & BYPASS_SRC) {
360       assert(instr->type == nir_instr_type_alu);
361       nir_alu_instr *alu = nir_instr_as_alu(instr);
362       assert(alu->op == nir_op_mov);
363       return src_swizzle(get_src(c, &alu->src[0].src), ALU_SWIZ(&alu->src[0]));
364    }
365 
366    switch (instr->type) {
367    case nir_instr_type_load_const:
368       return const_src(c, nir_instr_as_load_const(instr)->value, src->ssa->num_components);
369    case nir_instr_type_intrinsic: {
370       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
371       switch (intr->intrinsic) {
372       case nir_intrinsic_load_input:
373       case nir_intrinsic_load_instance_id:
374       case nir_intrinsic_load_vertex_id:
375       case nir_intrinsic_load_uniform:
376       case nir_intrinsic_load_ubo:
377       case nir_intrinsic_load_reg:
378       case nir_intrinsic_ddx:
379       case nir_intrinsic_ddy:
380          return ra_src(c, src);
381       case nir_intrinsic_load_front_face:
382          return (hw_src) { .use = 1, .rgroup = ISA_REG_GROUP_INTERNAL };
383       case nir_intrinsic_load_frag_coord:
384          return SRC_REG(0, INST_SWIZ_IDENTITY);
385       case nir_intrinsic_load_texture_scale: {
386          int sampler = nir_src_as_int(intr->src[0]);
387          nir_const_value values[] = {
388             TEXSCALE(sampler, 0),
389             TEXSCALE(sampler, 1),
390          };
391 
392          return src_swizzle(const_src(c, values, 2), SWIZZLE(X,Y,X,X));
393       }
394       case nir_intrinsic_load_texture_size_etna: {
395          int sampler = nir_src_as_int(intr->src[0]);
396          nir_const_value values[] = {
397             TEXSIZE(sampler, 0),
398             TEXSIZE(sampler, 1),
399             TEXSIZE(sampler, 2),
400          };
401 
402          return src_swizzle(const_src(c, values, 3), SWIZZLE(X,Y,Z,X));
403       }
404       default:
405          compile_error(c, "Unhandled NIR intrinsic type: %s\n",
406                        nir_intrinsic_infos[intr->intrinsic].name);
407          break;
408       }
409    } break;
410    case nir_instr_type_alu:
411    case nir_instr_type_tex:
412       return ra_src(c, src);
413    case nir_instr_type_undef: {
414       /* return zero to deal with broken Blur demo */
415       nir_const_value value = CONST(0);
416       return src_swizzle(const_src(c, &value, 1), SWIZZLE(X,X,X,X));
417    }
418    default:
419       compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
420       break;
421    }
422 
423    return SRC_DISABLE;
424 }
425 
426 static bool
427 vec_dest_has_swizzle(nir_alu_instr *vec, nir_def *ssa)
428 {
429    for (unsigned i = 0; i < vec->def.num_components; i++) {
430       if (vec->src[i].src.ssa != ssa)
431          continue;
432 
433       if (vec->src[i].swizzle[0] != i)
434          return true;
435    }
436 
437    /* don't deal with possible bypassed vec/mov chain */
438    nir_foreach_use(use_src, ssa) {
439       nir_instr *instr = nir_src_parent_instr(use_src);
440       if (instr->type != nir_instr_type_alu)
441          continue;
442 
443       nir_alu_instr *alu = nir_instr_as_alu(instr);
444 
445       switch (alu->op) {
446       case nir_op_mov:
447       case nir_op_vec2:
448       case nir_op_vec3:
449       case nir_op_vec4:
450          return true;
451       default:
452          break;
453       }
454    }
455    return false;
456 }
457 
458 /* get allocated dest register for nir_def
459  * *p_swiz tells how the components need to be placed into register
460  */
461 static hw_dst
462 ra_def(struct etna_compile *c, nir_def *def, unsigned *p_swiz)
463 {
464    unsigned swiz = INST_SWIZ_IDENTITY, mask = 0xf;
465    def = real_def(def, &swiz, &mask);
466 
467    unsigned r = ra_get_node_reg(c->g, c->live_map[def_index(c->impl, def)]);
468    unsigned t = reg_get_type(r);
469 
470    *p_swiz = inst_swiz_compose(swiz, reg_dst_swiz[t]);
471 
472    return (hw_dst) {
473       .use = 1,
474       .reg = reg_get_base(c, r),
475       .write_mask = inst_write_mask_compose(mask, reg_writemask[t]),
476    };
477 }
478 
479 static void
480 emit_alu(struct etna_compile *c, nir_alu_instr * alu)
481 {
482    const nir_op_info *info = &nir_op_infos[alu->op];
483 
484    /* marked as dead instruction (vecN and other bypassed instr) */
485    if (is_dead_instruction(&alu->instr))
486       return;
487 
488    assert(!(alu->op >= nir_op_vec2 && alu->op <= nir_op_vec4));
489 
490    unsigned dst_swiz;
491    hw_dst dst = ra_def(c, &alu->def, &dst_swiz);
492 
493    switch (alu->op) {
494    case nir_op_fdot2:
495    case nir_op_fdot3:
496    case nir_op_fdot4:
497       /* not per-component - don't compose dst_swiz */
498       dst_swiz = INST_SWIZ_IDENTITY;
499       break;
500    default:
501       break;
502    }
503 
504    hw_src srcs[3] = {0};
505 
506    for (int i = 0; i < info->num_inputs; i++) {
507       nir_alu_src *asrc = &alu->src[i];
508       hw_src src;
509 
510       src = src_swizzle(get_src(c, &asrc->src), ALU_SWIZ(asrc));
511       src = src_swizzle(src, dst_swiz);
512 
513       if (src.rgroup != ISA_REG_GROUP_IMMED) {
514          src.neg = is_src_mod_neg(&alu->instr, i) || (alu->op == nir_op_fneg);
515          src.abs = is_src_mod_abs(&alu->instr, i) || (alu->op == nir_op_fabs);
516       } else {
517          assert(alu->op != nir_op_fabs);
518          assert(!is_src_mod_abs(&alu->instr, i) && alu->op != nir_op_fabs);
519 
520          if (src.imm_type > 0)
521             assert(!is_src_mod_neg(&alu->instr, i));
522 
523          if (is_src_mod_neg(&alu->instr, i) && src.imm_type == 0)
524             src.imm_val ^= 0x80000;
525       }
526 
527       srcs[i] = src;
528    }
529 
530    etna_emit_alu(c, alu->op, dst, srcs, alu->op == nir_op_fsat);
531 }
532 
533 static void
534 emit_tex(struct etna_compile *c, nir_tex_instr * tex)
535 {
536    unsigned dst_swiz;
537    hw_dst dst = ra_def(c, &tex->def, &dst_swiz);
538    nir_src *coord = NULL, *src1 = NULL, *src2 = NULL;
539 
540    for (unsigned i = 0; i < tex->num_srcs; i++) {
541       switch (tex->src[i].src_type) {
542       case nir_tex_src_coord:
543          coord = &tex->src[i].src;
544          break;
545       case nir_tex_src_bias:
546       case nir_tex_src_lod:
547       case nir_tex_src_ddx:
548          assert(!src1);
549          src1 = &tex->src[i].src;
550          break;
551       case nir_tex_src_comparator:
552       case nir_tex_src_ddy:
553          src2 = &tex->src[i].src;
554          break;
555       default:
556          compile_error(c, "Unhandled NIR tex src type: %d\n",
557                        tex->src[i].src_type);
558          break;
559       }
560    }
561 
562    etna_emit_tex(c, tex->op, tex->sampler_index, dst_swiz, dst, get_src(c, coord),
563                  src1 ? get_src(c, src1) : SRC_DISABLE,
564                  src2 ? get_src(c, src2) : SRC_DISABLE);
565 }
566 
567 static void
568 emit_intrinsic(struct etna_compile *c, nir_intrinsic_instr * intr)
569 {
570    switch (intr->intrinsic) {
571    case nir_intrinsic_store_deref:
572       etna_emit_output(c, nir_src_as_deref(intr->src[0])->var, get_src(c, &intr->src[1]));
573       break;
574    case nir_intrinsic_terminate_if:
575       etna_emit_discard(c, get_src(c, &intr->src[0]));
576       break;
577    case nir_intrinsic_terminate:
578       etna_emit_discard(c, SRC_DISABLE);
579       break;
580    case nir_intrinsic_ddx: {
581       unsigned dst_swiz;
582       struct etna_inst_dst dst = ra_def(c, &intr->def, &dst_swiz);
583       struct etna_inst_src src = get_src(c, &intr->src[0]);
584 
585       src = src_swizzle(src, dst_swiz);
586 
587       struct etna_inst inst = {
588          .dst = dst,
589          .opcode = ISA_OPC_DSX,
590          .cond = ISA_COND_TRUE,
591          .type = ISA_TYPE_F32,
592          .src[0] = src,
593          .src[1] = src,
594       };
595 
596       emit_inst(c, &inst);
597    } break;
598    case nir_intrinsic_ddy: {
599       unsigned dst_swiz;
600       struct etna_inst_dst dst = ra_def(c, &intr->def, &dst_swiz);
601       struct etna_inst_src src = get_src(c, &intr->src[0]);
602 
603       src = src_swizzle(src, dst_swiz);
604 
605       struct etna_inst inst = {
606          .dst = dst,
607          .opcode = ISA_OPC_DSY,
608          .type = ISA_TYPE_F32,
609          .cond = ISA_COND_TRUE,
610          .src[0] = src,
611          .src[1] = src,
612       };
613 
614       emit_inst(c, &inst);
615    } break;
616    case nir_intrinsic_load_uniform: {
617       unsigned dst_swiz;
618       struct etna_inst_dst dst = ra_def(c, &intr->def, &dst_swiz);
619 
620       /* TODO: rework so extra MOV isn't required, load up to 4 addresses at once */
621       emit_inst(c, &(struct etna_inst) {
622          .opcode = ISA_OPC_MOVAR,
623          .dst.use = 1,
624          .dst.write_mask = ISA_WRMASK_X___,
625          .src[0] = get_src(c, &intr->src[0]),
626       });
627       emit_inst(c, &(struct etna_inst) {
628          .opcode = ISA_OPC_MOV,
629          .dst = dst,
630          .src[0] = {
631             .use = 1,
632             .rgroup = ISA_REG_GROUP_UNIFORM_0,
633             .reg = nir_intrinsic_base(intr),
634             .swiz = dst_swiz,
635             .amode = ISA_REG_ADDRESSING_MODE_AX,
636          },
637       });
638    } break;
639    case nir_intrinsic_load_ubo: {
640       /* TODO: if offset is of the form (x + C) then add C to the base instead */
641       unsigned idx = nir_src_as_const_value(intr->src[0])[0].u32;
642       unsigned dst_swiz;
643       emit_inst(c, &(struct etna_inst) {
644          .opcode = ISA_OPC_LOAD,
645          .type = ISA_TYPE_U32,
646          .dst = ra_def(c, &intr->def, &dst_swiz),
647          .src[0] = get_src(c, &intr->src[1]),
648          .src[1] = const_src(c, &CONST_VAL(ETNA_UNIFORM_UBO_ADDR, idx), 1),
649       });
650    } break;
651    case nir_intrinsic_load_front_face:
652    case nir_intrinsic_load_frag_coord:
653       break;
654    case nir_intrinsic_load_input:
655    case nir_intrinsic_load_instance_id:
656    case nir_intrinsic_load_vertex_id:
657    case nir_intrinsic_load_texture_scale:
658    case nir_intrinsic_load_texture_size_etna:
659    case nir_intrinsic_decl_reg:
660    case nir_intrinsic_load_reg:
661    case nir_intrinsic_store_reg:
662       break;
663    default:
664       compile_error(c, "Unhandled NIR intrinsic type: %s\n",
665                     nir_intrinsic_infos[intr->intrinsic].name);
666    }
667 }
668 
669 static void
670 emit_instr(struct etna_compile *c, nir_instr * instr)
671 {
672    switch (instr->type) {
673    case nir_instr_type_alu:
674       emit_alu(c, nir_instr_as_alu(instr));
675       break;
676    case nir_instr_type_tex:
677       emit_tex(c, nir_instr_as_tex(instr));
678       break;
679    case nir_instr_type_intrinsic:
680       emit_intrinsic(c, nir_instr_as_intrinsic(instr));
681       break;
682    case nir_instr_type_jump:
683       assert(nir_instr_is_last(instr));
684       break;
685    case nir_instr_type_load_const:
686    case nir_instr_type_undef:
687    case nir_instr_type_deref:
688       break;
689    default:
690       compile_error(c, "Unhandled NIR instruction type: %d\n", instr->type);
691       break;
692    }
693 }
694 
695 static void
696 emit_block(struct etna_compile *c, nir_block * block)
697 {
698    etna_emit_block_start(c, block->index);
699 
700    nir_foreach_instr(instr, block)
701       emit_instr(c, instr);
702 
703    /* succs->index < block->index is for the loop case  */
704    nir_block *succs = block->successors[0];
705    if (nir_block_ends_in_jump(block) || succs->index < block->index)
706       etna_emit_jump(c, succs->index, SRC_DISABLE);
707 }
708 
709 static void
710 emit_cf_list(struct etna_compile *c, struct exec_list *list);
711 
712 static void
713 emit_if(struct etna_compile *c, nir_if * nif)
714 {
715    etna_emit_jump(c, nir_if_first_else_block(nif)->index, get_src(c, &nif->condition));
716    emit_cf_list(c, &nif->then_list);
717 
718    /* jump at end of then_list to skip else_list
719     * not needed if then_list already ends with a jump or else_list is empty
720     */
721    if (!nir_block_ends_in_jump(nir_if_last_then_block(nif)) &&
722        !nir_cf_list_is_empty_block(&nif->else_list))
723       etna_emit_jump(c, nir_if_last_then_block(nif)->successors[0]->index, SRC_DISABLE);
724 
725    emit_cf_list(c, &nif->else_list);
726 }
727 
728 static void
729 emit_cf_list(struct etna_compile *c, struct exec_list *list)
730 {
731    foreach_list_typed(nir_cf_node, node, node, list) {
732       switch (node->type) {
733       case nir_cf_node_block:
734          emit_block(c, nir_cf_node_as_block(node));
735          break;
736       case nir_cf_node_if:
737          emit_if(c, nir_cf_node_as_if(node));
738          break;
739       case nir_cf_node_loop:
740          assert(!nir_loop_has_continue_construct(nir_cf_node_as_loop(node)));
741          emit_cf_list(c, &nir_cf_node_as_loop(node)->body);
742          break;
743       default:
744          compile_error(c, "Unknown NIR node type\n");
745          break;
746       }
747    }
748 }
749 
750 /* based on nir_lower_vec_to_movs */
751 static unsigned
752 insert_vec_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
753 {
754    assert(start_idx < nir_op_infos[vec->op].num_inputs);
755    unsigned write_mask = (1u << start_idx);
756 
757    nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_mov);
758    nir_alu_src_copy(&mov->src[0], &vec->src[start_idx]);
759 
760    mov->src[0].swizzle[0] = vec->src[start_idx].swizzle[0];
761 
762    if (is_src_mod_neg(&vec->instr, start_idx))
763       set_src_mod_neg(&mov->instr, 0);
764 
765    if (is_src_mod_abs(&vec->instr, start_idx))
766       set_src_mod_abs(&mov->instr, 0);
767 
768    unsigned num_components = 1;
769 
770    for (unsigned i = start_idx + 1; i < vec->def.num_components; i++) {
771       if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
772          is_src_mod_neg(&vec->instr, i) == is_src_mod_neg(&vec->instr, start_idx) &&
773          is_src_mod_abs(&vec->instr, i) == is_src_mod_abs(&vec->instr, start_idx)) {
774          write_mask |= (1 << i);
775          mov->src[0].swizzle[num_components] = vec->src[i].swizzle[0];
776          num_components++;
777       }
778    }
779 
780    nir_def_init(&mov->instr, &mov->def, num_components, 32);
781 
782    /* replace vec srcs with inserted mov */
783    for (unsigned i = 0, j = 0; i < 4; i++) {
784       if (!(write_mask & (1 << i)))
785          continue;
786 
787       nir_src_rewrite(&vec->src[i].src, &mov->def);
788       vec->src[i].swizzle[0] = j++;
789    }
790 
791    nir_instr_insert_before(&vec->instr, &mov->instr);
792 
793    return write_mask;
794 }
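/* Illustrative example: for vec4(a.x, b.y, a.z, c.w), insert_vec_mov(vec, 0,
 * shader) groups components 0 and 2 (same source 'a', same modifiers), emits
 * "mov tmp, a.xz", rewrites those two vec sources to read tmp.x and tmp.y,
 * and returns write_mask 0b0101; the remaining components are handled by the
 * follow-up calls made from lower_alu().
 */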
795 
796 /*
797  * Get the nir_const_value from an alu src.  Also look at
798  * the parent instruction as it could be a fabs/fneg.
799  */
800 static nir_const_value *get_alu_cv(nir_alu_src *src)
801  {
802    nir_const_value *cv = nir_src_as_const_value(src->src);
803 
804    if (!cv &&
805        (src->src.ssa->parent_instr->type == nir_instr_type_alu)) {
806       nir_alu_instr *parent = nir_instr_as_alu(src->src.ssa->parent_instr);
807 
808       if ((parent->op == nir_op_fabs) ||
809           (parent->op == nir_op_fneg)) {
810          cv = nir_src_as_const_value(parent->src[0].src);
811 
812          if (cv) {
813             /* Validate that we are only using ETNA_UNIFORM_CONSTANT const_values. */
814             for (unsigned i = 0; i < parent->def.num_components; i++) {
815                if (cv[i].u64 >> 32 != ETNA_UNIFORM_CONSTANT) {
816                   cv = NULL;
817                   break;
818                }
819             }
820          }
821       }
822    }
823 
824    return cv;
825  }
826 
827 /*
828  * for vecN instructions:
829  * -merge constant sources into a single src
830  * -insert movs (nir_lower_vec_to_movs equivalent)
831  * for non-vecN instructions:
832  * -try to merge constants as single constant
833  * -insert movs for multiple constants if required
834  */
835 static void
836 lower_alu(struct etna_compile *c, nir_alu_instr *alu)
837 {
838    const nir_op_info *info = &nir_op_infos[alu->op];
839 
840    nir_builder b = nir_builder_at(nir_before_instr(&alu->instr));
841 
842    switch (alu->op) {
843    case nir_op_vec2:
844    case nir_op_vec3:
845    case nir_op_vec4:
846       break;
847    default:
848       if (c->specs->has_no_oneconst_limit)
849          return;
850 
851       nir_const_value value[4] = {};
852       uint8_t swizzle[4][4] = {};
853       unsigned swiz_max = 0, num_different_const_srcs = 0;
854       int first_const = -1;
855 
856       for (unsigned i = 0; i < info->num_inputs; i++) {
857          nir_const_value *cv = get_alu_cv(&alu->src[i]);
858          if (!cv)
859             continue;
860 
861          unsigned num_components = info->input_sizes[i] ?: alu->def.num_components;
862          for (unsigned j = 0; j < num_components; j++) {
863             int idx = const_add(&value[0].u64, cv[alu->src[i].swizzle[j]].u64);
864             swizzle[i][j] = idx;
865             swiz_max = MAX2(swiz_max, (unsigned) idx);
866          }
867 
868          if (first_const == -1)
869             first_const = i;
870 
871          if (!nir_srcs_equal(alu->src[first_const].src, alu->src[i].src))
872             num_different_const_srcs++;
873       }
874 
875       /* nothing to do */
876       if (num_different_const_srcs == 0)
877          return;
878 
879       /* resolve with single combined const src */
880       if (swiz_max < 4) {
881          nir_def *def = nir_build_imm(&b, swiz_max + 1, 32, value);
882 
883          for (unsigned i = 0; i < info->num_inputs; i++) {
884             nir_const_value *cv = get_alu_cv(&alu->src[i]);
885             if (!cv)
886                continue;
887 
888             nir_src_rewrite(&alu->src[i].src, def);
889 
890             for (unsigned j = 0; j < 4; j++)
891                alu->src[i].swizzle[j] = swizzle[i][j];
892          }
893          return;
894       }
895 
896       /* resolve with movs */
897       unsigned num_const = 0;
898       for (unsigned i = 0; i < info->num_inputs; i++) {
899          nir_const_value *cv = get_alu_cv(&alu->src[i]);
900          if (!cv)
901             continue;
902 
903          num_const++;
904          if (num_const == 1)
905             continue;
906 
907          nir_def *mov = nir_mov(&b, alu->src[i].src.ssa);
908          nir_src_rewrite(&alu->src[i].src, mov);
909       }
910       return;
911    }
912 
913    nir_const_value value[4];
914    unsigned num_components = 0;
915 
916    for (unsigned i = 0; i < info->num_inputs; i++) {
917       nir_const_value *cv = get_alu_cv(&alu->src[i]);
918       if (cv)
919          value[num_components++] = cv[alu->src[i].swizzle[0]];
920    }
921 
922    /* if there is more than one constant source to the vecN, combine them
923     * into a single load_const (removing the vecN completely if all components
924     * are constant)
925     */
926    if (num_components > 1) {
927       nir_def *def = nir_build_imm(&b, num_components, 32, value);
928 
929       if (num_components == info->num_inputs) {
930          nir_def_replace(&alu->def, def);
931          return;
932       }
933 
934       for (unsigned i = 0, j = 0; i < info->num_inputs; i++) {
935          nir_const_value *cv = get_alu_cv(&alu->src[i]);
936          if (!cv)
937             continue;
938 
939          nir_src_rewrite(&alu->src[i].src, def);
940          alu->src[i].swizzle[0] = j++;
941       }
942    }
943 
944    unsigned finished_write_mask = 0;
945    for (unsigned i = 0; i < alu->def.num_components; i++) {
946       nir_def *ssa = alu->src[i].src.ssa;
947 
948       /* check that vecN instruction is only user of this */
949       bool need_mov = false;
950       nir_foreach_use_including_if(use_src, ssa) {
951          if (nir_src_is_if(use_src) || nir_src_parent_instr(use_src) != &alu->instr)
952             need_mov = true;
953       }
954 
955       nir_instr *instr = ssa->parent_instr;
956       switch (instr->type) {
957       case nir_instr_type_alu:
958       case nir_instr_type_tex:
959          break;
960       case nir_instr_type_intrinsic:
961          if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_input) {
962             need_mov = vec_dest_has_swizzle(alu, &nir_instr_as_intrinsic(instr)->def);
963             break;
964          }
965          FALLTHROUGH;
966       default:
967          need_mov = true;
968       }
969 
970       if (need_mov && !(finished_write_mask & (1 << i)))
971          finished_write_mask |= insert_vec_mov(alu, i, c->nir);
972    }
973 }
974 
975 static bool
976 emit_shader(struct etna_compile *c, unsigned *num_temps, unsigned *num_consts)
977 {
978    nir_shader *shader = c->nir;
979    c->impl = nir_shader_get_entrypoint(shader);
980 
981    bool have_indirect_uniform = false;
982    unsigned indirect_max = 0;
983 
984    nir_builder b = nir_builder_create(c->impl);
985 
986    /* convert non-dynamic uniform loads to constants, etc */
987    nir_foreach_block(block, c->impl) {
988       nir_foreach_instr_safe(instr, block) {
989          switch(instr->type) {
990          case nir_instr_type_alu:
991             /* deals with vecN and const srcs */
992             lower_alu(c, nir_instr_as_alu(instr));
993             break;
994          case nir_instr_type_load_const: {
995             nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
996             for (unsigned  i = 0; i < load_const->def.num_components; i++)
997                load_const->value[i] = CONST(load_const->value[i].u32);
998          } break;
999          case nir_instr_type_intrinsic: {
1000             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1001             /* TODO: load_ubo can also become a constant in some cases
1002              * (at the moment it can end up emitting a LOAD with two
1003              *  uniform sources, which could be a problem on HALTI2)
1004              */
1005             if (intr->intrinsic != nir_intrinsic_load_uniform)
1006                break;
1007             nir_const_value *off = nir_src_as_const_value(intr->src[0]);
1008             if (!off || off[0].u64 >> 32 != ETNA_UNIFORM_CONSTANT) {
1009                have_indirect_uniform = true;
1010                indirect_max = nir_intrinsic_base(intr) + nir_intrinsic_range(intr);
1011                break;
1012             }
1013 
1014             unsigned base = nir_intrinsic_base(intr);
1015             /* pre halti2 uniform offset will be float */
1016             if (c->info->halti < 2)
1017                base += (unsigned) off[0].f32;
1018             else
1019                base += off[0].u32;
1020             nir_const_value value[4];
1021 
1022             for (unsigned i = 0; i < intr->def.num_components; i++)
1023                value[i] = UNIFORM(base * 4 + i);
1024 
1025             b.cursor = nir_after_instr(instr);
1026             nir_def *def = nir_build_imm(&b, intr->def.num_components, 32, value);
1027 
1028             nir_def_rewrite_uses(&intr->def, def);
1029             nir_instr_remove(instr);
1030          } break;
1031          default:
1032             break;
1033          }
1034       }
1035    }
1036 
1037    /* TODO: only emit required indirect uniform ranges */
1038    if (have_indirect_uniform) {
1039       for (unsigned i = 0; i < indirect_max * 4; i++)
1040          c->consts[i] = UNIFORM(i).u64;
1041       c->const_count = indirect_max;
1042    }
1043 
1044    /* add mov for any store output using sysval/const and for depth stores from intrinsics */
1045    nir_foreach_block(block, c->impl) {
1046       nir_foreach_instr_safe(instr, block) {
1047          if (instr->type != nir_instr_type_intrinsic)
1048             continue;
1049 
1050          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1051 
1052          switch (intr->intrinsic) {
1053          case nir_intrinsic_store_deref: {
1054             nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1055             nir_src *src = &intr->src[1];
1056             if (nir_src_is_const(*src) || is_sysval(src->ssa->parent_instr) ||
1057                 (shader->info.stage == MESA_SHADER_FRAGMENT &&
1058                  deref->var->data.location == FRAG_RESULT_DEPTH &&
1059                  src->ssa->parent_instr->type != nir_instr_type_alu)) {
1060                b.cursor = nir_before_instr(instr);
1061                nir_src_rewrite(src, nir_mov(&b, src->ssa));
1062             }
1063          } break;
1064          default:
1065             break;
1066          }
1067       }
1068    }
1069 
1070    /* call directly to avoid validation (load_const don't pass validation at this point) */
1071    nir_convert_from_ssa(shader, true);
1072    nir_trivialize_registers(shader);
1073 
1074    etna_ra_assign(c, shader);
1075 
1076    emit_cf_list(c, &nir_shader_get_entrypoint(shader)->body);
1077 
1078    *num_temps = etna_ra_finish(c);
1079    *num_consts = c->const_count;
1080    return true;
1081 }
1082 
1083 static bool
1084 etna_compile_check_limits(struct etna_shader_variant *v)
1085 {
1086    const struct etna_core_info *info = v->shader->info;
1087    const struct etna_specs *specs = v->shader->specs;
1088    int max_uniforms = (v->stage == MESA_SHADER_VERTEX)
1089                          ? specs->max_vs_uniforms
1090                          : specs->max_ps_uniforms;
1091 
1092    if (!specs->has_icache && v->needs_icache) {
1093       DBG("Number of instructions (%d) exceeds maximum %d", v->code_size / 4,
1094           specs->max_instructions);
1095       return false;
1096    }
1097 
1098    if (v->num_temps > info->gpu.max_registers) {
1099       DBG("Number of registers (%d) exceeds maximum %d", v->num_temps,
1100           info->gpu.max_registers);
1101       return false;
1102    }
1103 
1104    if (v->uniforms.count / 4 > max_uniforms) {
1105       DBG("Number of uniforms (%d) exceeds maximum %d",
1106           v->uniforms.count / 4, max_uniforms);
1107       return false;
1108    }
1109 
1110    if (v->stage == MESA_SHADER_VERTEX) {
1111       int num_outputs = v->vs_pointsize_out_reg >= 0 ? 2 : 1;
1112 
1113       num_outputs += v->outfile.num_reg;
1114 
1115       if (num_outputs > specs->max_vs_outputs) {
1116          DBG("Number of VS outputs (%zu) exceeds maximum %d",
1117              v->outfile.num_reg, specs->max_vs_outputs);
1118          return false;
1119       }
1120    }
1121 
1122    return true;
1123 }
1124 
1125 static void
1126 fill_vs_mystery(struct etna_shader_variant *v)
1127 {
1128    const struct etna_core_info *info = v->shader->info;
1129 
1130    v->input_count_unk8 = DIV_ROUND_UP(v->infile.num_reg + 4, 16); /* XXX what is this */
1131 
1132    /* fill in "mystery meat" load balancing value. This value determines how
1133     * work is scheduled between VS and PS
1134     * in the unified shader architecture. More precisely, it is determined from
1135     * the number of VS outputs, as well as chip-specific
1136     * vertex output buffer size, vertex cache size, and the number of shader
1137     * cores.
1138     *
1139     * XXX this is a conservative estimate, the "optimal" value is only known for
1140     * sure at link time because some
1141     * outputs may be unused and thus unmapped. Then again, in the general use
1142     * case with GLSL the vertex and fragment
1143     * shaders are linked already before submitting to Gallium, thus all outputs
1144     * are used.
1145     *
1146     * note: TGSI compiler counts all outputs (including position and pointsize), here
1147     * v->outfile.num_reg only counts varyings, +1 to compensate for the position output
1148     * TODO: might have a problem that we don't count pointsize when it is used
1149     */
1150 
1151    int half_out = v->outfile.num_reg / 2 + 1;
1152    assert(half_out);
1153 
1154    uint32_t b = ((20480 / (info->gpu.vertex_output_buffer_size -
1155                            2 * half_out * info->gpu.vertex_cache_size)) +
1156                  9) /
1157                 10;
1158    uint32_t a = (b + 256 / (info->gpu.shader_core_count * half_out)) / 2;
1159    v->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
1160                              VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
1161                              VIVS_VS_LOAD_BALANCING_C(0x3f) |
1162                              VIVS_VS_LOAD_BALANCING_D(0x0f);
1163 }
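/* Worked example with made-up numbers (not taken from any real chip): for a
 * VS with 4 varyings, half_out = 4/2 + 1 = 3.  Assuming
 * vertex_output_buffer_size = 512, vertex_cache_size = 16 and
 * shader_core_count = 2, the integer arithmetic above gives
 *   b = ((20480 / (512 - 2*3*16)) + 9) / 10 = (49 + 9) / 10 = 5
 *   a = (5 + 256 / (2*3)) / 2 = (5 + 42) / 2 = 23
 * i.e. VS_LOAD_BALANCING would be programmed with A=23, B=5, C=0x3f, D=0x0f.
 */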
1164 
1165 bool
1166 etna_compile_shader(struct etna_shader_variant *v)
1167 {
1168    if (unlikely(!v))
1169       return false;
1170 
1171    struct etna_compile *c = CALLOC_STRUCT(etna_compile);
1172    if (!c)
1173       return false;
1174 
1175    c->variant = v;
1176    c->info = v->shader->info;
1177    c->specs = v->shader->specs;
1178    c->nir = nir_shader_clone(NULL, v->shader->nir);
1179 
1180    nir_shader *s = c->nir;
1181    const struct etna_specs *specs = c->specs;
1182 
1183    v->stage = s->info.stage;
1184    v->uses_discard = s->info.fs.uses_discard;
1185    v->num_loops = 0; /* TODO */
1186    v->vs_id_in_reg = -1;
1187    v->vs_pos_out_reg = -1;
1188    v->vs_pointsize_out_reg = -1;
1189    v->ps_depth_out_reg = -1;
1190 
1191    if (s->info.stage == MESA_SHADER_FRAGMENT)
1192       NIR_PASS_V(s, nir_lower_fragcolor, specs->num_rts);
1193 
1194    /*
1195     * Lower glTexCoord, fixes e.g. neverball point sprite (exit cylinder stars)
1196     * and gl4es pointsprite.trace apitrace
1197     */
1198    if (s->info.stage == MESA_SHADER_FRAGMENT && v->key.sprite_coord_enable) {
1199       NIR_PASS_V(s, nir_lower_texcoord_replace, v->key.sprite_coord_enable,
1200                  false, v->key.sprite_coord_yinvert);
1201    }
1202 
1203    /*
1204     * Remove any dead in variables before we iterate over them
1205     */
1206    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_shader_in, NULL);
1207 
1208    /* setup input linking */
1209    struct etna_shader_io_file *sf = &v->infile;
1210    if (s->info.stage == MESA_SHADER_VERTEX) {
1211       nir_foreach_shader_in_variable(var, s) {
1212          unsigned idx = var->data.driver_location;
1213          sf->reg[idx].reg = idx;
1214          sf->reg[idx].slot = var->data.location;
1215          sf->reg[idx].interpolation = var->data.interpolation;
1216          sf->reg[idx].num_components = glsl_get_components(var->type);
1217          sf->num_reg = MAX2(sf->num_reg, idx+1);
1218       }
1219    } else {
1220       unsigned count = 0;
1221       nir_foreach_shader_in_variable(var, s) {
1222          unsigned idx = var->data.driver_location;
1223          sf->reg[idx].reg = idx + 1;
1224          sf->reg[idx].slot = var->data.location;
1225          if (var->data.interpolation == INTERP_MODE_NONE && v->key.flatshade &&
1226              (var->data.location == VARYING_SLOT_COL0 ||
1227               var->data.location == VARYING_SLOT_COL1)) {
1228             var->data.interpolation = INTERP_MODE_FLAT;
1229          }
1230          sf->reg[idx].interpolation = var->data.interpolation;
1231          sf->reg[idx].num_components = glsl_get_components(var->type);
1232          sf->num_reg = MAX2(sf->num_reg, idx+1);
1233          count++;
1234       }
1235       assert(sf->num_reg == count);
1236    }
1237 
1238    NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_uniform, etna_glsl_type_size,
1239             (nir_lower_io_options)0);
1240 
1241    NIR_PASS_V(s, nir_lower_vars_to_ssa);
1242    NIR_PASS_V(s, nir_lower_indirect_derefs, nir_var_all, UINT32_MAX);
1243    NIR_PASS_V(s, etna_nir_lower_texture, &v->key);
1244 
1245    NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, c->info);
1246    if (c->info->halti >= 2) {
1247       nir_lower_idiv_options idiv_options = {
1248          .allow_fp16 = true,
1249       };
1250       NIR_PASS_V(s, nir_lower_idiv, &idiv_options);
1251    }
1252    NIR_PASS_V(s, nir_lower_alu);
1253 
1254    etna_optimize_loop(s);
1255 
1256    /* TODO: remove this extra run if nir_opt_peephole_select is able to handle ubo's. */
1257    if (OPT(s, etna_nir_lower_ubo_to_uniform))
1258       etna_optimize_loop(s);
1259 
1260    NIR_PASS_V(s, etna_lower_io, v);
1261    NIR_PASS_V(s, nir_lower_pack);
1262    etna_optimize_loop(s);
1263 
1264    if (v->shader->specs->vs_need_z_div)
1265       NIR_PASS_V(s, nir_lower_clip_halfz);
1266 
1267    /* lower pre-halti2 to float (halti0 has integers, but only scalar..) */
1268    if (c->info->halti < 2) {
1269       /* use opt_algebraic between int_to_float and bool_to_float because
1270        * int_to_float emits ftrunc, and ftrunc lowering generates bool ops
1271        */
1272       NIR_PASS_V(s, nir_lower_int_to_float);
1273       NIR_PASS_V(s, nir_opt_algebraic);
1274       NIR_PASS_V(s, nir_lower_bool_to_float, true);
1275    } else {
1276       NIR_PASS_V(s, nir_lower_bool_to_int32);
1277    }
1278 
1279    while( OPT(s, nir_opt_vectorize, NULL, NULL) );
1280    NIR_PASS_V(s, nir_lower_alu_to_scalar, etna_alu_to_scalar_filter_cb, c->info);
1281 
1282    NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
1283    NIR_PASS_V(s, nir_opt_algebraic_late);
1284 
1285    NIR_PASS_V(s, nir_move_vec_src_uses_to_dest, false);
1286    NIR_PASS_V(s, nir_copy_prop);
1287    /* need copy prop after uses_to_dest, and before src mods: see
1288     * dEQP-GLES2.functional.shaders.random.all_features.fragment.95
1289     */
1290 
1291    NIR_PASS_V(s, nir_opt_dce);
1292    NIR_PASS_V(s, nir_opt_cse);
1293 
1294    NIR_PASS_V(s, nir_lower_bool_to_bitsize);
1295    NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals);
1296 
1297    /* needs to be the last pass that touches pass_flags! */
1298    NIR_PASS_V(s, etna_nir_lower_to_source_mods);
1299 
1300    if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
1301       nir_print_shader(s, stdout);
1302 
1303    unsigned block_ptr[nir_shader_get_entrypoint(s)->num_blocks];
1304    c->block_ptr = block_ptr;
1305 
1306    unsigned num_consts;
1307    ASSERTED bool ok = emit_shader(c, &v->num_temps, &num_consts);
1308    assert(ok);
1309 
1310    /* empty shader, emit NOP */
1311    if (!c->inst_ptr)
1312       emit_inst(c, &(struct etna_inst) { .opcode = ISA_OPC_NOP });
1313 
1314    /* assemble instructions, fixing up labels */
1315    uint32_t *code = MALLOC(c->inst_ptr * 16);
1316    for (unsigned i = 0; i < c->inst_ptr; i++) {
1317       struct etna_inst *inst = &c->code[i];
1318       if (inst->opcode == ISA_OPC_BRANCH || inst->opcode == ISA_OPC_BRANCH_UNARY || inst->opcode == ISA_OPC_BRANCH_BINARY)
1319          inst->imm = block_ptr[inst->imm];
1320 
1321       etna_assemble(&code[i * 4], inst, specs->has_no_oneconst_limit);
1322    }
1323 
1324    v->code_size = c->inst_ptr * 4;
1325    v->code = code;
1326    v->needs_icache = c->inst_ptr > specs->max_instructions;
1327 
1328    copy_uniform_state_to_shader(v, c->consts, num_consts);
1329 
1330    if (s->info.stage == MESA_SHADER_FRAGMENT) {
1331       v->input_count_unk8 = 31; /* XXX what is this */
1332       assert(v->ps_depth_out_reg <= 0);
1333    } else {
1334       fill_vs_mystery(v);
1335    }
1336 
1337    bool result = etna_compile_check_limits(v);
1338    ralloc_free(c->nir);
1339    FREE(c);
1340    return result;
1341 }
1342 
1343 static const struct etna_shader_inout *
1344 etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
1345                       const struct etna_shader_inout *in)
1346 {
1347    for (int i = 0; i < sobj->outfile.num_reg; i++)
1348       if (sobj->outfile.reg[i].slot == in->slot)
1349          return &sobj->outfile.reg[i];
1350 
1351    /*
1352     * There are valid NIR shader pairs where the vertex shader has
1353     * a VARYING_SLOT_BFC0 shader_out and the corresponding fragment
1354     * shader has a VARYING_SLOT_COL0 shader_in.
1355     * So at link time, if there is no matching VARYING_SLOT_COL[n]
1356     * output, we fall back to the corresponding VARYING_SLOT_BFC[n].
1357     */
1358    gl_varying_slot slot;
1359 
1360    if (in->slot == VARYING_SLOT_COL0)
1361       slot = VARYING_SLOT_BFC0;
1362    else if (in->slot == VARYING_SLOT_COL1)
1363       slot = VARYING_SLOT_BFC1;
1364    else
1365       return NULL;
1366 
1367    for (int i = 0; i < sobj->outfile.num_reg; i++)
1368       if (sobj->outfile.reg[i].slot == slot)
1369          return &sobj->outfile.reg[i];
1370 
1371    return NULL;
1372 }
1373 
1374 void
1375 etna_link_shader(struct etna_shader_link_info *info,
1376                  const struct etna_shader_variant *vs,
1377                  const struct etna_shader_variant *fs)
1378 {
1379    int comp_ofs = 0;
1380    /* For each fragment input we need to find the associated vertex shader
1381     * output, which can be found by matching on semantic name and index. A
1382     * binary search could be used because the vs outputs are sorted by their
1383     * semantic index and grouped by semantic type by fill_in_vs_outputs.
1384     */
1385    assert(fs->infile.num_reg <= ETNA_NUM_INPUTS);
1386    info->pcoord_varying_comp_ofs = -1;
1387 
1388    for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
1389       const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
1390       const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
1391       struct etna_varying *varying;
1392       bool varying_is_color = fsio->slot == VARYING_SLOT_COL0 ||
1393                               fsio->slot == VARYING_SLOT_COL1;
1394 
1395       assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));
1396 
1397       if (fsio->reg > info->num_varyings)
1398          info->num_varyings = fsio->reg;
1399 
1400       varying = &info->varyings[fsio->reg - 1];
1401       varying->num_components = fsio->num_components;
1402 
1403       if (varying_is_color) /* colors affected by flat shading */
1404          varying->pa_attributes = 0x200;
1405       else /* texture coord or other bypasses flat shading */
1406          varying->pa_attributes = 0x2f1;
1407 
1408       for (int i = 0; i < 4; i++) {
1409          if (varying_is_color)
1410             varying->use[i] = VARYING_COMPONENT_USE_COLOR;
1411          else
1412             varying->use[i] = VARYING_COMPONENT_USE_GENERIC;
1413       }
1414 
1415       switch (fsio->interpolation) {
1416       case INTERP_MODE_NONE:
1417       case INTERP_MODE_SMOOTH:
1418          varying->semantic = VARYING_INTERPOLATION_MODE_SMOOTH;
1419          break;
1420       case INTERP_MODE_NOPERSPECTIVE:
1421          varying->semantic = VARYING_INTERPOLATION_MODE_NONPERSPECTIVE;
1422          break;
1423       case INTERP_MODE_FLAT:
1424          varying->semantic = VARYING_INTERPOLATION_MODE_FLAT;
1425          break;
1426       default:
1427          unreachable("unsupported varying interpolation mode");
1428       }
1429 
1430       /* point/tex coord is an input to the PS without matching VS output,
1431        * so it gets a varying slot without being assigned a VS register.
1432        */
1433       if (fsio->slot == VARYING_SLOT_PNTC) {
1434          varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
1435          varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;
1436 
1437          info->pcoord_varying_comp_ofs = comp_ofs;
1438       } else if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) {
1439          /*
1440 	  * Do nothing, TexCoord is lowered to PointCoord above
1441 	  * and the TexCoord here is just a remnant. This needs
1442 	  * to be removed with some nir_remove_dead_variables(),
1443 	  * but that one removes all FS inputs ... why?
1444 	  */
1445       } else {
1446          /* pick a random register to use if there is no VS output */
1447          if (vsio == NULL)
1448             varying->reg = 0;
1449          else
1450             varying->reg = vsio->reg;
1451       }
1452 
1453       comp_ofs += varying->num_components;
1454    }
1455 
1456    assert(info->num_varyings == fs->infile.num_reg);
1457 }
1458