/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_xfb_info.h"

#include "nv50_ir_driver.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"

#include "nvk_cl9097.h"

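/* Both knobs below come from the environment and are forwarded verbatim to
 * the nouveau codegen backend via info->dbgFlags and info->optLevel in
 * nvk_cg_compile_nir() below.
 */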
uint64_t
nvk_cg_get_prog_debug(void)
{
   return debug_get_num_option("NV50_PROG_DEBUG", 0);
}

uint64_t
nvk_cg_get_prog_optimize(void)
{
   return debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
}

const nir_shader_compiler_options *
nvk_cg_nir_options(const struct nvk_physical_device *pdev,
                   gl_shader_stage stage)
{
   return nv50_ir_nir_shader_compiler_options(pdev->info.chipset, stage);
}

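/* Returns the shader input variable at `location`, creating it on first
 * use.  Integer inputs must be flat-shaded (the hardware cannot interpolate
 * integers); the float built-ins handled by the pass below are window-space
 * values, so they get non-perspective interpolation.
 */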
static nir_variable *
find_or_create_input(nir_builder *b, const struct glsl_type *type,
                     const char *name, unsigned location)
{
   nir_foreach_shader_in_variable(in, b->shader) {
      if (in->data.location == location)
         return in;
   }
   nir_variable *in = nir_variable_create(b->shader, nir_var_shader_in,
                                          type, name);
   in->data.location = location;
   if (glsl_type_is_integer(type))
      in->data.interpolation = INTERP_MODE_FLAT;
   else
      in->data.interpolation = INTERP_MODE_NOPERSPECTIVE;

   return in;
}

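/* Instruction callback for nir_shader_instructions_pass: rewrites the
 * fragment built-in loads (frag_coord, point_coord, sample_pos, layer_id)
 * into loads of ordinary shader inputs.  The sample position has no input
 * slot of its own; it is reconstructed as fract(gl_FragCoord.xy).
 */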
static bool
lower_fragcoord_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
   assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
   nir_variable *var;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *val;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_frag_coord:
      var = find_or_create_input(b, glsl_vec4_type(),
                                 "gl_FragCoord",
                                 VARYING_SLOT_POS);
      val = nir_load_var(b, var);
      break;
   case nir_intrinsic_load_point_coord:
      var = find_or_create_input(b, glsl_vector_type(GLSL_TYPE_FLOAT, 2),
                                 "gl_PointCoord",
                                 VARYING_SLOT_PNTC);
      val = nir_load_var(b, var);
      break;
   case nir_intrinsic_load_sample_pos:
      var = find_or_create_input(b, glsl_vec4_type(),
                                 "gl_FragCoord",
                                 VARYING_SLOT_POS);
      val = nir_ffract(b, nir_trim_vector(b, nir_load_var(b, var), 2));
      break;
   case nir_intrinsic_load_layer_id:
      var = find_or_create_input(b, glsl_int_type(),
                                 "gl_Layer", VARYING_SLOT_LAYER);
      val = nir_load_var(b, var);
      break;

   default:
      return false;
   }

   nir_def_rewrite_uses(&intrin->def, val);

   return true;
}

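/* One-time NIR preparation run on every shader before compilation: split
 * and SSA-ify variables, lower system values (plus the fragment built-ins
 * above), then run the shared optimization loop.
 */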
void
nvk_cg_preprocess_nir(nir_shader *nir)
{
   NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);

   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);

   NIR_PASS(_, nir, nir_lower_global_vars_to_local);
   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

   NIR_PASS(_, nir, nir_lower_system_values);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_shader_instructions_pass, lower_fragcoord_instr,
               nir_metadata_block_index | nir_metadata_dominance, NULL);
   }

   nvk_cg_optimize_nir(nir);

   NIR_PASS(_, nir, nir_lower_var_copies);
}

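/* Standard NIR fixed-point optimization loop: each NIR_PASS ORs whether it
 * changed anything into `progress`, and the whole pass list is repeated
 * until a full iteration makes no progress.
 */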
void
nvk_cg_optimize_nir(nir_shader *nir)
{
   bool progress;

   do {
      progress = false;

      NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);

      if (!nir->info.var_copies_lowered) {
         /* Only run this pass if nir_lower_var_copies was not called
          * yet. That would lower away any copy_deref instructions and we
          * don't want to introduce any more.
          */
         NIR_PASS(progress, nir, nir_opt_find_array_copies);
      }
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);
      if (nir_opt_loop(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_dce);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      /* This should be fine (it's likely a backend problem), but a bunch
       * of tessellation shaders blow up. Revisit this when NAK is merged.
       */
      NIR_PASS(progress, nir, nir_opt_peephole_select, 2, true, true);
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);

      NIR_PASS(progress, nir, nir_opt_undef);

      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll);
      }
   } while (progress);

   NIR_PASS(progress, nir, nir_opt_shrink_vectors);
   NIR_PASS(progress, nir, nir_remove_dead_variables,
            nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
}

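/* Intrinsic callback for nir_shader_intrinsics_pass: rewrites
 * image_deref_size intrinsics into txs texture-size queries on the same
 * deref, fixing up the Z component for cube images along the way.
 */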
static bool
lower_image_size_to_txs(nir_builder *b, nir_intrinsic_instr *intrin,
                        UNUSED void *_data)
{
   if (intrin->intrinsic != nir_intrinsic_image_deref_size)
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_deref_instr *img = nir_src_as_deref(intrin->src[0]);
   nir_def *lod = nir_tex_type_has_lod(img->type) ?
                      intrin->src[1].ssa : NULL;
   nir_def *size = nir_txs_deref(b, img, lod);

   if (glsl_get_sampler_dim(img->type) == GLSL_SAMPLER_DIM_CUBE) {
      /* Cube image descriptors are set up as simple arrays but SPIR-V wants
       * the number of cubes.
       */
      if (glsl_sampler_type_is_array(img->type)) {
         size = nir_vec3(b, nir_channel(b, size, 0),
                            nir_channel(b, size, 1),
                            nir_udiv_imm(b, nir_channel(b, size, 2), 6));
      } else {
         size = nir_vec3(b, nir_channel(b, size, 0),
                            nir_channel(b, size, 1),
                            nir_imm_int(b, 1));
      }
   }

   nir_def_rewrite_uses(&intrin->def, size);

   return true;
}

static int
count_location_slots(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

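/* Packs driver_locations densely in vec4-slot units, counting arrayed
 * (per-vertex) I/O by its element type.  Vertex shader inputs are the
 * exception: their driver_location is the generic attribute index taken
 * straight from the API location.
 */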
static void
assign_io_locations(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      unsigned location = 0;
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         var->data.driver_location = location;
         if (nir_is_arrayed_io(var, nir->info.stage)) {
            location += glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
         } else {
            location += glsl_count_attribute_slots(var->type, false);
         }
      }
      nir->num_inputs = location;
   } else {
      nir_foreach_shader_in_variable(var, nir) {
         assert(var->data.location >= VERT_ATTRIB_GENERIC0);
         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
      }
   }

   {
      unsigned location = 0;
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
         var->data.driver_location = location;
         if (nir_is_arrayed_io(var, nir->info.stage)) {
            location += glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
         } else {
            location += glsl_count_attribute_slots(var->type, false);
         }
      }
      nir->num_outputs = location;
   }
}

static void
nak_cg_postprocess_nir(nir_shader *nir)
{
   NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_image_size_to_txs,
            nir_metadata_block_index | nir_metadata_dominance, NULL);

   uint32_t indirect_mask = nir_var_function_temp;

   NIR_PASS(_, nir, nir_lower_indirect_derefs, indirect_mask, 16);

   nvk_cg_optimize_nir(nir);
   if (nir->info.stage != MESA_SHADER_COMPUTE)
      assign_io_locations(nir);

   NIR_PASS(_, nir, nir_lower_int64);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
}

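/* The two tables below hardcode the byte addresses of each TGSI semantic
 * in the Fermi+ shader I/O attribute space; the slot[] indices stored in
 * nv50_ir_prog_info_out are these addresses divided by 4 (i.e. in dwords).
 */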
/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
 * 124 scalar varying values.
 */
static uint32_t
nvc0_shader_input_address(unsigned sn, unsigned si)
{
   switch (sn) {
   case TGSI_SEMANTIC_TESSOUTER:     return 0x000 + si * 0x4;
   case TGSI_SEMANTIC_TESSINNER:     return 0x010 + si * 0x4;
   case TGSI_SEMANTIC_PATCH:         return 0x020 + si * 0x10;
   case TGSI_SEMANTIC_PRIMID:        return 0x060;
   case TGSI_SEMANTIC_LAYER:         return 0x064;
   case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
   case TGSI_SEMANTIC_PSIZE:         return 0x06c;
   case TGSI_SEMANTIC_POSITION:      return 0x070;
   case TGSI_SEMANTIC_GENERIC:       return 0x080 + si * 0x10;
   case TGSI_SEMANTIC_FOG:           return 0x2e8;
   case TGSI_SEMANTIC_COLOR:         return 0x280 + si * 0x10;
   case TGSI_SEMANTIC_BCOLOR:        return 0x2a0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPDIST:      return 0x2c0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPVERTEX:    return 0x270;
   case TGSI_SEMANTIC_PCOORD:        return 0x2e0;
   case TGSI_SEMANTIC_TESSCOORD:     return 0x2f0;
   case TGSI_SEMANTIC_INSTANCEID:    return 0x2f8;
   case TGSI_SEMANTIC_VERTEXID:      return 0x2fc;
   case TGSI_SEMANTIC_TEXCOORD:      return 0x300 + si * 0x10;
   default:
      assert(!"invalid TGSI input semantic");
      return ~0;
   }
}

static uint32_t
nvc0_shader_output_address(unsigned sn, unsigned si)
{
   switch (sn) {
   case TGSI_SEMANTIC_TESSOUTER:     return 0x000 + si * 0x4;
   case TGSI_SEMANTIC_TESSINNER:     return 0x010 + si * 0x4;
   case TGSI_SEMANTIC_PATCH:         return 0x020 + si * 0x10;
   case TGSI_SEMANTIC_PRIMID:        return 0x060;
   case TGSI_SEMANTIC_LAYER:         return 0x064;
   case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068;
   case TGSI_SEMANTIC_PSIZE:         return 0x06c;
   case TGSI_SEMANTIC_POSITION:      return 0x070;
   case TGSI_SEMANTIC_GENERIC:       return 0x080 + si * 0x10;
   case TGSI_SEMANTIC_FOG:           return 0x2e8;
   case TGSI_SEMANTIC_COLOR:         return 0x280 + si * 0x10;
   case TGSI_SEMANTIC_BCOLOR:        return 0x2a0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPDIST:      return 0x2c0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPVERTEX:    return 0x270;
   case TGSI_SEMANTIC_TEXCOORD:      return 0x300 + si * 0x10;
   case TGSI_SEMANTIC_VIEWPORT_MASK: return 0x3a0;
   case TGSI_SEMANTIC_EDGEFLAG:      return ~0;
   default:
      assert(!"invalid TGSI output semantic");
      return ~0;
   }
}

static int
nvc0_vp_assign_input_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned i, c, n;

   for (n = 0, i = 0; i < info->numInputs; ++i) {
      switch (info->in[i].sn) {
      case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */
      case TGSI_SEMANTIC_VERTEXID:
         info->in[i].mask = 0x1;
         info->in[i].slot[0] =
            nvc0_shader_input_address(info->in[i].sn, 0) / 4;
         continue;
      default:
         break;
      }
      for (c = 0; c < 4; ++c)
         info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4;
      ++n;
   }

   return 0;
}

static int
nvc0_sp_assign_input_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned offset;
   unsigned i, c;

   for (i = 0; i < info->numInputs; ++i) {
      offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si);

      for (c = 0; c < 4; ++c)
         info->in[i].slot[c] = (offset + c * 0x4) / 4;
   }

   return 0;
}

static int
nvc0_fp_assign_output_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned count = info->prop.fp.numColourResults * 4;
   unsigned i, c;

   /* Compute the relative position of each color output, since skipped MRT
    * positions will not have registers allocated to them.
    */
   unsigned colors[8] = {0};
   for (i = 0; i < info->numOutputs; ++i)
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
         colors[info->out[i].si] = 1;
   for (i = 0, c = 0; i < 8; i++)
      if (colors[i])
         colors[i] = c++;
   for (i = 0; i < info->numOutputs; ++i)
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
         for (c = 0; c < 4; ++c)
            info->out[i].slot[c] = colors[info->out[i].si] * 4 + c;

   if (info->io.sampleMask < NV50_CODEGEN_MAX_VARYINGS)
      info->out[info->io.sampleMask].slot[0] = count++;
   else
   if (info->target >= 0xe0)
      count++; /* on Kepler, depth is always last colour reg + 2 */

   if (info->io.fragDepth < NV50_CODEGEN_MAX_VARYINGS)
      info->out[info->io.fragDepth].slot[2] = count;

   return 0;
}

static int
nvc0_sp_assign_output_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned offset;
   unsigned i, c;

   for (i = 0; i < info->numOutputs; ++i) {
      offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si);

      for (c = 0; c < 4; ++c)
         info->out[i].slot[c] = (offset + c * 0x4) / 4;
   }

   return 0;
}

static int
nvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info)
{
   int ret;

   if (info->type == PIPE_SHADER_VERTEX)
      ret = nvc0_vp_assign_input_slots(info);
   else
      ret = nvc0_sp_assign_input_slots(info);
   if (ret)
      return ret;

   if (info->type == PIPE_SHADER_FRAGMENT)
      ret = nvc0_fp_assign_output_slots(info);
   else
      ret = nvc0_sp_assign_output_slots(info);
   return ret;
}

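/* hdr[4] packs the minimum (bits 12..19) and maximum (bits 24..31)
 * attribute slot read by a downstream stage; widen that range to cover
 * `slot`.
 */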
static inline void
nvk_vtgs_hdr_update_oread(struct nvk_shader *vs, uint8_t slot)
{
   uint8_t min = (vs->info.hdr[4] >> 12) & 0xff;
   uint8_t max = (vs->info.hdr[4] >> 24);

   min = MIN2(min, slot);
   max = MAX2(max, slot);

   vs->info.hdr[4] = (max << 24) | (min << 12);
}

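/* Common program-header setup for the vertex/tessellation/geometry (VTG)
 * stages: hdr[5..] holds the input read bitmap, hdr[13..] the output write
 * bitmap, plus assorted system-value enable bits.
 */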
static int
nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info)
{
   unsigned i, c, a;

   for (i = 0; i < info->numInputs; ++i) {
      if (info->in[i].patch)
         continue;
      for (c = 0; c < 4; ++c) {
         a = info->in[i].slot[c];
         if (info->in[i].mask & (1 << c))
            vs->info.hdr[5 + a / 32] |= 1 << (a % 32);
      }
   }

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].patch)
         continue;
      for (c = 0; c < 4; ++c) {
         if (!(info->out[i].mask & (1 << c)))
            continue;
         assert(info->out[i].slot[c] >= 0x40 / 4);
         a = info->out[i].slot[c] - 0x40 / 4;
         vs->info.hdr[13 + a / 32] |= 1 << (a % 32);
         if (info->out[i].oread)
            nvk_vtgs_hdr_update_oread(vs, info->out[i].slot[c]);
      }
   }

   for (i = 0; i < info->numSysVals; ++i) {
      switch (info->sv[i].sn) {
      case SYSTEM_VALUE_PRIMITIVE_ID:
         vs->info.hdr[5] |= 1 << 24;
         break;
      case SYSTEM_VALUE_INSTANCE_ID:
         vs->info.hdr[10] |= 1 << 30;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         vs->info.hdr[10] |= 1 << 31;
         break;
      case SYSTEM_VALUE_TESS_COORD:
         /* We don't have the mask, nor the slots populated. While this could
          * be achieved, the vast majority of the time, if either of the
          * coords is read, then both will be read.
          */
         nvk_vtgs_hdr_update_oread(vs, 0x2f0 / 4);
         nvk_vtgs_hdr_update_oread(vs, 0x2f4 / 4);
         break;
      default:
         break;
      }
   }

   vs->info.vtg.writes_layer = (vs->info.hdr[13] & (1 << 9)) != 0;
   vs->info.vtg.clip_enable = (1 << info->io.clipDistances) - 1;
   vs->info.vtg.cull_enable =
      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;

   return 0;
}

static int
nvk_vs_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info)
{
   vs->info.hdr[0] = 0x20061 | (1 << 10);
   vs->info.hdr[4] = 0xff000;

   return nvk_vtgp_gen_header(vs, info);
}

static int
nvk_gs_gen_header(struct nvk_shader *gs,
                  const struct nir_shader *nir,
                  struct nv50_ir_prog_info_out *info)
{
   gs->info.hdr[0] = 0x20061 | (4 << 10);

   gs->info.hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;

   switch (info->prop.gp.outputPrim) {
   case MESA_PRIM_POINTS:
      gs->info.hdr[3] = 0x01000000;
      break;
   case MESA_PRIM_LINE_STRIP:
      gs->info.hdr[3] = 0x06000000;
      break;
   case MESA_PRIM_TRIANGLE_STRIP:
      gs->info.hdr[3] = 0x07000000;
      break;
   default:
      assert(0);
      break;
   }

   gs->info.hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024);

   gs->info.hdr[0] |= nir->info.gs.active_stream_mask << 28;

   return nvk_vtgp_gen_header(gs, info);
}

static void
nvk_generate_tessellation_parameters(const struct nv50_ir_prog_info_out *info,
                                     struct nvk_shader *shader)
{
   // TODO: This is a little confusing because nouveau codegen uses
   // MESA_PRIM_POINTS for an unspecified domain, and MESA_PRIM_POINTS == 0
   // is the same as the NV9097 ISOLINE enum.
   switch (info->prop.tp.domain) {
   case MESA_PRIM_LINES:
      shader->info.ts.domain = NAK_TS_DOMAIN_ISOLINE;
      break;
   case MESA_PRIM_TRIANGLES:
      shader->info.ts.domain = NAK_TS_DOMAIN_TRIANGLE;
      break;
   case MESA_PRIM_QUADS:
      shader->info.ts.domain = NAK_TS_DOMAIN_QUAD;
      break;
   default:
      return;
   }

   switch (info->prop.tp.partitioning) {
   case PIPE_TESS_SPACING_EQUAL:
      shader->info.ts.spacing = NAK_TS_SPACING_INTEGER;
      break;
   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
      shader->info.ts.spacing = NAK_TS_SPACING_FRACT_ODD;
      break;
   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
      shader->info.ts.spacing = NAK_TS_SPACING_FRACT_EVEN;
      break;
   default:
      assert(!"invalid tessellator partitioning");
      break;
   }

   if (info->prop.tp.outputPrim == MESA_PRIM_POINTS) { // point_mode
      shader->info.ts.prims = NAK_TS_PRIMS_POINTS;
   } else if (info->prop.tp.domain == MESA_PRIM_LINES) { // isoline domain
      shader->info.ts.prims = NAK_TS_PRIMS_LINES;
   } else { // triangle/quad domain
      if (info->prop.tp.winding > 0) {
         shader->info.ts.prims = NAK_TS_PRIMS_TRIANGLES_CW;
      } else {
         shader->info.ts.prims = NAK_TS_PRIMS_TRIANGLES_CCW;
      }
   }
}

static int
nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info)
{
   unsigned opcs = 6; /* output patch constants (at least the TessFactors) */

   if (info->numPatchConstants)
      opcs = 8 + info->numPatchConstants * 4;

   tcs->info.hdr[0] = 0x20061 | (2 << 10);

   tcs->info.hdr[1] = opcs << 24;
   tcs->info.hdr[2] = info->prop.tp.outputPatchSize << 24;

   tcs->info.hdr[4] = 0xff000; /* initial min/max parallel output read address */

   nvk_vtgp_gen_header(tcs, info);

   if (info->target >= NVISA_GM107_CHIPSET) {
      /* On GM107+, the number of output patch components has moved in the TCP
       * header, but it seems like the blob still also uses the old position.
       * Also, the high 8 bits are located in between the min/max parallel
       * fields and have to be set after updating the outputs. */
      tcs->info.hdr[3] = (opcs & 0x0f) << 28;
      tcs->info.hdr[4] |= (opcs & 0xf0) << 16;
   }

   nvk_generate_tessellation_parameters(info, tcs);

   return 0;
}

static int
nvk_tes_gen_header(struct nvk_shader *tes, struct nv50_ir_prog_info_out *info)
{
   tes->info.hdr[0] = 0x20061 | (3 << 10);
   tes->info.hdr[4] = 0xff000;

   nvk_vtgp_gen_header(tes, info);

   nvk_generate_tessellation_parameters(info, tes);

   tes->info.hdr[18] |= 0x3 << 12; /* ? */

   return 0;
}

#define NVC0_INTERP_FLAT          (1 << 0)
#define NVC0_INTERP_PERSPECTIVE   (2 << 0)
#define NVC0_INTERP_LINEAR        (3 << 0)
#define NVC0_INTERP_CENTROID      (1 << 2)

static uint8_t
nvk_hdr_interp_mode(const struct nv50_ir_varying *var)
{
   if (var->linear)
      return NVC0_INTERP_LINEAR;
   if (var->flat)
      return NVC0_INTERP_FLAT;
   return NVC0_INTERP_PERSPECTIVE;
}

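/* Fills the fragment shader program header.  Per-component interpolation
 * modes are packed two bits apiece into hdr[4..] using the NVC0_INTERP_*
 * values above; the remaining magic constants follow the same program
 * header layout the nouveau GL driver uses.
 */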
static int
nvk_fs_gen_header(struct nvk_shader *fs, const struct nak_fs_key *key,
                  struct nv50_ir_prog_info_out *info)
{
   unsigned i, c, a, m;

   /* just 00062 on Kepler */
   fs->info.hdr[0] = 0x20062 | (5 << 10);
   fs->info.hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */

   if (info->prop.fp.usesDiscard || key->zs_self_dep)
      fs->info.hdr[0] |= 0x8000;
   if (!info->prop.fp.separateFragData)
      fs->info.hdr[0] |= 0x4000;
   if (info->io.sampleMask < 80 /* PIPE_MAX_SHADER_OUTPUTS */)
      fs->info.hdr[19] |= 0x1;
   if (info->prop.fp.writesDepth) {
      fs->info.hdr[19] |= 0x2;
      fs->info.fs.writes_depth = true;
   }

   for (i = 0; i < info->numInputs; ++i) {
      m = nvk_hdr_interp_mode(&info->in[i]);
      for (c = 0; c < 4; ++c) {
         if (!(info->in[i].mask & (1 << c)))
            continue;
         a = info->in[i].slot[c];
         if (info->in[i].slot[0] >= (0x060 / 4) &&
             info->in[i].slot[0] <= (0x07c / 4)) {
            fs->info.hdr[5] |= 1 << (24 + (a - 0x060 / 4));
         } else
         if (info->in[i].slot[0] >= (0x2c0 / 4) &&
             info->in[i].slot[0] <= (0x2fc / 4)) {
            fs->info.hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
         } else {
            if (info->in[i].slot[c] < (0x040 / 4) ||
                info->in[i].slot[c] > (0x380 / 4))
               continue;
            a *= 2;
            if (info->in[i].slot[0] >= (0x300 / 4))
               a -= 32;
            fs->info.hdr[4 + a / 32] |= m << (a % 32);
         }
      }
   }
   /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
   if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
      fs->info.hdr[5] |= 0x30000000;

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
         fs->info.hdr[18] |= 0xf << (4 * info->out[i].si);
   }

   /* There are no "regular" attachments, but the shader still needs to be
    * executed. It seems like it wants to think that it has some color
    * outputs in order to actually run.
    */
   if (info->prop.fp.numColourResults == 0 &&
       !info->prop.fp.writesDepth &&
       info->io.sampleMask >= 80 /* PIPE_MAX_SHADER_OUTPUTS */)
      fs->info.hdr[18] |= 0xf;

   fs->info.fs.early_fragment_tests = info->prop.fp.earlyFragTests;
   fs->info.fs.reads_sample_mask = info->prop.fp.usesSampleMaskIn;
   fs->info.fs.post_depth_coverage = info->prop.fp.postDepthCoverage;

   return 0;
}

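/* Maps an XFB output back to the output register index (driver_location)
 * of the variable that produces it, accounting for variables that span
 * several vec4 slots.
 */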
static uint8_t find_register_index_for_xfb_output(const struct nir_shader *nir,
                                                  nir_xfb_output_info output)
{
   nir_foreach_shader_out_variable(var, nir) {
      uint32_t slots = glsl_count_vec4_slots(var->type, false, false);
      for (uint32_t i = 0; i < slots; ++i) {
         if (output.location == (var->data.location + i)) {
            return var->data.driver_location + i;
         }
      }
   }
   // should not be reached
   return 0;
}

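/* Converts NIR's transform feedback info into the nak_xfb_info layout:
 * per-buffer strides and stream indices, and, for each captured dword, the
 * output slot it is sourced from (0xff meaning "skip this dword").
 */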
static void
nvk_fill_transform_feedback_state(struct nak_xfb_info *xfb,
                                  struct nir_shader *nir,
                                  const struct nv50_ir_prog_info_out *info)
{
   const uint8_t max_buffers = 4;
   const uint8_t dw_bytes = 4;
   const struct nir_xfb_info *nx = nir->xfb_info;
   //nir_print_xfb_info(nx, stdout);

   memset(xfb, 0, sizeof(*xfb));

   for (uint8_t b = 0; b < max_buffers; ++b) {
      xfb->stride[b] = b < nx->buffers_written ? nx->buffers[b].stride : 0;
      xfb->attr_count[b] = 0;
      xfb->stream[b] = nx->buffer_to_stream[b];
   }
   memset(xfb->attr_index, 0xff, sizeof(xfb->attr_index)); /* = skip */

   if (info->numOutputs == 0)
      return;

   for (uint32_t i = 0; i < nx->output_count; ++i) {
      const nir_xfb_output_info output = nx->outputs[i];
      const uint8_t b = output.buffer;
      const uint8_t r = find_register_index_for_xfb_output(nir, output);
      uint32_t p = output.offset / dw_bytes;

      assert(r < info->numOutputs && p < ARRAY_SIZE(xfb->attr_index[b]));

      u_foreach_bit(c, nx->outputs[i].component_mask)
         xfb->attr_index[b][p++] = info->out[r].slot[c];

      xfb->attr_count[b] = MAX2(xfb->attr_count[b], p);
   }

   /* zero unused indices */
   for (uint8_t b = 0; b < 4; ++b)
      for (uint32_t c = xfb->attr_count[b]; c & 3; ++c)
         xfb->attr_index[b][c] = 0;
}

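/* Top-level entry point of the codegen-based compile path: runs the final
 * NIR lowering, invokes nv50_ir_generate_code(), then derives the program
 * header and shader info from the codegen results.
 *
 * A rough usage sketch (error handling and state-dependent lowering
 * elided; `pdev`, `fs_key` and `shader` come from the caller, and
 * upload() stands in for the caller's code upload):
 *
 *    nvk_cg_preprocess_nir(nir);
 *    VkResult result = nvk_cg_compile_nir(pdev, nir, fs_key, shader);
 *    if (result == VK_SUCCESS)
 *       upload(shader->code_ptr, shader->code_size);
 */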
VkResult
nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir,
                   const struct nak_fs_key *fs_key,
                   struct nvk_shader *shader)
{
   struct nv50_ir_prog_info *info;
   struct nv50_ir_prog_info_out info_out = {};
   int ret;

   nak_cg_postprocess_nir(nir);

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   info->type = nir->info.stage;
   info->target = pdev->info.chipset;
   info->bin.nir = nir;

   for (unsigned i = 0; i < 3; i++)
      shader->info.cs.local_size[i] = nir->info.workgroup_size[i];

   info->dbgFlags = nvk_cg_get_prog_debug();
   info->optLevel = nvk_cg_get_prog_optimize();
   info->io.auxCBSlot = 1;
   info->io.uboInfoBase = 0;
   info->io.drawInfoBase = nvk_root_descriptor_offset(draw.base_vertex);
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      info->prop.cp.gridInfoBase = 0;
   } else {
      info->assignSlots = nvc0_program_assign_varying_slots;
   }
   ret = nv50_ir_generate_code(info, &info_out);
   if (ret)
      return VK_ERROR_UNKNOWN;

   if (info_out.bin.fixupData) {
      nv50_ir_apply_fixups(info_out.bin.fixupData, info_out.bin.code,
                           fs_key && fs_key->force_sample_shading,
                           false /* flatshade */, false /* alphatest */,
                           fs_key && fs_key->force_sample_shading);
   }

   shader->info.stage = nir->info.stage;
   shader->code_ptr = (uint8_t *)info_out.bin.code;
   shader->code_size = info_out.bin.codeSize;

   if (info_out.target >= NVISA_GV100_CHIPSET)
      shader->info.num_gprs = MAX2(4, info_out.bin.maxGPR + 3);
   else
      shader->info.num_gprs = MAX2(4, info_out.bin.maxGPR + 1);
   shader->info.num_barriers = info_out.numBarriers;

   if (info_out.bin.tlsSpace) {
      assert(info_out.bin.tlsSpace < (1 << 24));
      shader->info.hdr[0] |= 1 << 26;
      shader->info.hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */
      shader->info.slm_size = info_out.bin.tlsSpace;
   }

   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      ret = nvk_vs_gen_header(shader, &info_out);
      break;
   case PIPE_SHADER_FRAGMENT:
      ret = nvk_fs_gen_header(shader, fs_key, &info_out);
      shader->info.fs.uses_sample_shading = nir->info.fs.uses_sample_shading;
      break;
   case PIPE_SHADER_GEOMETRY:
      ret = nvk_gs_gen_header(shader, nir, &info_out);
      break;
   case PIPE_SHADER_TESS_CTRL:
      ret = nvk_tcs_gen_header(shader, &info_out);
      break;
   case PIPE_SHADER_TESS_EVAL:
      ret = nvk_tes_gen_header(shader, &info_out);
      break;
   case PIPE_SHADER_COMPUTE:
      shader->info.cs.smem_size = info_out.bin.smemSize;
      break;
   default:
      unreachable("Invalid shader stage");
      break;
   }
   assert(ret == 0);

   if (info_out.io.globalAccess)
      shader->info.hdr[0] |= 1 << 26;
   if (info_out.io.globalAccess & 0x2)
      shader->info.hdr[0] |= 1 << 16;
   if (info_out.io.fp64)
      shader->info.hdr[0] |= 1 << 27;

   if (nir->xfb_info)
      nvk_fill_transform_feedback_state(&shader->info.vtg.xfb, nir, &info_out);

   return VK_SUCCESS;
}