/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"
#include "nir_builder.h"

/*
 * Wa_18019110168 for gfx 12.5.
 *
 * This file implements a workaround for a HW bug that causes the fragment
 * shader to read incorrect per-primitive data if the mesh shader, in
 * addition to writing per-primitive data, also writes to gl_ClipDistance.
 *
 * The suggested solution to that bug is to avoid using per-primitive data by:
 * - creating new vertices for provoking vertices shared by multiple primitives
 * - converting per-primitive attributes read by the fragment shader to flat
 *   per-vertex attributes of the provoking vertex
 * - modifying the fragment shader to read those per-vertex attributes
 *
 * There are at least two types of failures that are not handled well:
 * - if the number of varying slots overflows, then only some attributes will
 *   be converted, leading to corruption of the unconverted attributes
 * - if the overall MUE size is so large it doesn't fit in the URB, then URB
 *   allocation will fail in some way; unfortunately there's no good way to
 *   tell at this point how big the MUE will be, so we cannot back out
 *
 * This workaround needs to be applied before linking, so that unused outputs
 * created by this code are removed at link time.
 *
 * This workaround can be controlled by a driconf option to either disable it,
 * lower its scope, or force-enable it.
 *
 * Option "anv_mesh_conv_prim_attrs_to_vert_attrs" is evaluated like this:
 *  value == 0 - disable workaround
 *  value < 0 - enable ONLY if workaround is required
 *  value > 0 - enable ALWAYS, even if it's not required
 *  abs(value) >= 1 - attribute conversion
 *  abs(value) >= 2 - attribute conversion and vertex duplication
 *
 *  Default: -2 (both parts of the workaround, ONLY if it's required)
 *
 */

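/* Mirror the primitive count into a local variable right after each
 * set_vertex_and_primitive_count intrinsic (src[1] holds the primitive
 * count), so the workaround epilogue can read the final count back later.
 */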
static bool
copy_primitive_count_write(nir_builder *b,
                           nir_intrinsic_instr *intrin,
                           void *data)
{
   if (intrin->intrinsic != nir_intrinsic_set_vertex_and_primitive_count)
      return false;

   b->cursor = nir_after_instr(&intrin->instr);

   nir_variable *primitive_count = (nir_variable *)data;
   nir_store_var(b, primitive_count, intrin->src[1].ssa, 0x1);

   return true;
}

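/* Create a function-local copy of the primitive count and shadow every
 * write to it. Returns the new variable holding the count.
 */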
static nir_variable *
copy_primitive_count_writes(nir_shader *nir)
{
   nir_variable *primitive_count =
      nir_local_variable_create(nir_shader_get_entrypoint(nir),
                                glsl_uint_type(),
                                "Wa_18019110168_primitive_count");

   nir_shader_intrinsics_pass(nir,
                              copy_primitive_count_write,
                              nir_metadata_control_flow,
                              primitive_count);

   return primitive_count;
}

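/* Rewrite the mesh shader so that every per-primitive output read by the
 * fragment shader is also written as a flat per-vertex output of the
 * provoking vertex. On success, wa_mapping[] maps each converted
 * per-primitive slot to the generic VARn slot chosen for it, and the
 * function returns true, meaning the fragment shader must be patched too.
 */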
static bool
anv_mesh_convert_attrs_prim_to_vert(struct nir_shader *nir,
                                    gl_varying_slot *wa_mapping,
                                    uint64_t fs_inputs,
                                    const VkGraphicsPipelineCreateInfo *pCreateInfo,
                                    void *mem_ctx,
                                    const bool dup_vertices,
                                    const bool force_conversion)
{
   uint64_t per_primitive_outputs = nir->info.per_primitive_outputs;
   per_primitive_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES);

   if (per_primitive_outputs == 0)
      return false;

   uint64_t outputs_written = nir->info.outputs_written;
   uint64_t other_outputs = outputs_written & ~per_primitive_outputs;

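   /* The HW bug is only triggered when the mesh shader also writes clip
    * distances; without them (and without the force knob) there is nothing
    * to work around.
    */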
   if ((other_outputs & (VARYING_BIT_CLIP_DIST0 | VARYING_BIT_CLIP_DIST1)) == 0)
      if (!force_conversion)
         return false;

   uint64_t all_outputs = outputs_written;
   unsigned attrs = 0;

   uint64_t remapped_outputs = outputs_written & per_primitive_outputs;
   remapped_outputs &= ~BITFIELD64_BIT(VARYING_SLOT_CULL_PRIMITIVE);

   /* Skip locations not read by the fragment shader, because they will
    * be eliminated at linking time. Note that some fs inputs may be
    * removed only after optimizations, so it's possible that we will
    * create too many variables.
    */
   remapped_outputs &= fs_inputs;

   /* Figure out the mapping between per-primitive and new per-vertex outputs. */
   nir_foreach_shader_out_variable(var, nir) {
      int location = var->data.location;

      if (!(BITFIELD64_BIT(location) & remapped_outputs))
         continue;

      /* Although primitive shading rate, layer and viewport have a predefined
       * place in the MUE Primitive Header (so we can't really move them
       * anywhere), we still have to copy them to per-vertex space if the
       * fragment shader reads them.
       */
      assert(location == VARYING_SLOT_PRIMITIVE_SHADING_RATE ||
             location == VARYING_SLOT_LAYER ||
             location == VARYING_SLOT_VIEWPORT ||
             location == VARYING_SLOT_PRIMITIVE_ID ||
             location >= VARYING_SLOT_VAR0);

      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, MESA_SHADER_MESH)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      unsigned num_slots = glsl_count_attribute_slots(type, false);

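      /* Find a run of num_slots consecutive generic slots that no output
       * uses yet, and claim it in all_outputs so later attributes don't
       * collide with it.
       */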
      for (gl_varying_slot slot = VARYING_SLOT_VAR0; slot <= VARYING_SLOT_VAR31; slot++) {
         uint64_t mask = BITFIELD64_MASK(num_slots) << slot;
         if ((all_outputs & mask) == 0) {
            wa_mapping[location] = slot;
            all_outputs |= mask;
            attrs++;
            break;
         }
      }

      if (wa_mapping[location] == 0) {
         fprintf(stderr, "Not enough space for the hardware per-primitive data corruption workaround.\n");
         break;
      }
   }

   if (attrs == 0)
      if (!force_conversion)
         return false;

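   /* Channel of the primitive indices vector that holds the provoking
    * vertex: 0 (first vertex) by default, 2 (last vertex of a triangle)
    * when VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT is selected.
    */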
   unsigned provoking_vertex = 0;

   const VkPipelineRasterizationStateCreateInfo *rs_info = pCreateInfo->pRasterizationState;
   const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *rs_pv_info =
      vk_find_struct_const(rs_info, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);
   if (rs_pv_info && rs_pv_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT)
      provoking_vertex = 2;

   unsigned vertices_per_primitive =
         mesa_vertices_per_prim(nir->info.mesh.primitive_type);

   nir_variable *primitive_count_var = copy_primitive_count_writes(nir);

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_builder b = nir_builder_at(nir_after_impl(impl));

   /* wait for all subgroups to finish */
   nir_barrier(&b, SCOPE_WORKGROUP);

   nir_def *zero = nir_imm_int(&b, 0);

   nir_def *local_invocation_index = nir_load_local_invocation_index(&b);

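   /* The rewrite below walks all primitives serially, so once the whole
    * workgroup is done writing outputs, run it on invocation 0 only.
    */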
   nir_def *cmp = nir_ieq(&b, local_invocation_index, zero);
   nir_if *if_stmt = nir_push_if(&b, cmp);
   {
      nir_variable *primitive_indices_var = NULL;

      unsigned num_other_variables = 0;
      nir_foreach_shader_out_variable(var, b.shader) {
         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
            continue;
         num_other_variables++;
      }

      nir_deref_instr **per_vertex_derefs =
            ralloc_array(mem_ctx, nir_deref_instr *, num_other_variables);

      unsigned num_per_vertex_variables = 0;

      unsigned processed = 0;
      nir_foreach_shader_out_variable(var, b.shader) {
         if ((BITFIELD64_BIT(var->data.location) & other_outputs) == 0)
            continue;

         switch (var->data.location) {
            case VARYING_SLOT_PRIMITIVE_COUNT:
               break;
            case VARYING_SLOT_PRIMITIVE_INDICES:
               primitive_indices_var = var;
               break;
            default: {
               const struct glsl_type *type = var->type;
               assert(glsl_type_is_array(type));
               const struct glsl_type *array_element_type =
                     glsl_get_array_element(type);

               if (dup_vertices) {
                  /*
                   * Resize the array type of the output to make space for
                   * one extra vertex per primitive, so that the provoking
                   * vertex is never shared between primitives.
                   */
                  const struct glsl_type *new_type =
                        glsl_array_type(array_element_type,
                                        glsl_get_length(type) +
                                        nir->info.mesh.max_primitives_out,
                                        0);

                  var->type = new_type;
               }

               per_vertex_derefs[num_per_vertex_variables++] =
                     nir_build_deref_var(&b, var);
               break;
            }
         }

         ++processed;
      }
      assert(processed == num_other_variables);

      assert(primitive_count_var != NULL);
      assert(primitive_indices_var != NULL);

      /* Update the types of derefs to match the type of the variables they
       * (de)reference.
       */
      if (dup_vertices) {
         nir_foreach_function_impl(impl, b.shader) {
            nir_foreach_block(block, impl) {
               nir_foreach_instr(instr, block) {
                  if (instr->type != nir_instr_type_deref)
                     continue;

                  nir_deref_instr *deref = nir_instr_as_deref(instr);
                  if (deref->deref_type != nir_deref_type_var)
                     continue;

                  if (deref->var->type != deref->type)
                     deref->type = deref->var->type;
               }
            }
         }
      }

      /* indexed by slot of per-prim attribute */
      struct {
         nir_deref_instr *per_prim_deref;
         nir_deref_instr *per_vert_deref;
      } mapping[VARYING_SLOT_MAX] = {{NULL, NULL}, };

      /* Create new per-vertex output variables mirroring per-primitive
       * variables and create derefs for both old and new variables.
       */
      nir_foreach_shader_out_variable(var, b.shader) {
         gl_varying_slot location = var->data.location;

         if ((BITFIELD64_BIT(location) & (outputs_written & per_primitive_outputs)) == 0)
            continue;
         if (wa_mapping[location] == 0)
            continue;

         const struct glsl_type *type = var->type;
         assert(glsl_type_is_array(type));
         const struct glsl_type *array_element_type = glsl_get_array_element(type);

         const struct glsl_type *new_type =
               glsl_array_type(array_element_type,
                               nir->info.mesh.max_vertices_out +
                               (dup_vertices ? nir->info.mesh.max_primitives_out : 0),
                               0);

         nir_variable *new_var =
               nir_variable_create(b.shader, nir_var_shader_out, new_type, var->name);
         assert(wa_mapping[location] >= VARYING_SLOT_VAR0);
         assert(wa_mapping[location] <= VARYING_SLOT_VAR31);
         new_var->data.location = wa_mapping[location];
         new_var->data.interpolation = INTERP_MODE_FLAT;

         mapping[location].per_vert_deref = nir_build_deref_var(&b, new_var);
         mapping[location].per_prim_deref = nir_build_deref_var(&b, var);
      }

      nir_def *trueconst = nir_imm_true(&b);

      /*
       * for each Primitive (0 : primitiveCount)
       *    if VertexUsed[PrimitiveIndices[Primitive][provoking vertex]]
       *       create 1 new vertex at offset "Vertex"
       *       copy per vert attributes of provoking vertex to the new one
       *       update PrimitiveIndices[Primitive][provoking vertex]
       *       Vertex++
       *    else
       *       VertexUsed[PrimitiveIndices[Primitive][provoking vertex]] := true
       *
       *    for each attribute : mapping
       *       copy per_prim_attr(Primitive) to per_vert_attr[Primitive][provoking vertex]
       */

      /* primitive count */
      nir_def *primitive_count = nir_load_var(&b, primitive_count_var);

      /* primitive index */
      nir_variable *primitive_var =
            nir_local_variable_create(impl, glsl_uint_type(), "Primitive");
      nir_deref_instr *primitive_deref = nir_build_deref_var(&b, primitive_var);
      nir_store_deref(&b, primitive_deref, zero, 1);

      /* vertex index */
      nir_variable *vertex_var =
            nir_local_variable_create(impl, glsl_uint_type(), "Vertex");
      nir_deref_instr *vertex_deref = nir_build_deref_var(&b, vertex_var);
      nir_store_deref(&b, vertex_deref, nir_imm_int(&b, nir->info.mesh.max_vertices_out), 1);

      /* used vertices bitvector */
      const struct glsl_type *used_vertex_type =
            glsl_array_type(glsl_bool_type(),
                            nir->info.mesh.max_vertices_out,
                            0);
      nir_variable *used_vertex_var =
            nir_local_variable_create(impl, used_vertex_type, "VertexUsed");
      nir_deref_instr *used_vertex_deref =
               nir_build_deref_var(&b, used_vertex_var);
      /* Initialize it as "not used" */
      for (unsigned i = 0; i < nir->info.mesh.max_vertices_out; ++i) {
         nir_deref_instr *indexed_used_vertex_deref =
                        nir_build_deref_array(&b, used_vertex_deref, nir_imm_int(&b, i));
         nir_store_deref(&b, indexed_used_vertex_deref, nir_imm_false(&b), 1);
      }

      nir_loop *loop = nir_push_loop(&b);
      {
         nir_def *primitive = nir_load_deref(&b, primitive_deref);
         nir_def *cmp = nir_ige(&b, primitive, primitive_count);

         nir_if *loop_check = nir_push_if(&b, cmp);
         nir_jump(&b, nir_jump_break);
         nir_pop_if(&b, loop_check);

         nir_deref_instr *primitive_indices_deref =
               nir_build_deref_var(&b, primitive_indices_var);
         nir_deref_instr *indexed_primitive_indices_deref;
         nir_def *src_vertex;
         nir_def *prim_indices;

         /* The primitive indices output is an array of vectors, so index
          * the array first, then extract the provoking-vertex channel.
          */
         indexed_primitive_indices_deref = nir_build_deref_array(&b, primitive_indices_deref, primitive);
         prim_indices = nir_load_deref(&b, indexed_primitive_indices_deref);
         src_vertex = nir_channel(&b, prim_indices, provoking_vertex);

         nir_def *dst_vertex = nir_load_deref(&b, vertex_deref);

         nir_deref_instr *indexed_used_vertex_deref =
                        nir_build_deref_array(&b, used_vertex_deref, src_vertex);
         nir_def *used_vertex = nir_load_deref(&b, indexed_used_vertex_deref);
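         /* Without vertex duplication, pretend the provoking vertex has
          * never been seen before, so we always take the else-branch below
          * and write the flat copies straight to the (possibly shared)
          * provoking vertex.
          */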
         if (!dup_vertices)
            used_vertex = nir_imm_false(&b);

         nir_if *vertex_used_check = nir_push_if(&b, used_vertex);
         {
            for (unsigned a = 0; a < num_per_vertex_variables; ++a) {
               nir_deref_instr *attr_arr = per_vertex_derefs[a];
               nir_deref_instr *src = nir_build_deref_array(&b, attr_arr, src_vertex);
               nir_deref_instr *dst = nir_build_deref_array(&b, attr_arr, dst_vertex);

               nir_copy_deref(&b, dst, src);
            }

            /* replace one component of primitive indices vector */
            nir_def *new_val =
                  nir_vector_insert_imm(&b, prim_indices, dst_vertex, provoking_vertex);

            /* and store complete vector */
            nir_store_deref(&b, indexed_primitive_indices_deref, new_val,
                            BITFIELD_MASK(vertices_per_primitive));

            nir_store_deref(&b, vertex_deref, nir_iadd_imm(&b, dst_vertex, 1), 1);

            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
               if (!mapping[i].per_vert_deref)
                  continue;

               nir_deref_instr *src =
                     nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
               nir_deref_instr *dst =
                     nir_build_deref_array(&b, mapping[i].per_vert_deref, dst_vertex);

               nir_copy_deref(&b, dst, src);
            }
         }
         nir_push_else(&b, vertex_used_check);
         {
            nir_store_deref(&b, indexed_used_vertex_deref, trueconst, 1);

            for (unsigned i = 0; i < ARRAY_SIZE(mapping); ++i) {
               if (!mapping[i].per_vert_deref)
                  continue;

               nir_deref_instr *src =
                     nir_build_deref_array(&b, mapping[i].per_prim_deref, primitive);
               nir_deref_instr *dst =
                     nir_build_deref_array(&b, mapping[i].per_vert_deref, src_vertex);

               nir_copy_deref(&b, dst, src);
            }
         }
         nir_pop_if(&b, vertex_used_check);

         nir_store_deref(&b, primitive_deref, nir_iadd_imm(&b, primitive, 1), 1);
      }
      nir_pop_loop(&b, loop);
   }
   nir_pop_if(&b, if_stmt); /* local_invocation_index == 0 */

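   /* Account for the extra vertices that the loop above may create: at
    * most one per primitive.
    */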
   if (dup_vertices)
      nir->info.mesh.max_vertices_out += nir->info.mesh.max_primitives_out;

   if (should_print_nir(nir)) {
      printf("%s\n", __func__);
      nir_print_shader(nir, stdout);
   }

   /* deal with copy_derefs */
   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_lower_var_copies);

   nir_shader_gather_info(nir, impl);

   return true;
}

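/* Rewrite every use of a remapped per-primitive input so it goes through
 * the deref of the new flat per-vertex input instead.
 */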
static bool
anv_frag_update_derefs_instr(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_deref)
      return false;

   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var)
      return false;

   nir_variable *var = deref->var;
   if (!(var->data.mode & nir_var_shader_in))
      return false;

   int location = var->data.location;
   nir_deref_instr **new_derefs = (nir_deref_instr **)data;
   if (new_derefs[location] == NULL)
      return false;

   nir_instr_remove(&deref->instr);
   nir_def_rewrite_uses(&deref->def, &new_derefs[location]->def);

   return true;
}

static bool
anv_frag_update_derefs(nir_shader *shader, nir_deref_instr **mapping)
{
   return nir_shader_instructions_pass(shader, anv_frag_update_derefs_instr,
                                       nir_metadata_none, (void *)mapping);
}

/* Replace the fragment shader inputs that were remapped by the mesh-side
 * pass with new flat per-vertex inputs.
 */
static void
anv_frag_convert_attrs_prim_to_vert(struct nir_shader *nir,
                                    gl_varying_slot *wa_mapping)
{
   /* indexed by slot of per-prim attribute */
   nir_deref_instr *new_derefs[VARYING_SLOT_MAX] = {NULL, };

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
   nir_builder b = nir_builder_at(nir_before_impl(impl));

   nir_foreach_shader_in_variable_safe(var, nir) {
      gl_varying_slot location = var->data.location;
      gl_varying_slot new_location = wa_mapping[location];
      if (new_location == 0)
         continue;

      assert(wa_mapping[new_location] == 0);

      nir_variable *new_var =
            nir_variable_create(b.shader, nir_var_shader_in, var->type, var->name);
      new_var->data.location = new_location;
      new_var->data.location_frac = var->data.location_frac;
      new_var->data.interpolation = INTERP_MODE_FLAT;

      new_derefs[location] = nir_build_deref_var(&b, new_var);
   }

   NIR_PASS(_, nir, anv_frag_update_derefs, new_derefs);

   nir_shader_gather_info(nir, impl);
}

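/* Entry point of the workaround: rewrite the mesh shader and, when outputs
 * were actually converted, the fragment shader too, honoring the driconf
 * option described at the top of this file.
 */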
void
anv_apply_per_prim_attr_wa(struct nir_shader *ms_nir,
                           struct nir_shader *fs_nir,
                           struct anv_device *device,
                           const VkGraphicsPipelineCreateInfo *info)
{
   const struct intel_device_info *devinfo = device->info;

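   /* A negative option value means "only on parts that actually need
    * Wa_18019110168".
    */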
   int mesh_conv_prim_attrs_to_vert_attrs =
         device->physical->instance->mesh_conv_prim_attrs_to_vert_attrs;
   if (mesh_conv_prim_attrs_to_vert_attrs < 0 &&
         !intel_needs_workaround(devinfo, 18019110168))
      mesh_conv_prim_attrs_to_vert_attrs = 0;

   if (mesh_conv_prim_attrs_to_vert_attrs != 0) {
      uint64_t fs_inputs = 0;
      nir_foreach_shader_in_variable(var, fs_nir)
         fs_inputs |= BITFIELD64_BIT(var->data.location);

      void *stage_ctx = ralloc_context(NULL);

      gl_varying_slot wa_mapping[VARYING_SLOT_MAX] = { 0, };

      const bool dup_vertices = abs(mesh_conv_prim_attrs_to_vert_attrs) >= 2;
      const bool force_conversion = mesh_conv_prim_attrs_to_vert_attrs > 0;

      if (anv_mesh_convert_attrs_prim_to_vert(ms_nir, wa_mapping,
                                              fs_inputs, info, stage_ctx,
                                              dup_vertices, force_conversion))
         anv_frag_convert_attrs_prim_to_vert(fs_nir, wa_mapping);

      ralloc_free(stage_ctx);
   }
}