1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_compute_slm.h"
33 #include "common/intel_l3_config.h"
34 #include "common/intel_sample_positions.h"
35 #include "compiler/brw_disasm.h"
36 #include "anv_private.h"
37 #include "compiler/brw_nir.h"
38 #include "compiler/brw_nir_rt.h"
39 #include "compiler/intel_nir.h"
40 #include "anv_nir.h"
41 #include "nir/nir_xfb_info.h"
42 #include "spirv/nir_spirv.h"
43 #include "vk_nir_convert_ycbcr.h"
44 #include "vk_nir.h"
45 #include "vk_pipeline.h"
46 #include "vk_render_pass.h"
47 #include "vk_util.h"
48 
49 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
50  * we can't do that yet because we don't have the ability to copy nir.
51  */
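/* Translate a single VkPipelineShaderStageCreateInfo into NIR: run the common
 * vk_pipeline_shader_stage_to_nir() helper with Anv's address formats and
 * robustness flags, optionally dump the result when the stage's INTEL_DEBUG
 * flag is set, and lower entry-point I/O to temporaries.  Returns NULL on
 * failure.
 */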
52 static nir_shader *
53 anv_shader_stage_to_nir(struct anv_device *device,
54                         VkPipelineCreateFlags2KHR pipeline_flags,
55                         const VkPipelineShaderStageCreateInfo *stage_info,
56                         enum brw_robustness_flags robust_flags,
57                         void *mem_ctx)
58 {
59    const struct anv_physical_device *pdevice = device->physical;
60    const struct brw_compiler *compiler = pdevice->compiler;
61    gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
62    const nir_shader_compiler_options *nir_options =
63       compiler->nir_options[stage];
64 
65    const struct spirv_to_nir_options spirv_options = {
66       .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
67       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
68       .phys_ssbo_addr_format = nir_address_format_64bit_global,
69       .push_const_addr_format = nir_address_format_logical,
70 
71       /* TODO: Consider changing this to an address format where the NULL
72        * pointer equals 0.  That might be a better format to play nice
73        * with certain code / code generators.
74        */
75       .shared_addr_format = nir_address_format_32bit_offset,
76 
77       .min_ubo_alignment = ANV_UBO_ALIGNMENT,
78       .min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
79    };
80 
81    nir_shader *nir;
82    VkResult result =
83       vk_pipeline_shader_stage_to_nir(&device->vk, pipeline_flags, stage_info,
84                                       &spirv_options, nir_options,
85                                       mem_ctx, &nir);
86    if (result != VK_SUCCESS)
87       return NULL;
88 
89    if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
90       fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
91               gl_shader_stage_name(stage));
92       nir_print_shader(nir, stderr);
93    }
94 
95    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
96               nir_shader_get_entrypoint(nir), true, false);
97 
98    return nir;
99 }
100 
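/* Initialize the state common to every pipeline type: the VK object base, the
 * batch allocator and relocation list, the ralloc memory context, the
 * executables array and an empty sets layout.  Backing storage for the batch
 * itself is provided later by the type-specific code.
 */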
101 static VkResult
102 anv_pipeline_init(struct anv_pipeline *pipeline,
103                   struct anv_device *device,
104                   enum anv_pipeline_type type,
105                   VkPipelineCreateFlags2KHR flags,
106                   const VkAllocationCallbacks *pAllocator)
107 {
108    VkResult result;
109 
110    memset(pipeline, 0, sizeof(*pipeline));
111 
112    vk_object_base_init(&device->vk, &pipeline->base,
113                        VK_OBJECT_TYPE_PIPELINE);
114    pipeline->device = device;
115 
116    /* It's the job of the child class to provide actual backing storage for
117     * the batch by setting batch.start, batch.next, and batch.end.
118     */
119    pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
120    pipeline->batch.relocs = &pipeline->batch_relocs;
121    pipeline->batch.status = VK_SUCCESS;
122 
123    const bool uses_relocs = device->physical->uses_relocs;
124    result = anv_reloc_list_init(&pipeline->batch_relocs,
125                                 pipeline->batch.alloc, uses_relocs);
126    if (result != VK_SUCCESS)
127       return result;
128 
129    pipeline->mem_ctx = ralloc_context(NULL);
130 
131    pipeline->type = type;
132    pipeline->flags = flags;
133 
134    util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
135 
136    anv_pipeline_sets_layout_init(&pipeline->layout, device,
137                                  false /* independent_sets */);
138 
139    return VK_SUCCESS;
140 }
141 
142 static void
143 anv_pipeline_init_layout(struct anv_pipeline *pipeline,
144                          struct anv_pipeline_layout *pipeline_layout)
145 {
146    if (pipeline_layout) {
147       struct anv_pipeline_sets_layout *layout = &pipeline_layout->sets_layout;
148       for (uint32_t s = 0; s < layout->num_sets; s++) {
149          if (layout->set[s].layout == NULL)
150             continue;
151 
152          anv_pipeline_sets_layout_add(&pipeline->layout, s,
153                                       layout->set[s].layout);
154       }
155    }
156 
157    anv_pipeline_sets_layout_hash(&pipeline->layout);
158    assert(!pipeline_layout ||
159           !memcmp(pipeline->layout.sha1,
160                   pipeline_layout->sets_layout.sha1,
161                   sizeof(pipeline_layout->sets_layout.sha1)));
162 }
163 
164 static void
165 anv_pipeline_finish(struct anv_pipeline *pipeline,
166                     struct anv_device *device)
167 {
168    anv_pipeline_sets_layout_fini(&pipeline->layout);
169    anv_reloc_list_finish(&pipeline->batch_relocs);
170    ralloc_free(pipeline->mem_ctx);
171    vk_object_base_finish(&pipeline->base);
172 }
173 
174 void anv_DestroyPipeline(
175     VkDevice                                    _device,
176     VkPipeline                                  _pipeline,
177     const VkAllocationCallbacks*                pAllocator)
178 {
179    ANV_FROM_HANDLE(anv_device, device, _device);
180    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
181 
182    if (!pipeline)
183       return;
184 
185    ANV_RMV(resource_destroy, device, pipeline);
186 
187    switch (pipeline->type) {
188    case ANV_PIPELINE_GRAPHICS:
189    case ANV_PIPELINE_GRAPHICS_LIB: {
190       struct anv_graphics_base_pipeline *gfx_pipeline =
191          anv_pipeline_to_graphics_base(pipeline);
192 
193       for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
194          if (gfx_pipeline->shaders[s])
195             anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
196       }
197       break;
198    }
199 
200    case ANV_PIPELINE_COMPUTE: {
201       struct anv_compute_pipeline *compute_pipeline =
202          anv_pipeline_to_compute(pipeline);
203 
204       if (compute_pipeline->cs)
205          anv_shader_bin_unref(device, compute_pipeline->cs);
206 
207       break;
208    }
209 
210    case ANV_PIPELINE_RAY_TRACING: {
211       struct anv_ray_tracing_pipeline *rt_pipeline =
212          anv_pipeline_to_ray_tracing(pipeline);
213 
214       util_dynarray_foreach(&rt_pipeline->shaders,
215                             struct anv_shader_bin *, shader) {
216          anv_shader_bin_unref(device, *shader);
217       }
218       break;
219    }
220 
221    default:
222       unreachable("invalid pipeline type");
223    }
224 
225    anv_pipeline_finish(pipeline, device);
226    vk_free2(&device->vk.alloc, pAllocator, pipeline);
227 }
228 
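/* Per-stage scratch state used while compiling a pipeline: creation info and
 * hashes, the program key, the NIR shader, bind map, program data, compile
 * statistics/disassembly and finally the resulting anv_shader_bin.
 */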
229 struct anv_pipeline_stage {
230    gl_shader_stage stage;
231 
232    VkPipelineCreateFlags2KHR pipeline_flags;
233    struct vk_pipeline_robustness_state rstate;
234 
235    /* VkComputePipelineCreateInfo, VkGraphicsPipelineCreateInfo or
236     * VkRayTracingPipelineCreateInfoKHR pNext field
237     */
238    const void *pipeline_pNext;
239    const VkPipelineShaderStageCreateInfo *info;
240 
241    unsigned char shader_sha1[20];
242    uint32_t      source_hash;
243 
244    union brw_any_prog_key key;
245 
246    struct {
247       gl_shader_stage stage;
248       unsigned char sha1[20];
249    } cache_key;
250 
251    nir_shader *nir;
252 
253    struct {
254       nir_shader *nir;
255       struct anv_shader_bin *bin;
256    } imported;
257 
258    struct anv_push_descriptor_info push_desc_info;
259 
260    enum gl_subgroup_size subgroup_size_type;
261 
262    enum brw_robustness_flags robust_flags;
263 
264    struct anv_pipeline_bind_map bind_map;
265 
266    bool uses_bt_for_push_descs;
267 
268    enum anv_dynamic_push_bits dynamic_push_values;
269 
270    union brw_any_prog_data prog_data;
271 
272    uint32_t num_stats;
273    struct brw_compile_stats stats[3];
274    char *disasm[3];
275 
276    VkPipelineCreationFeedback feedback;
277    uint32_t feedback_idx;
278 
279    const unsigned *code;
280 
281    struct anv_shader_bin *bin;
282 };
283 
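/* Allocate the stage's bind map tables: 256-entry surface and sampler tables
 * (skipped for stages that require bindless resources) plus one slot per
 * embedded sampler declared in the pipeline sets layout.
 */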
284 static void
285 anv_stage_allocate_bind_map_tables(struct anv_pipeline *pipeline,
286                                    struct anv_pipeline_stage *stage,
287                                    void *mem_ctx)
288 {
289    struct anv_pipeline_binding *surface_bindings =
290       brw_shader_stage_requires_bindless_resources(stage->stage) ? NULL :
291       rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
292    struct anv_pipeline_binding *sampler_bindings =
293       brw_shader_stage_requires_bindless_resources(stage->stage) ? NULL :
294       rzalloc_array(mem_ctx, struct anv_pipeline_binding, 256);
295    struct anv_pipeline_embedded_sampler_binding *embedded_sampler_bindings =
296       rzalloc_array(mem_ctx, struct anv_pipeline_embedded_sampler_binding,
297                     anv_pipeline_sets_layout_embedded_sampler_count(
298                        &pipeline->layout));
299 
300    stage->bind_map = (struct anv_pipeline_bind_map) {
301       .surface_to_descriptor = surface_bindings,
302       .sampler_to_descriptor = sampler_bindings,
303       .embedded_sampler_to_binding = embedded_sampler_bindings,
304    };
305 }
306 
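/* Translate the per-stage robustness state into backend robustness flags:
 * SSBO/UBO robustness is requested unless the corresponding buffer behavior
 * is explicitly disabled.
 */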
307 static enum brw_robustness_flags
308 anv_get_robust_flags(const struct vk_pipeline_robustness_state *rstate)
309 {
310    return
311       ((rstate->storage_buffers !=
312         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
313        BRW_ROBUSTNESS_SSBO : 0) |
314       ((rstate->uniform_buffers !=
315         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT) ?
316        BRW_ROBUSTNESS_UBO : 0);
317 }
318 
319 static void
320 populate_base_prog_key(struct anv_pipeline_stage *stage,
321                        const struct anv_device *device)
322 {
323    stage->key.base.robust_flags = anv_get_robust_flags(&stage->rstate);
324    stage->key.base.limit_trig_input_range =
325       device->physical->instance->limit_trig_input_range;
326 }
327 
328 static void
329 populate_vs_prog_key(struct anv_pipeline_stage *stage,
330                      const struct anv_device *device)
331 {
332    memset(&stage->key, 0, sizeof(stage->key));
333 
334    populate_base_prog_key(stage, device);
335 }
336 
337 static void
338 populate_tcs_prog_key(struct anv_pipeline_stage *stage,
339                       const struct anv_device *device,
340                       unsigned input_vertices)
341 {
342    memset(&stage->key, 0, sizeof(stage->key));
343 
344    populate_base_prog_key(stage, device);
345 
346    stage->key.tcs.input_vertices = input_vertices;
347 }
348 
349 static void
350 populate_tes_prog_key(struct anv_pipeline_stage *stage,
351                       const struct anv_device *device)
352 {
353    memset(&stage->key, 0, sizeof(stage->key));
354 
355    populate_base_prog_key(stage, device);
356 }
357 
358 static void
359 populate_gs_prog_key(struct anv_pipeline_stage *stage,
360                      const struct anv_device *device)
361 {
362    memset(&stage->key, 0, sizeof(stage->key));
363 
364    populate_base_prog_key(stage, device);
365 }
366 
367 static bool
368 pipeline_has_coarse_pixel(const BITSET_WORD *dynamic,
369                           const struct vk_multisample_state *ms,
370                           const struct vk_fragment_shading_rate_state *fsr)
371 {
372    /* The Vulkan 1.2.199 spec says:
373     *
374     *    "If any of the following conditions are met, Cxy' must be set to
375     *    {1,1}:
376     *
377     *     * If Sample Shading is enabled.
378     *     * [...]"
379     *
380     * And "sample shading" is defined as follows:
381     *
382     *    "Sample shading is enabled for a graphics pipeline:
383     *
384     *     * If the interface of the fragment shader entry point of the
385     *       graphics pipeline includes an input variable decorated with
386     *       SampleId or SamplePosition. In this case minSampleShadingFactor
387     *       takes the value 1.0.
388     *
389     *     * Else if the sampleShadingEnable member of the
390     *       VkPipelineMultisampleStateCreateInfo structure specified when
391     *       creating the graphics pipeline is set to VK_TRUE. In this case
392     *       minSampleShadingFactor takes the value of
393     *       VkPipelineMultisampleStateCreateInfo::minSampleShading.
394     *
395     *    Otherwise, sample shading is considered disabled."
396     *
397     * The first bullet above is handled by the back-end compiler because those
398     * inputs both force per-sample dispatch.  The second bullet is handled
399     * here.  Note that whether sample shading is enabled has nothing to do
400     * with minSampleShading.
401     */
402    if (ms != NULL && ms->sample_shading_enable)
403       return false;
404 
405    /* The shading rate is not dynamic, and the pipeline either specifies no
406     * fragment shading rate at all or a 1x1 fragment size that no other
407     * element of the pipeline can change.
408     */
409    if (!BITSET_TEST(dynamic, MESA_VK_DYNAMIC_FSR) &&
410        (fsr == NULL ||
411         (fsr->fragment_size.width <= 1 &&
412          fsr->fragment_size.height <= 1 &&
413          fsr->combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
414          fsr->combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)))
415       return false;
416 
417    return true;
418 }
419 
420 static void
421 populate_task_prog_key(struct anv_pipeline_stage *stage,
422                        const struct anv_device *device)
423 {
424    memset(&stage->key, 0, sizeof(stage->key));
425 
426    populate_base_prog_key(stage, device);
427 }
428 
429 static void
430 populate_mesh_prog_key(struct anv_pipeline_stage *stage,
431                        const struct anv_device *device,
432                        bool compact_mue)
433 {
434    memset(&stage->key, 0, sizeof(stage->key));
435 
436    populate_base_prog_key(stage, device);
437 
438    stage->key.mesh.compact_mue = compact_mue;
439 }
440 
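/* Mask of color attachments with a defined format.  Without attachment
 * information all MAX_RTS render targets are assumed written, and a
 * depth/stencil attachment forces bit 0 so the compiler keeps a valid render
 * target (see the comment in the function body).
 */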
441 static uint32_t
442 rp_color_mask(const struct vk_render_pass_state *rp)
443 {
444    if (rp == NULL || !vk_render_pass_state_has_attachment_info(rp))
445       return ((1u << MAX_RTS) - 1);
446 
447    uint32_t color_mask = 0;
448    for (uint32_t i = 0; i < rp->color_attachment_count; i++) {
449       if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED)
450          color_mask |= BITFIELD_BIT(i);
451    }
452 
453    /* If there is a depth/stencil attachment, even if the fragment shader
454     * doesn't write the depth/stencil output, we need a valid render target so
455     * that the compiler doesn't use the null-rt which would cull the
456     * depth/stencil output.
457     */
458    if (rp->depth_attachment_format != VK_FORMAT_UNDEFINED ||
459        rp->stencil_attachment_format != VK_FORMAT_UNDEFINED)
460       color_mask |= 1;
461 
462    return color_mask;
463 }
464 
465 static void
466 populate_wm_prog_key(struct anv_pipeline_stage *stage,
467                      const struct anv_graphics_base_pipeline *pipeline,
468                      const BITSET_WORD *dynamic,
469                      const struct vk_multisample_state *ms,
470                      const struct vk_fragment_shading_rate_state *fsr,
471                      const struct vk_render_pass_state *rp,
472                      const enum intel_sometimes is_mesh)
473 {
474    const struct anv_device *device = pipeline->base.device;
475 
476    memset(&stage->key, 0, sizeof(stage->key));
477 
478    populate_base_prog_key(stage, device);
479 
480    struct brw_wm_prog_key *key = &stage->key.wm;
481 
482    /* We set this to 0 here and set it to the actual value before we call
483     * brw_compile_fs.
484     */
485    key->input_slots_valid = 0;
486 
487    /* XXX Vulkan doesn't appear to specify */
488    key->clamp_fragment_color = false;
489 
490    key->ignore_sample_mask_out = false;
491 
492    assert(rp == NULL || rp->color_attachment_count <= MAX_RTS);
493    /* Consider all color outputs as valid until we look at the NIR variables. */
494    key->color_outputs_valid = rp_color_mask(rp);
495    key->nr_color_regions = util_last_bit(key->color_outputs_valid);
496 
497    /* To reduce possible shader recompilations we would need to know whether
498     * there is a SampleMask output variable, in order to decide if we should
499     * emit code to work around the hardware disabling alpha to coverage
500     * when there is a SampleMask output.
501     *
502     * If the pipeline we compile the fragment shader in includes the output
503     * interface, then we can be sure whether alpha_coverage is enabled or not.
504     * If we don't have that output interface, then we have to compile the
505     * shader with some conditionals.
506     */
507    if (ms != NULL) {
508       /* VUID-VkGraphicsPipelineCreateInfo-rasterizerDiscardEnable-00751:
509        *
510        *   "If the pipeline is being created with fragment shader state,
511        *    pMultisampleState must be a valid pointer to a valid
512        *    VkPipelineMultisampleStateCreateInfo structure"
513        *
514        * It's also required for the fragment output interface.
515        */
516       key->multisample_fbo =
517          BITSET_TEST(dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
518          INTEL_SOMETIMES :
519          ms->rasterization_samples > 1 ? INTEL_ALWAYS : INTEL_NEVER;
520       key->persample_interp =
521          BITSET_TEST(dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ?
522          INTEL_SOMETIMES :
523          (ms->sample_shading_enable &&
524           (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
525          INTEL_ALWAYS : INTEL_NEVER;
526       key->alpha_to_coverage =
527          BITSET_TEST(dynamic, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ?
528          INTEL_SOMETIMES :
529          (ms->alpha_to_coverage_enable ? INTEL_ALWAYS : INTEL_NEVER);
530 
531       /* TODO: We should make this dynamic */
532       if (device->physical->instance->sample_mask_out_opengl_behaviour)
533          key->ignore_sample_mask_out = !key->multisample_fbo;
534    } else {
535       /* Consider all color outputs as valid until we look at the NIR variables. */
536       key->color_outputs_valid = (1u << MAX_RTS) - 1;
537       key->nr_color_regions = MAX_RTS;
538 
539       key->alpha_to_coverage = INTEL_SOMETIMES;
540       key->multisample_fbo = INTEL_SOMETIMES;
541       key->persample_interp = INTEL_SOMETIMES;
542    }
543 
544    key->mesh_input = is_mesh;
545 
546    /* Vulkan doesn't support fixed-function alpha test */
547    key->alpha_test_replicate_alpha = false;
548 
549    key->coarse_pixel =
550       device->vk.enabled_extensions.KHR_fragment_shading_rate &&
551       pipeline_has_coarse_pixel(dynamic, ms, fsr);
552 
553    key->null_push_constant_tbimr_workaround =
554       device->info->needs_null_push_constant_tbimr_workaround;
555 }
556 
557 static void
558 populate_cs_prog_key(struct anv_pipeline_stage *stage,
559                      const struct anv_device *device)
560 {
561    memset(&stage->key, 0, sizeof(stage->key));
562 
563    populate_base_prog_key(stage, device);
564 }
565 
566 static void
567 populate_bs_prog_key(struct anv_pipeline_stage *stage,
568                      const struct anv_device *device,
569                      uint32_t ray_flags)
570 {
571    memset(&stage->key, 0, sizeof(stage->key));
572 
573    populate_base_prog_key(stage, device);
574 
575    stage->key.bs.pipeline_ray_flags = ray_flags;
577 }
578 
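/* Fill the stage's robustness state and compute shader_sha1, which keys both
 * the NIR cache and the pipeline hash.  The lowest dword of the sha1 is kept
 * as source_hash and handed to the backend compiles.
 */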
579 static void
580 anv_stage_write_shader_hash(struct anv_pipeline_stage *stage,
581                             const struct anv_device *device)
582 {
583    vk_pipeline_robustness_state_fill(&device->vk,
584                                      &stage->rstate,
585                                      stage->pipeline_pNext,
586                                      stage->info->pNext);
587 
588    vk_pipeline_hash_shader_stage(stage->pipeline_flags, stage->info,
589                                  &stage->rstate, stage->shader_sha1);
590 
591    stage->robust_flags = anv_get_robust_flags(&stage->rstate);
592 
593    /* Use lowest dword of source shader sha1 for shader hash. */
594    stage->source_hash = ((uint32_t*)stage->shader_sha1)[0];
595 }
596 
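/* A fragment stage counts as "dynamic" when any of persample_interp,
 * multisample_fbo or alpha_to_coverage in its wm key is INTEL_SOMETIMES,
 * i.e. the final value is only known at draw time.
 */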
597 static bool
598 anv_graphics_pipeline_stage_fragment_dynamic(const struct anv_pipeline_stage *stage)
599 {
600    if (stage->stage != MESA_SHADER_FRAGMENT)
601       return false;
602 
603    return stage->key.wm.persample_interp == INTEL_SOMETIMES ||
604           stage->key.wm.multisample_fbo == INTEL_SOMETIMES ||
605           stage->key.wm.alpha_to_coverage == INTEL_SOMETIMES;
606 }
607 
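/* Hash the state that affects every shader in a pipeline: the sets layout
 * sha1, the descriptor model (indirect or not), robustBufferAccess and the
 * compiler's spilling rate.
 */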
608 static void
609 anv_pipeline_hash_common(struct mesa_sha1 *ctx,
610                          const struct anv_pipeline *pipeline)
611 {
612    struct anv_device *device = pipeline->device;
613 
614    _mesa_sha1_update(ctx, pipeline->layout.sha1, sizeof(pipeline->layout.sha1));
615 
616    const bool indirect_descriptors = device->physical->indirect_descriptors;
617    _mesa_sha1_update(ctx, &indirect_descriptors, sizeof(indirect_descriptors));
618 
619    const bool rba = device->robust_buffer_access;
620    _mesa_sha1_update(ctx, &rba, sizeof(rba));
621 
622    const int spilling_rate = device->physical->compiler->spilling_rate;
623    _mesa_sha1_update(ctx, &spilling_rate, sizeof(spilling_rate));
624 }
625 
626 static void
627 anv_pipeline_hash_graphics(struct anv_graphics_base_pipeline *pipeline,
628                            struct anv_pipeline_stage *stages,
629                            uint32_t view_mask,
630                            unsigned char *sha1_out)
631 {
632    const struct anv_device *device = pipeline->base.device;
633    struct mesa_sha1 ctx;
634    _mesa_sha1_init(&ctx);
635 
636    anv_pipeline_hash_common(&ctx, &pipeline->base);
637 
638    _mesa_sha1_update(&ctx, &view_mask, sizeof(view_mask));
639 
640    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
641       if (pipeline->base.active_stages & BITFIELD_BIT(s)) {
642          _mesa_sha1_update(&ctx, stages[s].shader_sha1,
643                            sizeof(stages[s].shader_sha1));
644          _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
645       }
646    }
647 
648    if (stages[MESA_SHADER_MESH].info || stages[MESA_SHADER_TASK].info) {
649       const uint8_t afs = device->physical->instance->assume_full_subgroups;
650       _mesa_sha1_update(&ctx, &afs, sizeof(afs));
651    }
652 
653    _mesa_sha1_final(&ctx, sha1_out);
654 }
655 
656 static void
657 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
658                           struct anv_pipeline_stage *stage,
659                           unsigned char *sha1_out)
660 {
661    const struct anv_device *device = pipeline->base.device;
662    struct mesa_sha1 ctx;
663    _mesa_sha1_init(&ctx);
664 
665    anv_pipeline_hash_common(&ctx, &pipeline->base);
666 
667    const uint8_t afs = device->physical->instance->assume_full_subgroups;
668    _mesa_sha1_update(&ctx, &afs, sizeof(afs));
669 
670    const bool afswb = device->physical->instance->assume_full_subgroups_with_barrier;
671    _mesa_sha1_update(&ctx, &afswb, sizeof(afswb));
672 
673    _mesa_sha1_update(&ctx, stage->shader_sha1,
674                      sizeof(stage->shader_sha1));
675    _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
676 
677    _mesa_sha1_final(&ctx, sha1_out);
678 }
679 
680 static void
681 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
682                                      struct anv_pipeline_stage *stage,
683                                      unsigned char *sha1_out)
684 {
685    struct mesa_sha1 ctx;
686    _mesa_sha1_init(&ctx);
687 
688    anv_pipeline_hash_common(&ctx, &pipeline->base);
689 
690    _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
691    _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
692 
693    _mesa_sha1_final(&ctx, sha1_out);
694 }
695 
696 static void
697 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
698                                               struct anv_pipeline_stage *intersection,
699                                               struct anv_pipeline_stage *any_hit,
700                                               unsigned char *sha1_out)
701 {
702    struct mesa_sha1 ctx;
703    _mesa_sha1_init(&ctx);
704 
705    _mesa_sha1_update(&ctx, pipeline->base.layout.sha1,
706                      sizeof(pipeline->base.layout.sha1));
707 
708    const bool rba = pipeline->base.device->robust_buffer_access;
709    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
710 
711    _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
712    _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
713    _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
714    _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
715 
716    _mesa_sha1_final(&ctx, sha1_out);
717 }
718 
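/* Get the stage's NIR, either from the pipeline cache (keyed on shader_sha1)
 * or by translating the SPIR-V with anv_shader_stage_to_nir(); freshly
 * translated shaders are uploaded back into the cache.  Stages created only
 * from a shader module identifier cannot be translated and report
 * VK_PIPELINE_COMPILE_REQUIRED.
 */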
719 static VkResult
720 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
721                            struct vk_pipeline_cache *cache,
722                            void *mem_ctx,
723                            struct anv_pipeline_stage *stage)
724 {
725    const struct brw_compiler *compiler =
726       pipeline->device->physical->compiler;
727    const nir_shader_compiler_options *nir_options =
728       compiler->nir_options[stage->stage];
729 
730    stage->nir = anv_device_search_for_nir(pipeline->device, cache,
731                                           nir_options,
732                                           stage->shader_sha1,
733                                           mem_ctx);
734    if (stage->nir) {
735       assert(stage->nir->info.stage == stage->stage);
736       return VK_SUCCESS;
737    }
738 
739    /* VkPipelineShaderStageCreateInfo:
740     *
741     *    "If a pipeline is not found, pipeline compilation is not possible and
742     *     the implementation must fail as specified by
743     *     VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT."
744     */
745    if (vk_pipeline_shader_stage_has_identifier(stage->info))
746       return VK_PIPELINE_COMPILE_REQUIRED;
747 
748    stage->nir = anv_shader_stage_to_nir(pipeline->device,
749                                         stage->pipeline_flags, stage->info,
750                                         stage->key.base.robust_flags, mem_ctx);
751    if (stage->nir) {
752       anv_device_upload_nir(pipeline->device, cache,
753                             stage->nir, stage->shader_sha1);
754       return VK_SUCCESS;
755    }
756 
757    return vk_errorf(&pipeline->device->vk, VK_ERROR_UNKNOWN,
758                     "Unable to load NIR");
759 }
760 
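/* Callback for nir_vk_lower_ycbcr_tex: look up the YCbCr conversion state of
 * the immutable sampler at (set, binding, array_index), or return NULL when
 * the binding has no immutable samplers or no conversion attached.
 */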
761 static const struct vk_ycbcr_conversion_state *
762 lookup_ycbcr_conversion(const void *_sets_layout, uint32_t set,
763                         uint32_t binding, uint32_t array_index)
764 {
765    const struct anv_pipeline_sets_layout *sets_layout = _sets_layout;
766 
767    assert(set < MAX_SETS);
768    assert(binding < sets_layout->set[set].layout->binding_count);
769    const struct anv_descriptor_set_binding_layout *bind_layout =
770       &sets_layout->set[set].layout->binding[binding];
771 
772    if (bind_layout->immutable_samplers == NULL)
773       return NULL;
774 
775    array_index = MIN2(array_index, bind_layout->array_size - 1);
776 
777    const struct anv_sampler *sampler =
778       bind_layout->immutable_samplers[array_index];
779 
780    return sampler && sampler->vk.ycbcr_conversion ?
781           &sampler->vk.ycbcr_conversion->state : NULL;
782 }
783 
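/* Size/alignment callback for lowering shared variables to explicit offsets:
 * booleans take 4 bytes and a 3-component vector is aligned like a vec4.  For
 * example, a vec3 of 32-bit floats yields *size = 12 and *align = 16.
 */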
784 static void
785 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
786 {
787    assert(glsl_type_is_vector_or_scalar(type));
788 
789    uint32_t comp_size = glsl_type_is_boolean(type)
790       ? 4 : glsl_get_bit_size(type) / 8;
791    unsigned length = glsl_get_vector_elements(type);
792    *size = comp_size * length,
793    *align = comp_size * (length == 3 ? 4 : length);
794 }
795 
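/* Scan the shader for load_push_constant intrinsics reading driver constants
 * that can change dynamically; currently only the TCS input vertex count
 * (gfx.tcs_input_vertices) is tracked.
 */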
796 static enum anv_dynamic_push_bits
797 anv_nir_compute_dynamic_push_bits(nir_shader *shader)
798 {
799    enum anv_dynamic_push_bits ret = 0;
800 
801    nir_foreach_function_impl(impl, shader) {
802       nir_foreach_block(block, impl) {
803          nir_foreach_instr(instr, block) {
804             if (instr->type != nir_instr_type_intrinsic)
805                continue;
806 
807             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
808             if (intrin->intrinsic != nir_intrinsic_load_push_constant)
809                continue;
810 
811             switch (nir_intrinsic_base(intrin)) {
812             case anv_drv_const_offset(gfx.tcs_input_vertices):
813                ret |= ANV_DYNAMIC_PUSH_INPUT_VERTICES;
814                break;
815 
816             default:
817                break;
818             }
819          }
820       }
821    }
822 
823    return ret;
824 }
825 
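/* Adjust the subgroup size choice for compute-like stages (compute, task,
 * mesh): apply the assume_full_subgroups instance options, pin full-subgroup
 * dispatch to a concrete size, and force SIMD32 for cooperative matrix
 * shaders that did not request a specific size.
 */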
826 static void
827 anv_fixup_subgroup_size(struct anv_device *device, struct shader_info *info)
828 {
829    switch (info->stage) {
830    case MESA_SHADER_COMPUTE:
831    case MESA_SHADER_TASK:
832    case MESA_SHADER_MESH:
833       break;
834    default:
835       return;
836    }
837 
838    unsigned local_size = info->workgroup_size[0] *
839                          info->workgroup_size[1] *
840                          info->workgroup_size[2];
841 
842    /* Games don't always request full subgroups when they should,
843     * which can cause bugs, as they may expect a bigger subgroup size
844     * than the one we choose for the execution.
845     */
846    if (device->physical->instance->assume_full_subgroups &&
847        info->uses_wide_subgroup_intrinsics &&
848        info->subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
849        local_size &&
850        local_size % BRW_SUBGROUP_SIZE == 0)
851       info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
852 
853    if (device->physical->instance->assume_full_subgroups_with_barrier &&
854        info->stage == MESA_SHADER_COMPUTE &&
855        device->info->verx10 <= 125 &&
856        info->uses_control_barrier &&
857        info->subgroup_size == SUBGROUP_SIZE_VARYING &&
858        local_size &&
859        local_size % BRW_SUBGROUP_SIZE == 0)
860       info->subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
861 
862    /* If the client requests that we dispatch full subgroups but doesn't
863     * allow us to pick a subgroup size, we have to smash it to the API
864     * value of 32.  Performance will likely be terrible in this case but
865     * there's nothing we can do about that.  The client should have chosen
866     * a size.
867     */
868    if (info->subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
869       info->subgroup_size =
870          device->physical->instance->assume_full_subgroups != 0 ?
871          device->physical->instance->assume_full_subgroups : BRW_SUBGROUP_SIZE;
872 
873    /* Cooperative matrix extension requires that all invocations in a subgroup
874     * be active. As a result, when the application does not request a specific
875     * subgroup size, we must use SIMD32.
876     */
877    if (info->stage == MESA_SHADER_COMPUTE && info->cs.has_cooperative_matrix &&
878        info->subgroup_size < SUBGROUP_SIZE_REQUIRE_8) {
879       info->subgroup_size = BRW_SUBGROUP_SIZE;
880    }
881 }
882 
883 /* #define DEBUG_PRINTF_EXAMPLE 0 */
884 
885 #if DEBUG_PRINTF_EXAMPLE
886 static bool
887 print_ubo_load(nir_builder *b,
888                nir_intrinsic_instr *intrin,
889                UNUSED void *cb_data)
890 {
891    if (intrin->intrinsic != nir_intrinsic_load_uniform)
892       return false;
893 
894    b->cursor = nir_after_instr(&intrin->instr);
895    nir_printf_fmt(b, true, 64,
896                   "uniform<= pos=%02.2fx%02.2f offset=0x%08x val=0x%08x\n",
897                   nir_channel(b, nir_load_frag_coord(b), 0),
898                   nir_channel(b, nir_load_frag_coord(b), 1),
899                   intrin->src[0].ssa,
900                   &intrin->def);
901    return true;
902 }
903 #endif
904 
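/* Run Anv's NIR lowering pipeline on a stage: YCbCr and input-attachment
 * lowering, multiview, applying the descriptor set layout to UBO/SSBO/texture
 * access, explicit I/O for global/push-constant/UBO/SSBO/shared memory,
 * non-uniform access handling, push constant layout and shared memory
 * zero-initialization.  Fills stage->bind_map, push_desc_info and
 * dynamic_push_values as side effects.
 */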
905 static void
906 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
907                        void *mem_ctx,
908                        struct anv_pipeline_stage *stage,
909                        struct anv_pipeline_sets_layout *layout,
910                        uint32_t view_mask,
911                        bool use_primitive_replication)
912 {
913    const struct anv_physical_device *pdevice = pipeline->device->physical;
914    const struct brw_compiler *compiler = pdevice->compiler;
915 
916    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
917    nir_shader *nir = stage->nir;
918 
919    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
920       NIR_PASS(_, nir, nir_lower_wpos_center);
921       NIR_PASS(_, nir, nir_lower_input_attachments,
922                &(nir_input_attachment_options) {
923                    .use_fragcoord_sysval = true,
924                    .use_layer_id_sysval = true,
925                });
926    }
927 
928    if (nir->info.stage == MESA_SHADER_MESH ||
929          nir->info.stage == MESA_SHADER_TASK) {
930       nir_lower_compute_system_values_options options = {
931             .lower_workgroup_id_to_index = true,
932             /* nir_lower_idiv generates expensive code */
933             .shortcut_1d_workgroup_id = compiler->devinfo->verx10 >= 125,
934       };
935 
936       NIR_PASS(_, nir, nir_lower_compute_system_values, &options);
937    }
938 
939    NIR_PASS(_, nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion, layout);
940 
941    if (pipeline->type == ANV_PIPELINE_GRAPHICS ||
942        pipeline->type == ANV_PIPELINE_GRAPHICS_LIB) {
943       NIR_PASS(_, nir, anv_nir_lower_multiview, view_mask,
944                use_primitive_replication);
945    }
946 
947    if (nir->info.stage == MESA_SHADER_COMPUTE && nir->info.cs.has_cooperative_matrix) {
948       anv_fixup_subgroup_size(pipeline->device, &nir->info);
949       NIR_PASS(_, nir, brw_nir_lower_cmat, nir->info.subgroup_size);
950       NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, 16);
951    }
952 
953    /* The patch control points are delivered through a push constant when
954     * dynamic.
955     */
956    if (nir->info.stage == MESA_SHADER_TESS_CTRL &&
957        stage->key.tcs.input_vertices == 0)
958       NIR_PASS(_, nir, anv_nir_lower_load_patch_vertices_in);
959 
960    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
961 
962    NIR_PASS(_, nir, brw_nir_lower_storage_image,
963             &(struct brw_nir_lower_storage_image_opts) {
964                /* Anv only supports Gfx9+ which has better defined typed read
965                 * behavior. It allows us to only have to care about lowering
966                 * loads.
967                 */
968                .devinfo = compiler->devinfo,
969                .lower_loads = true,
970             });
971 
972    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
973             nir_address_format_64bit_global);
974    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
975             nir_address_format_32bit_offset);
976 
977    NIR_PASS(_, nir, brw_nir_lower_ray_queries, &pdevice->info);
978 
979    stage->push_desc_info.used_descriptors =
980       anv_nir_compute_used_push_descriptors(nir, layout);
981 
982    struct anv_pipeline_push_map push_map = {};
983 
984    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
985    NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
986               pdevice, stage->key.base.robust_flags,
987               layout->independent_sets,
988               layout, &stage->bind_map, &push_map, mem_ctx);
989 
990    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
991             anv_nir_ubo_addr_format(pdevice, stage->key.base.robust_flags));
992    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
993             anv_nir_ssbo_addr_format(pdevice, stage->key.base.robust_flags));
994 
995    /* First run copy-prop to get rid of all of the vec() that address
996     * calculations often create and then constant-fold so that, when we
997     * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
998     */
999    bool progress;
1000    do {
1001       progress = false;
1002       NIR_PASS(progress, nir, nir_opt_algebraic);
1003       NIR_PASS(progress, nir, nir_copy_prop);
1004       NIR_PASS(progress, nir, nir_opt_constant_folding);
1005       NIR_PASS(progress, nir, nir_opt_dce);
1006    } while (progress);
1007 
1008    /* Needed for anv_nir_lower_ubo_loads. */
1009    nir_divergence_analysis(nir);
1010 
1011    NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
1012 
1013    enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
1014       nir_lower_non_uniform_texture_access |
1015       nir_lower_non_uniform_image_access |
1016       nir_lower_non_uniform_get_ssbo_size;
1017 
1018    /* In practice, most shaders do not have non-uniform-qualified
1019     * accesses (see
1020     * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
1021     * so a cheaper check, which is likely to fail, is run first.
1022     */
1023    if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
1024       NIR_PASS(_, nir, nir_opt_non_uniform_access);
1025 
1026       /* We don't support non-uniform UBOs, and non-uniform SSBO access is
1027        * handled naturally by falling back to A64 messages.
1028        */
1029       NIR_PASS(_, nir, nir_lower_non_uniform_access,
1030                &(nir_lower_non_uniform_access_options) {
1031                   .types = lower_non_uniform_access_types,
1032                   .callback = NULL,
1033                });
1034 
1035       NIR_PASS(_, nir, intel_nir_lower_non_uniform_resource_intel);
1036       NIR_PASS(_, nir, intel_nir_cleanup_resource_intel);
1037       NIR_PASS(_, nir, nir_opt_dce);
1038    }
1039 
1040    NIR_PASS_V(nir, anv_nir_update_resource_intel_block);
1041 
1042    stage->dynamic_push_values = anv_nir_compute_dynamic_push_bits(nir);
1043 
1044    NIR_PASS_V(nir, anv_nir_compute_push_layout,
1045               pdevice, stage->key.base.robust_flags,
1046               anv_graphics_pipeline_stage_fragment_dynamic(stage),
1047               prog_data, &stage->bind_map, &push_map,
1048               pipeline->layout.type, mem_ctx);
1049 
1050    NIR_PASS_V(nir, anv_nir_lower_resource_intel, pdevice,
1051               pipeline->layout.type);
1052 
1053    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
1054       if (!nir->info.shared_memory_explicit_layout) {
1055          NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
1056                   nir_var_mem_shared, shared_type_info);
1057       }
1058 
1059       NIR_PASS(_, nir, nir_lower_explicit_io,
1060                nir_var_mem_shared, nir_address_format_32bit_offset);
1061 
1062       if (nir->info.zero_initialize_shared_memory &&
1063           nir->info.shared_size > 0) {
1064          /* The effective Shared Local Memory size is at least 1024 bytes and
1065           * is always rounded to a power of two, so it is OK to align the size
1066           * used by the shader to chunk_size -- which does simplify the logic.
1067           */
1068          const unsigned chunk_size = 16;
1069          const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
1070          assert(shared_size <=
1071                 intel_compute_slm_calculate_size(compiler->devinfo->ver, nir->info.shared_size));
1072 
1073          NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
1074                   shared_size, chunk_size);
1075       }
1076    }
1077 
1078    if (gl_shader_stage_is_compute(nir->info.stage) ||
1079        gl_shader_stage_is_mesh(nir->info.stage)) {
1080       NIR_PASS(_, nir, brw_nir_lower_cs_intrinsics, compiler->devinfo,
1081                &stage->prog_data.cs);
1082    }
1083 
1084    stage->push_desc_info.used_set_buffer =
1085       anv_nir_loads_push_desc_buffer(nir, layout, &stage->bind_map);
1086    stage->push_desc_info.fully_promoted_ubo_descriptors =
1087       anv_nir_push_desc_ubo_fully_promoted(nir, layout, &stage->bind_map);
1088 
1089 #if DEBUG_PRINTF_EXAMPLE
1090    if (stage->stage == MESA_SHADER_FRAGMENT) {
1091       nir_shader_intrinsics_pass(nir, print_ubo_load,
1092                                  nir_metadata_none,
1093                                  NULL);
1094    }
1095 #endif
1096 
1097    stage->nir = nir;
1098 }
1099 
1100 static void
1101 anv_pipeline_link_vs(const struct brw_compiler *compiler,
1102                      struct anv_pipeline_stage *vs_stage,
1103                      struct anv_pipeline_stage *next_stage)
1104 {
1105    if (next_stage)
1106       brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
1107 }
1108 
1109 static void
1110 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
1111                         void *mem_ctx,
1112                         struct anv_graphics_base_pipeline *pipeline,
1113                         struct anv_pipeline_stage *vs_stage,
1114                         uint32_t view_mask,
1115                         char **error_str)
1116 {
1117    /* When using Primitive Replication for multiview, each view gets its own
1118     * position slot.
1119     */
1120    uint32_t pos_slots =
1121       (vs_stage->nir->info.per_view_outputs & VARYING_BIT_POS) ?
1122       MAX2(1, util_bitcount(view_mask)) : 1;
1123 
1124    /* Only position is allowed to be per-view */
1125    assert(!(vs_stage->nir->info.per_view_outputs & ~VARYING_BIT_POS));
1126 
1127    brw_compute_vue_map(compiler->devinfo,
1128                        &vs_stage->prog_data.vs.base.vue_map,
1129                        vs_stage->nir->info.outputs_written,
1130                        vs_stage->nir->info.separate_shader,
1131                        pos_slots);
1132 
1133    vs_stage->num_stats = 1;
1134 
1135    struct brw_compile_vs_params params = {
1136       .base = {
1137          .nir = vs_stage->nir,
1138          .stats = vs_stage->stats,
1139          .log_data = pipeline->base.device,
1140          .mem_ctx = mem_ctx,
1141          .source_hash = vs_stage->source_hash,
1142       },
1143       .key = &vs_stage->key.vs,
1144       .prog_data = &vs_stage->prog_data.vs,
1145    };
1146 
1147    vs_stage->code = brw_compile_vs(compiler, &params);
1148    *error_str = params.base.error_str;
1149 }
1150 
1151 static void
1152 merge_tess_info(struct shader_info *tes_info,
1153                 const struct shader_info *tcs_info)
1154 {
1155    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
1156     *
1157     *    "PointMode. Controls generation of points rather than triangles
1158     *     or lines. This functionality defaults to disabled, and is
1159     *     enabled if either shader stage includes the execution mode."
1160     *
1161     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
1162     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
1163     * and OutputVertices, it says:
1164     *
1165     *    "One mode must be set in at least one of the tessellation
1166     *     shader stages."
1167     *
1168     * So, the fields can be set in either the TCS or TES, but they must
1169     * agree if set in both.  Our backend looks at TES, so bitwise-or in
1170     * the values from the TCS.
1171     */
1172    assert(tcs_info->tess.tcs_vertices_out == 0 ||
1173           tes_info->tess.tcs_vertices_out == 0 ||
1174           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
1175    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
1176 
1177    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
1178           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
1179           tcs_info->tess.spacing == tes_info->tess.spacing);
1180    tes_info->tess.spacing |= tcs_info->tess.spacing;
1181 
1182    assert(tcs_info->tess._primitive_mode == 0 ||
1183           tes_info->tess._primitive_mode == 0 ||
1184           tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
1185    tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
1186    tes_info->tess.ccw |= tcs_info->tess.ccw;
1187    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
1188 }
1189 
1190 static void
1191 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
1192                       struct anv_pipeline_stage *tcs_stage,
1193                       struct anv_pipeline_stage *tes_stage)
1194 {
1195    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
1196 
1197    brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
1198 
1199    nir_lower_patch_vertices(tes_stage->nir,
1200                             tcs_stage->nir->info.tess.tcs_vertices_out,
1201                             NULL);
1202 
1203    /* Copy TCS info into the TES info */
1204    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
1205 
1206    /* Whacking the key after cache lookup is a bit sketchy, but all of
1207     * this comes from the SPIR-V, which is part of the hash used for the
1208     * pipeline cache.  So it should be safe.
1209     */
1210    tcs_stage->key.tcs._tes_primitive_mode =
1211       tes_stage->nir->info.tess._primitive_mode;
1212 }
1213 
1214 static void
1215 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
1216                          void *mem_ctx,
1217                          struct anv_device *device,
1218                          struct anv_pipeline_stage *tcs_stage,
1219                          struct anv_pipeline_stage *prev_stage,
1220                          char **error_str)
1221 {
1222    tcs_stage->key.tcs.outputs_written =
1223       tcs_stage->nir->info.outputs_written;
1224    tcs_stage->key.tcs.patch_outputs_written =
1225       tcs_stage->nir->info.patch_outputs_written;
1226 
1227    tcs_stage->num_stats = 1;
1228 
1229    struct brw_compile_tcs_params params = {
1230       .base = {
1231          .nir = tcs_stage->nir,
1232          .stats = tcs_stage->stats,
1233          .log_data = device,
1234          .mem_ctx = mem_ctx,
1235          .source_hash = tcs_stage->source_hash,
1236       },
1237       .key = &tcs_stage->key.tcs,
1238       .prog_data = &tcs_stage->prog_data.tcs,
1239    };
1240 
1241    tcs_stage->code = brw_compile_tcs(compiler, &params);
1242    *error_str = params.base.error_str;
1243 }
1244 
1245 static void
1246 anv_pipeline_link_tes(const struct brw_compiler *compiler,
1247                       struct anv_pipeline_stage *tes_stage,
1248                       struct anv_pipeline_stage *next_stage)
1249 {
1250    if (next_stage)
1251       brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
1252 }
1253 
1254 static void
1255 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
1256                          void *mem_ctx,
1257                          struct anv_device *device,
1258                          struct anv_pipeline_stage *tes_stage,
1259                          struct anv_pipeline_stage *tcs_stage,
1260                          char **error_str)
1261 {
1262    tes_stage->key.tes.inputs_read =
1263       tcs_stage->nir->info.outputs_written;
1264    tes_stage->key.tes.patch_inputs_read =
1265       tcs_stage->nir->info.patch_outputs_written;
1266 
1267    tes_stage->num_stats = 1;
1268 
1269    struct brw_compile_tes_params params = {
1270       .base = {
1271          .nir = tes_stage->nir,
1272          .stats = tes_stage->stats,
1273          .log_data = device,
1274          .mem_ctx = mem_ctx,
1275          .source_hash = tes_stage->source_hash,
1276       },
1277       .key = &tes_stage->key.tes,
1278       .prog_data = &tes_stage->prog_data.tes,
1279       .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
1280    };
1281 
1282    tes_stage->code = brw_compile_tes(compiler, &params);
1283    *error_str = params.base.error_str;
1284 }
1285 
1286 static void
1287 anv_pipeline_link_gs(const struct brw_compiler *compiler,
1288                      struct anv_pipeline_stage *gs_stage,
1289                      struct anv_pipeline_stage *next_stage)
1290 {
1291    if (next_stage)
1292       brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
1293 }
1294 
1295 static void
1296 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
1297                         void *mem_ctx,
1298                         struct anv_device *device,
1299                         struct anv_pipeline_stage *gs_stage,
1300                         struct anv_pipeline_stage *prev_stage,
1301                         char **error_str)
1302 {
1303    brw_compute_vue_map(compiler->devinfo,
1304                        &gs_stage->prog_data.gs.base.vue_map,
1305                        gs_stage->nir->info.outputs_written,
1306                        gs_stage->nir->info.separate_shader, 1);
1307 
1308    gs_stage->num_stats = 1;
1309 
1310    struct brw_compile_gs_params params = {
1311       .base = {
1312          .nir = gs_stage->nir,
1313          .stats = gs_stage->stats,
1314          .log_data = device,
1315          .mem_ctx = mem_ctx,
1316          .source_hash = gs_stage->source_hash,
1317       },
1318       .key = &gs_stage->key.gs,
1319       .prog_data = &gs_stage->prog_data.gs,
1320    };
1321 
1322    gs_stage->code = brw_compile_gs(compiler, &params);
1323    *error_str = params.base.error_str;
1324 }
1325 
1326 static void
1327 anv_pipeline_link_task(const struct brw_compiler *compiler,
1328                        struct anv_pipeline_stage *task_stage,
1329                        struct anv_pipeline_stage *next_stage)
1330 {
1331    assert(next_stage);
1332    assert(next_stage->stage == MESA_SHADER_MESH);
1333    brw_nir_link_shaders(compiler, task_stage->nir, next_stage->nir);
1334 }
1335 
1336 static void
1337 anv_pipeline_compile_task(const struct brw_compiler *compiler,
1338                           void *mem_ctx,
1339                           struct anv_device *device,
1340                           struct anv_pipeline_stage *task_stage,
1341                           char **error_str)
1342 {
1343    task_stage->num_stats = 1;
1344 
1345    struct brw_compile_task_params params = {
1346       .base = {
1347          .nir = task_stage->nir,
1348          .stats = task_stage->stats,
1349          .log_data = device,
1350          .mem_ctx = mem_ctx,
1351          .source_hash = task_stage->source_hash,
1352       },
1353       .key = &task_stage->key.task,
1354       .prog_data = &task_stage->prog_data.task,
1355    };
1356 
1357    task_stage->code = brw_compile_task(compiler, &params);
1358    *error_str = params.base.error_str;
1359 }
1360 
1361 static void
1362 anv_pipeline_link_mesh(const struct brw_compiler *compiler,
1363                        struct anv_pipeline_stage *mesh_stage,
1364                        struct anv_pipeline_stage *next_stage)
1365 {
1366    if (next_stage) {
1367       brw_nir_link_shaders(compiler, mesh_stage->nir, next_stage->nir);
1368    }
1369 }
1370 
1371 static void
1372 anv_pipeline_compile_mesh(const struct brw_compiler *compiler,
1373                           void *mem_ctx,
1374                           struct anv_device *device,
1375                           struct anv_pipeline_stage *mesh_stage,
1376                           struct anv_pipeline_stage *prev_stage,
1377                           char **error_str)
1378 {
1379    mesh_stage->num_stats = 1;
1380 
1381    struct brw_compile_mesh_params params = {
1382       .base = {
1383          .nir = mesh_stage->nir,
1384          .stats = mesh_stage->stats,
1385          .log_data = device,
1386          .mem_ctx = mem_ctx,
1387          .source_hash = mesh_stage->source_hash,
1388       },
1389       .key = &mesh_stage->key.mesh,
1390       .prog_data = &mesh_stage->prog_data.mesh,
1391    };
1392 
1393    if (prev_stage) {
1394       assert(prev_stage->stage == MESA_SHADER_TASK);
1395       params.tue_map = &prev_stage->prog_data.task.map;
1396    }
1397 
1398    mesh_stage->code = brw_compile_mesh(compiler, &params);
1399    *error_str = params.base.error_str;
1400 }
1401 
1402 static void
1403 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1404                      struct anv_pipeline_stage *stage,
1405                      const struct vk_render_pass_state *rp)
1406 {
1407    /* Initially the valid outputs value is set to all possible render targets
1408     * being valid (see populate_wm_prog_key()), before we have looked at the
1409     * shader variables. Here we look at the shader's output variables and
1410     * compute the correct number of render target outputs.
1411     */
1412    stage->key.wm.color_outputs_valid = 0;
1413    nir_foreach_shader_out_variable_safe(var, stage->nir) {
1414       if (var->data.location < FRAG_RESULT_DATA0)
1415          continue;
1416 
1417       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1418       const unsigned array_len =
1419          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1420       assert(rt + array_len <= MAX_RTS);
1421 
1422       stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
1423    }
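   /* Illustrative example: a vec4[2] output at location FRAG_RESULT_DATA1
    * covers render targets 1 and 2 and contributes BITFIELD_RANGE(1, 2) ==
    * 0x6 to color_outputs_valid, before the render pass mask is applied
    * below.
    */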
1424    stage->key.wm.color_outputs_valid &= rp_color_mask(rp);
1425    stage->key.wm.nr_color_regions =
1426       util_last_bit(stage->key.wm.color_outputs_valid);
1427 
1428    unsigned num_rt_bindings;
1429    struct anv_pipeline_binding rt_bindings[MAX_RTS];
1430    if (stage->key.wm.nr_color_regions > 0) {
1431       assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1432       for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1433          if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1434             rt_bindings[rt] = (struct anv_pipeline_binding) {
1435                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1436                .index = rt,
1437                .binding = UINT32_MAX,
1438 
1439             };
1440          } else {
1441             /* Set up a null render target */
1442             rt_bindings[rt] = (struct anv_pipeline_binding) {
1443                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1444                .index = ANV_COLOR_OUTPUT_UNUSED,
1445                .binding = UINT32_MAX,
1446             };
1447          }
1448       }
1449       num_rt_bindings = stage->key.wm.nr_color_regions;
1450    } else if (brw_nir_fs_needs_null_rt(
1451                  compiler->devinfo, stage->nir,
1452                  stage->key.wm.multisample_fbo != INTEL_NEVER,
1453                  stage->key.wm.alpha_to_coverage != INTEL_NEVER)) {
1454       /* Set up a null render target */
1455       rt_bindings[0] = (struct anv_pipeline_binding) {
1456          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1457          .index = ANV_COLOR_OUTPUT_DISABLED,
1458          .binding = UINT32_MAX,
1459       };
1460       num_rt_bindings = 1;
1461    } else {
1462       num_rt_bindings = 0;
1463    }
1464 
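   /* The render target bindings always occupy the first entries of the
    * surface map (hence the surface_count == 0 assert); the remaining
    * surfaces are appended later when the pipeline layout is lowered into
    * the shader.
    */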
1465    assert(num_rt_bindings <= MAX_RTS);
1466    assert(stage->bind_map.surface_count == 0);
1467    typed_memcpy(stage->bind_map.surface_to_descriptor,
1468                 rt_bindings, num_rt_bindings);
1469    stage->bind_map.surface_count += num_rt_bindings;
1470 }
1471 
1472 static void
1473 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1474                         void *mem_ctx,
1475                         struct anv_device *device,
1476                         struct anv_pipeline_stage *fs_stage,
1477                         struct anv_pipeline_stage *prev_stage,
1478                         struct anv_graphics_base_pipeline *pipeline,
1479                         uint32_t view_mask,
1480                         bool use_primitive_replication,
1481                         char **error_str)
1482 {
1483    /* When using Primitive Replication for multiview, each view gets its own
1484     * position slot.
1485     */
1486    uint32_t pos_slots = use_primitive_replication ?
1487       MAX2(1, util_bitcount(view_mask)) : 1;
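   /* For example, a view_mask of 0b0101 (two views) gives pos_slots == 2
    * when primitive replication is used; otherwise a single position slot
    * suffices.
    */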
1488 
1489    /* If we have a previous stage we can use that to deduce the valid slots.
1490     * Otherwise, rely on the inputs read by the fragment shader itself.
1491     */
1492    if (prev_stage) {
1493       fs_stage->key.wm.input_slots_valid =
1494          prev_stage->prog_data.vue.vue_map.slots_valid;
1495    } else {
1496       struct intel_vue_map prev_vue_map;
1497       brw_compute_vue_map(compiler->devinfo,
1498                           &prev_vue_map,
1499                           fs_stage->nir->info.inputs_read,
1500                           fs_stage->nir->info.separate_shader,
1501                           pos_slots);
1502 
1503       fs_stage->key.wm.input_slots_valid = prev_vue_map.slots_valid;
1504    }
1505 
1506    struct brw_compile_fs_params params = {
1507       .base = {
1508          .nir = fs_stage->nir,
1509          .stats = fs_stage->stats,
1510          .log_data = device,
1511          .mem_ctx = mem_ctx,
1512          .source_hash = fs_stage->source_hash,
1513       },
1514       .key = &fs_stage->key.wm,
1515       .prog_data = &fs_stage->prog_data.wm,
1516 
1517       .allow_spilling = true,
1518       .max_polygons = UCHAR_MAX,
1519    };
1520 
1521    if (prev_stage && prev_stage->stage == MESA_SHADER_MESH) {
1522       params.mue_map = &prev_stage->prog_data.mesh.map;
1523       /* TODO(mesh): Slots valid, do we even use/rely on it? */
1524    }
1525 
1526    fs_stage->code = brw_compile_fs(compiler, &params);
1527    *error_str = params.base.error_str;
1528 
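   /* The FS backend can produce several SIMD variants (multi-polygon or
    * SIMD8, SIMD16, SIMD32); count one statistics entry per variant that was
    * actually compiled.
    */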
1529    fs_stage->num_stats = (uint32_t)!!fs_stage->prog_data.wm.dispatch_multi +
1530                          (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1531                          (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1532                          (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1533    assert(fs_stage->num_stats <= ARRAY_SIZE(fs_stage->stats));
1534 }
1535 
1536 static void
1537 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1538                             struct anv_pipeline_stage *stage,
1539                             struct brw_compile_stats *stats,
1540                             uint32_t code_offset)
1541 {
1542    char *nir = NULL;
1543    if (stage->nir &&
1544        (pipeline->flags &
1545         VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1546       nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1547    }
1548 
1549    char *disasm = NULL;
1550    if (stage->code &&
1551        (pipeline->flags &
1552         VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1553       char *stream_data = NULL;
1554       size_t stream_size = 0;
1555       FILE *stream = open_memstream(&stream_data, &stream_size);
1556 
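      /* Push ranges are expressed in 32-byte units, hence the "* 32"
       * conversions to bytes in the sizes and offsets printed below.
       */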
1557       uint32_t push_size = 0;
1558       for (unsigned i = 0; i < 4; i++)
1559          push_size += stage->bind_map.push_ranges[i].length;
1560       if (push_size > 0) {
1561          fprintf(stream, "Push constant ranges:\n");
1562          for (unsigned i = 0; i < 4; i++) {
1563             if (stage->bind_map.push_ranges[i].length == 0)
1564                continue;
1565 
1566             fprintf(stream, "    RANGE%d (%dB): ", i,
1567                     stage->bind_map.push_ranges[i].length * 32);
1568 
1569             switch (stage->bind_map.push_ranges[i].set) {
1570             case ANV_DESCRIPTOR_SET_NULL:
1571                fprintf(stream, "NULL");
1572                break;
1573 
1574             case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1575                fprintf(stream, "Vulkan push constants and API params");
1576                break;
1577 
1578             case ANV_DESCRIPTOR_SET_DESCRIPTORS_BUFFER:
1579                fprintf(stream, "Descriptor buffer (desc buffer) for set %d (start=%dB)",
1580                        stage->bind_map.push_ranges[i].index,
1581                        stage->bind_map.push_ranges[i].start * 32);
1582                break;
1583 
1584             case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1585                fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1586                        stage->bind_map.push_ranges[i].index,
1587                        stage->bind_map.push_ranges[i].start * 32);
1588                break;
1589 
1590             case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1591                unreachable("Color attachments can't be pushed");
1592 
1593             default:
1594                fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1595                        stage->bind_map.push_ranges[i].set,
1596                        stage->bind_map.push_ranges[i].index,
1597                        stage->bind_map.push_ranges[i].start * 32);
1598                break;
1599             }
1600             fprintf(stream, "\n");
1601          }
1602          fprintf(stream, "\n");
1603       }
1604 
1605       /* Creating this is far cheaper than it looks.  It's perfectly fine to
1606        * do it for every binary.
1607        */
1608       brw_disassemble_with_errors(&pipeline->device->physical->compiler->isa,
1609                                   stage->code, code_offset, stream);
1610 
1611       fclose(stream);
1612 
1613       /* Copy it to a ralloc'd thing */
1614       disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1615       memcpy(disasm, stream_data, stream_size);
1616       disasm[stream_size] = 0;
1617 
1618       free(stream_data);
1619    }
1620 
1621    const struct anv_pipeline_executable exe = {
1622       .stage = stage->stage,
1623       .stats = *stats,
1624       .nir = nir,
1625       .disasm = disasm,
1626    };
1627    util_dynarray_append(&pipeline->executables,
1628                         struct anv_pipeline_executable, exe);
1629 }
1630 
1631 static void
1632 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1633                              struct anv_pipeline_stage *stage)
1634 {
1635    if (stage->stage == MESA_SHADER_FRAGMENT) {
1636       /* We pull the prog data and stats out of the anv_shader_bin because
1637        * the anv_pipeline_stage may not be fully populated if we successfully
1638        * looked up the shader in a cache.
1639        */
1640       const struct brw_wm_prog_data *wm_prog_data =
1641          (const struct brw_wm_prog_data *)stage->bin->prog_data;
1642       struct brw_compile_stats *stats = stage->bin->stats;
1643 
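      /* A fragment shader may have been compiled into multiple SIMD
       * variants; add one executable per variant, using the per-variant
       * code offsets stored in the prog data.
       */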
1644       if (wm_prog_data->dispatch_8 ||
1645           wm_prog_data->dispatch_multi) {
1646          anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1647       }
1648 
1649       if (wm_prog_data->dispatch_16) {
1650          anv_pipeline_add_executable(pipeline, stage, stats++,
1651                                      wm_prog_data->prog_offset_16);
1652       }
1653 
1654       if (wm_prog_data->dispatch_32) {
1655          anv_pipeline_add_executable(pipeline, stage, stats++,
1656                                      wm_prog_data->prog_offset_32);
1657       }
1658    } else {
1659       anv_pipeline_add_executable(pipeline, stage, stage->bin->stats, 0);
1660    }
1661 }
1662 
1663 static void
1664 anv_pipeline_account_shader(struct anv_pipeline *pipeline,
1665                             struct anv_shader_bin *shader)
1666 {
1667    pipeline->scratch_size = MAX2(pipeline->scratch_size,
1668                                  shader->prog_data->total_scratch);
1669 
1670    pipeline->ray_queries = MAX2(pipeline->ray_queries,
1671                                 shader->prog_data->ray_queries);
1672 
1673    if (shader->push_desc_info.used_set_buffer) {
1674       pipeline->use_push_descriptor_buffer |=
1675          mesa_to_vk_shader_stage(shader->stage);
1676    }
1677    if (shader->push_desc_info.used_descriptors &
1678        ~shader->push_desc_info.fully_promoted_ubo_descriptors)
1679       pipeline->use_push_descriptor |= mesa_to_vk_shader_stage(shader->stage);
1680 }
1681 
1682 /* This function returns true if a shader should not be considered for
1683  * compilation because of fast linking. Instead we should use the shader
1684  * binaries provided by the imported libraries.
1685  */
1686 static bool
1687 anv_graphics_pipeline_skip_shader_compile(struct anv_graphics_base_pipeline *pipeline,
1688                                           struct anv_pipeline_stage *stages,
1689                                           bool link_optimize,
1690                                           gl_shader_stage stage)
1691 {
1692    /* Always skip non-active stages */
1693    if (!anv_pipeline_base_has_stage(pipeline, stage))
1694       return true;
1695 
1696    /* When link optimizing, consider all stages */
1697    if (link_optimize)
1698       return false;
1699 
1700    /* Otherwise check if the stage was specified through
1701     * VkGraphicsPipelineCreateInfo
1702     */
1703    assert(stages[stage].info != NULL || stages[stage].imported.bin != NULL);
1704    return stages[stage].info == NULL;
1705 }
1706 
1707 static void
1708 anv_graphics_pipeline_init_keys(struct anv_graphics_base_pipeline *pipeline,
1709                                 const struct vk_graphics_pipeline_state *state,
1710                                 struct anv_pipeline_stage *stages)
1711 {
1712    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1713       if (!anv_pipeline_base_has_stage(pipeline, s))
1714          continue;
1715 
1716       int64_t stage_start = os_time_get_nano();
1717 
1718       const struct anv_device *device = pipeline->base.device;
1719       switch (stages[s].stage) {
1720       case MESA_SHADER_VERTEX:
1721          populate_vs_prog_key(&stages[s], device);
1722          break;
1723       case MESA_SHADER_TESS_CTRL:
1724          populate_tcs_prog_key(&stages[s],
1725                                device,
1726                                BITSET_TEST(state->dynamic,
1727                                            MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) ?
1728                                0 : state->ts->patch_control_points);
1729          break;
1730       case MESA_SHADER_TESS_EVAL:
1731          populate_tes_prog_key(&stages[s], device);
1732          break;
1733       case MESA_SHADER_GEOMETRY:
1734          populate_gs_prog_key(&stages[s], device);
1735          break;
1736       case MESA_SHADER_FRAGMENT: {
1737          /* Assume rasterization is enabled in any of the following cases:
1738           *
1739           *    - We're a pipeline library without pre-rasterization information
1740           *
1741           *    - Rasterization is not disabled in the non-dynamic state
1742           *
1743           *    - Rasterization disable is dynamic
1744           */
1745          const bool raster_enabled =
1746             state->rs == NULL ||
1747             !state->rs->rasterizer_discard_enable ||
1748             BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE);
1749          enum intel_sometimes is_mesh = INTEL_NEVER;
1750          if (device->vk.enabled_extensions.EXT_mesh_shader) {
1751             if (anv_pipeline_base_has_stage(pipeline, MESA_SHADER_VERTEX))
1752                is_mesh = INTEL_NEVER;
1753             else if (anv_pipeline_base_has_stage(pipeline, MESA_SHADER_MESH))
1754                is_mesh = INTEL_ALWAYS;
1755             else {
1756                assert(pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB);
1757                is_mesh = INTEL_SOMETIMES;
1758             }
1759          }
1760          populate_wm_prog_key(&stages[s],
1761                               pipeline,
1762                               state->dynamic,
1763                               raster_enabled ? state->ms : NULL,
1764                               state->fsr, state->rp, is_mesh);
1765          break;
1766       }
1767 
1768       case MESA_SHADER_TASK:
1769          populate_task_prog_key(&stages[s], device);
1770          break;
1771 
1772       case MESA_SHADER_MESH: {
1773          const bool compact_mue =
1774             !(pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB &&
1775               !anv_pipeline_base_has_stage(pipeline, MESA_SHADER_FRAGMENT));
1776          populate_mesh_prog_key(&stages[s], device, compact_mue);
1777          break;
1778       }
1779 
1780       default:
1781          unreachable("Invalid graphics shader stage");
1782       }
1783 
1784       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1785       stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1786    }
1787 }
1788 
1789 static void
1790 anv_graphics_lib_retain_shaders(struct anv_graphics_base_pipeline *pipeline,
1791                                 struct anv_pipeline_stage *stages,
1792                                 bool will_compile)
1793 {
1794    /* There isn't much point in retaining NIR shaders on final pipelines. */
1795    assert(pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB);
1796 
1797    struct anv_graphics_lib_pipeline *lib = (struct anv_graphics_lib_pipeline *) pipeline;
1798 
1799    for (int s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1800       if (!anv_pipeline_base_has_stage(pipeline, s))
1801          continue;
1802 
1803       memcpy(lib->retained_shaders[s].shader_sha1, stages[s].shader_sha1,
1804              sizeof(stages[s].shader_sha1));
1805 
1806       lib->retained_shaders[s].subgroup_size_type = stages[s].subgroup_size_type;
1807 
1808       nir_shader *nir = stages[s].nir != NULL ? stages[s].nir : stages[s].imported.nir;
1809       assert(nir != NULL);
1810 
1811       if (!will_compile) {
1812          lib->retained_shaders[s].nir = nir;
1813       } else {
1814          lib->retained_shaders[s].nir =
1815             nir_shader_clone(pipeline->base.mem_ctx, nir);
1816       }
1817    }
1818 }
1819 
1820 static bool
1821 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_base_pipeline *pipeline,
1822                                           struct vk_pipeline_cache *cache,
1823                                           struct anv_pipeline_stage *stages,
1824                                           bool link_optimize,
1825                                           VkPipelineCreationFeedback *pipeline_feedback)
1826 {
1827    struct anv_device *device = pipeline->base.device;
1828    unsigned cache_hits = 0, found = 0, imported = 0;
1829 
1830    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1831       if (!anv_pipeline_base_has_stage(pipeline, s))
1832          continue;
1833 
1834       int64_t stage_start = os_time_get_nano();
1835 
1836       bool cache_hit;
1837       stages[s].bin =
1838          anv_device_search_for_kernel(device, cache, &stages[s].cache_key,
1839                                       sizeof(stages[s].cache_key), &cache_hit);
1840       if (stages[s].bin) {
1841          found++;
1842          pipeline->shaders[s] = stages[s].bin;
1843       }
1844 
1845       if (cache_hit) {
1846          cache_hits++;
1847          stages[s].feedback.flags |=
1848             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1849       }
1850       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1851    }
1852 
1853    /* When not link optimizing, look up any missing shaders in the imported
1854     * libraries.
1855     */
1856    if (!link_optimize) {
1857       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1858          if (!anv_pipeline_base_has_stage(pipeline, s))
1859             continue;
1860 
1861          if (pipeline->shaders[s] != NULL)
1862             continue;
1863 
1864          if (stages[s].imported.bin == NULL)
1865             continue;
1866 
1867          stages[s].bin = stages[s].imported.bin;
1868          pipeline->shaders[s] = anv_shader_bin_ref(stages[s].imported.bin);
1869          pipeline->source_hashes[s] = stages[s].source_hash;
1870          imported++;
1871       }
1872    }
1873 
1874    if ((found + imported) == __builtin_popcount(pipeline->base.active_stages)) {
1875       if (cache_hits == found && found != 0) {
1876          pipeline_feedback->flags |=
1877             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1878       }
1879       /* We found all our shaders in the cache.  We're done. */
1880       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1881          if (pipeline->shaders[s] == NULL)
1882             continue;
1883 
1884          /* Only add the executables when we're not importing or doing link
1885           * optimizations. The imported executables are added earlier. Link
1886           * optimization can produce different binaries.
1887           */
1888          if (stages[s].imported.bin == NULL || link_optimize)
1889             anv_pipeline_add_executables(&pipeline->base, &stages[s]);
1890          pipeline->source_hashes[s] = stages[s].source_hash;
1891       }
1892       return true;
1893    } else if (found > 0) {
1894       /* We found some but not all of our shaders. This shouldn't happen most
1895        * of the time but it can if we have a partially populated pipeline
1896        * cache.
1897        */
1898       assert(found < __builtin_popcount(pipeline->base.active_stages));
1899 
1900       /* With GPL, this might well happen if the app does an optimized
1901        * link.
1902        */
1903       if (!pipeline->base.device->vk.enabled_extensions.EXT_graphics_pipeline_library) {
1904          vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1905                              &pipeline->base.device->vk.base),
1906                  "Found a partial pipeline in the cache.  This is "
1907                  "most likely caused by an incomplete pipeline cache "
1908                  "import or export");
1909       }
1910 
1911       /* We're going to have to recompile anyway, so just throw away our
1912        * references to the shaders in the cache.  We'll get them out of the
1913        * cache again as part of the compilation process.
1914        */
1915       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1916          stages[s].feedback.flags = 0;
1917          if (pipeline->shaders[s]) {
1918             anv_shader_bin_unref(device, pipeline->shaders[s]);
1919             pipeline->shaders[s] = NULL;
1920          }
1921       }
1922    }
1923 
1924    return false;
1925 }
1926 
1927 static const gl_shader_stage graphics_shader_order[] = {
1928    MESA_SHADER_VERTEX,
1929    MESA_SHADER_TESS_CTRL,
1930    MESA_SHADER_TESS_EVAL,
1931    MESA_SHADER_GEOMETRY,
1932 
1933    MESA_SHADER_TASK,
1934    MESA_SHADER_MESH,
1935 
1936    MESA_SHADER_FRAGMENT,
1937 };
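/* Legacy geometry stages come first, then the mesh pipeline stages, with the
 * fragment shader last. Linking below walks this array backwards while
 * compilation walks it forwards.
 */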
1938 
1939 /* This function loads NIR only for stages specified in
1940  * VkGraphicsPipelineCreateInfo::pStages[]
1941  */
1942 static VkResult
1943 anv_graphics_pipeline_load_nir(struct anv_graphics_base_pipeline *pipeline,
1944                                struct vk_pipeline_cache *cache,
1945                                struct anv_pipeline_stage *stages,
1946                                void *mem_ctx,
1947                                bool need_clone)
1948 {
1949    for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1950       if (!anv_pipeline_base_has_stage(pipeline, s))
1951          continue;
1952 
1953       int64_t stage_start = os_time_get_nano();
1954 
1955       assert(stages[s].stage == s);
1956 
1957       /* Only create the NIR from the pStages[] element if we don't have
1958        * an imported library for the same stage.
1959        */
1960       if (stages[s].imported.bin == NULL) {
1961          VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1962                                                       mem_ctx, &stages[s]);
1963          if (result != VK_SUCCESS)
1964             return result;
1965       } else {
1966          stages[s].nir = need_clone ?
1967                          nir_shader_clone(mem_ctx, stages[s].imported.nir) :
1968                          stages[s].imported.nir;
1969       }
1970 
1971       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1972    }
1973 
1974    return VK_SUCCESS;
1975 }
1976 
1977 static void
1978 anv_pipeline_nir_preprocess(struct anv_pipeline *pipeline,
1979                             struct anv_pipeline_stage *stage)
1980 {
1981    struct anv_device *device = pipeline->device;
1982    const struct brw_compiler *compiler = device->physical->compiler;
1983 
1984    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
1985       .point_coord = true,
1986    };
1987    NIR_PASS(_, stage->nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
1988 
1989    const nir_opt_access_options opt_access_options = {
1990       .is_vulkan = true,
1991    };
1992    NIR_PASS(_, stage->nir, nir_opt_access, &opt_access_options);
1993 
1994    /* Vulkan uses the separate-shader linking model */
1995    stage->nir->info.separate_shader = true;
1996 
1997    struct brw_nir_compiler_opts opts = {
1998       .softfp64 = device->fp64_nir,
1999       /* Assume robustness with EXT_pipeline_robustness because this can be
2000        * turned on/off per pipeline and we have no visibility into that here.
2001        */
2002       .robust_image_access = device->vk.enabled_features.robustImageAccess ||
2003                              device->vk.enabled_features.robustImageAccess2 ||
2004                              device->vk.enabled_extensions.EXT_pipeline_robustness,
2005       .input_vertices = stage->nir->info.stage == MESA_SHADER_TESS_CTRL ?
2006                         stage->key.tcs.input_vertices : 0,
2007    };
2008    brw_preprocess_nir(compiler, stage->nir, &opts);
2009 
2010    NIR_PASS(_, stage->nir, nir_opt_barrier_modes);
2011 
2012    nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
2013 }
2014 
2015 static void
2016 anv_fill_pipeline_creation_feedback(const struct anv_graphics_base_pipeline *pipeline,
2017                                     VkPipelineCreationFeedback *pipeline_feedback,
2018                                     const VkGraphicsPipelineCreateInfo *info,
2019                                     struct anv_pipeline_stage *stages)
2020 {
2021    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2022       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2023    if (create_feedback) {
2024       *create_feedback->pPipelineCreationFeedback = *pipeline_feedback;
2025 
2026       /* VkPipelineCreationFeedbackCreateInfo:
2027        *
2028        *    "An implementation must set or clear the
2029        *     VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT in
2030        *     VkPipelineCreationFeedback::flags for pPipelineCreationFeedback
2031        *     and every element of pPipelineStageCreationFeedbacks."
2032        *
2033        */
2034       for (uint32_t i = 0; i < create_feedback->pipelineStageCreationFeedbackCount; i++) {
2035          create_feedback->pPipelineStageCreationFeedbacks[i].flags &=
2036             ~VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2037       }
2038       /* This part is not really specified in the Vulkan spec at the moment.
2039        * We're kind of guessing what the CTS wants. We might need to update this
2040        * when https://gitlab.khronos.org/vulkan/vulkan/-/issues/3115 is
2041        * clarified.
2042        */
2043       for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2044          if (!anv_pipeline_base_has_stage(pipeline, s))
2045             continue;
2046 
2047          if (stages[s].feedback_idx < create_feedback->pipelineStageCreationFeedbackCount) {
2048             create_feedback->pPipelineStageCreationFeedbacks[
2049                stages[s].feedback_idx] = stages[s].feedback;
2050          }
2051       }
2052    }
2053 }
2054 
2055 static uint32_t
2056 anv_graphics_pipeline_imported_shader_count(struct anv_pipeline_stage *stages)
2057 {
2058    uint32_t count = 0;
2059    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2060       if (stages[s].imported.bin != NULL)
2061          count++;
2062    }
2063    return count;
2064 }
2065 
2066 static VkResult
2067 anv_graphics_pipeline_compile(struct anv_graphics_base_pipeline *pipeline,
2068                               struct anv_pipeline_stage *stages,
2069                               struct vk_pipeline_cache *cache,
2070                               VkPipelineCreationFeedback *pipeline_feedback,
2071                               const VkGraphicsPipelineCreateInfo *info,
2072                               const struct vk_graphics_pipeline_state *state)
2073 {
2074    int64_t pipeline_start = os_time_get_nano();
2075 
2076    struct anv_device *device = pipeline->base.device;
2077    const struct intel_device_info *devinfo = device->info;
2078    const struct brw_compiler *compiler = device->physical->compiler;
2079 
2080    /* Set up the shaders given in this VkGraphicsPipelineCreateInfo::pStages[].
2081     * Other shaders imported from libraries should have been added by
2082     * anv_graphics_pipeline_import_lib().
2083     */
2084    uint32_t shader_count = anv_graphics_pipeline_imported_shader_count(stages);
2085    for (uint32_t i = 0; i < info->stageCount; i++) {
2086       gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
2087 
2088       /* If a pipeline library is loaded in this stage, we should ignore the
2089        * pStages[] entry of the same stage.
2090        */
2091       if (stages[stage].imported.bin != NULL)
2092          continue;
2093 
2094       stages[stage].stage = stage;
2095       stages[stage].pipeline_flags = pipeline->base.flags;
2096       stages[stage].pipeline_pNext = info->pNext;
2097       stages[stage].info = &info->pStages[i];
2098       stages[stage].feedback_idx = shader_count++;
2099 
2100       anv_stage_write_shader_hash(&stages[stage], device);
2101    }
2102 
2103    /* Prepare shader keys for all shaders in pipeline->base.active_stages
2104     * (this includes libraries) before generating the hash for cache lookup.
2105     *
2106     * We're doing this because the spec states that:
2107     *
2108     *    "When an implementation is looking up a pipeline in a pipeline cache,
2109     *     if that pipeline is being created using linked libraries,
2110     *     implementations should always return an equivalent pipeline created
2111     *     with VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT if available,
2112     *     whether or not that bit was specified."
2113     *
2114     * So even if the application does not request link optimization, we have
2115     * to do our cache lookup with the entire set of shader sha1s so that we
2116     * can find what would be the best optimized pipeline, as if we had
2117     * compiled all the shaders together and known the full graphics state.
2118     */
2119    anv_graphics_pipeline_init_keys(pipeline, state, stages);
2120 
2121    uint32_t view_mask = state->rp ? state->rp->view_mask : 0;
2122 
2123    unsigned char sha1[20];
2124    anv_pipeline_hash_graphics(pipeline, stages, view_mask, sha1);
2125 
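   /* Every active graphics stage shares the same pipeline-wide hash; the
    * per-stage cache keys differ only by the stage enum stored alongside it.
    */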
2126    for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2127       if (!anv_pipeline_base_has_stage(pipeline, s))
2128          continue;
2129 
2130       stages[s].cache_key.stage = s;
2131       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
2132    }
2133 
2134    const bool retain_shaders =
2135       pipeline->base.flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
2136    const bool link_optimize =
2137       pipeline->base.flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT;
2138 
2139    VkResult result = VK_SUCCESS;
2140    const bool skip_cache_lookup =
2141       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2142 
2143    if (!skip_cache_lookup) {
2144       bool found_all_shaders =
2145          anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
2146                                                    link_optimize,
2147                                                    pipeline_feedback);
2148 
2149       if (found_all_shaders) {
2150          /* If we need to retain shaders, we need to also load from the NIR
2151           * cache.
2152           */
2153          if (pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB && retain_shaders) {
2154             result = anv_graphics_pipeline_load_nir(pipeline, cache,
2155                                                     stages,
2156                                                     pipeline->base.mem_ctx,
2157                                                     false /* need_clone */);
2158             if (result != VK_SUCCESS) {
2159                vk_perf(VK_LOG_OBJS(cache ? &cache->base :
2160                                    &pipeline->base.device->vk.base),
2161                        "Found all ISA shaders in the cache but not all NIR shaders.");
2162             } else {
2163                anv_graphics_lib_retain_shaders(pipeline, stages, false /* will_compile */);
2164             }
2165          }
2166 
2167          if (result == VK_SUCCESS)
2168             goto done;
2169 
2170          for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
2171             if (!anv_pipeline_base_has_stage(pipeline, s))
2172                continue;
2173 
2174             if (stages[s].nir) {
2175                ralloc_free(stages[s].nir);
2176                stages[s].nir = NULL;
2177             }
2178 
2179             assert(pipeline->shaders[s] != NULL);
2180             anv_shader_bin_unref(device, pipeline->shaders[s]);
2181             pipeline->shaders[s] = NULL;
2182          }
2183       }
2184    }
2185 
2186    if (pipeline->base.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
2187       return VK_PIPELINE_COMPILE_REQUIRED;
2188 
2189    void *tmp_ctx = ralloc_context(NULL);
2190 
2191    result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
2192                                            tmp_ctx, link_optimize /* need_clone */);
2193    if (result != VK_SUCCESS)
2194       goto fail;
2195 
2196    /* Retain shaders now if asked, this only applies to libraries */
2197    if (pipeline->base.type == ANV_PIPELINE_GRAPHICS_LIB && retain_shaders)
2198       anv_graphics_lib_retain_shaders(pipeline, stages, true /* will_compile */);
2199 
2200    /* The following steps will be executed for the shaders we need to compile:
2201     *
2202     *    - those specified through VkGraphicsPipelineCreateInfo::pStages[]
2203     *
2204     *    - or those compiled from libraries with retained shaders (libraries
2205     *      compiled with CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT) if the
2206     *      pipeline has the CREATE_LINK_TIME_OPTIMIZATION_BIT flag.
2207     */
2208 
2209    /* Preprocess all NIR shaders. */
2210    for (int s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2211       if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2212                                                     link_optimize, s))
2213          continue;
2214 
2215       anv_stage_allocate_bind_map_tables(&pipeline->base, &stages[s], tmp_ctx);
2216 
2217       anv_pipeline_nir_preprocess(&pipeline->base, &stages[s]);
2218    }
2219 
2220    if (stages[MESA_SHADER_MESH].info && stages[MESA_SHADER_FRAGMENT].info) {
2221       anv_apply_per_prim_attr_wa(stages[MESA_SHADER_MESH].nir,
2222                                  stages[MESA_SHADER_FRAGMENT].nir,
2223                                  device,
2224                                  info);
2225    }
2226 
2227    /* Walk backwards to link */
2228    struct anv_pipeline_stage *next_stage = NULL;
2229    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
2230       gl_shader_stage s = graphics_shader_order[i];
2231       if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2232                                                     link_optimize, s))
2233          continue;
2234 
2235       struct anv_pipeline_stage *stage = &stages[s];
2236 
2237       switch (s) {
2238       case MESA_SHADER_VERTEX:
2239          anv_pipeline_link_vs(compiler, stage, next_stage);
2240          break;
2241       case MESA_SHADER_TESS_CTRL:
2242          anv_pipeline_link_tcs(compiler, stage, next_stage);
2243          break;
2244       case MESA_SHADER_TESS_EVAL:
2245          anv_pipeline_link_tes(compiler, stage, next_stage);
2246          break;
2247       case MESA_SHADER_GEOMETRY:
2248          anv_pipeline_link_gs(compiler, stage, next_stage);
2249          break;
2250       case MESA_SHADER_TASK:
2251          anv_pipeline_link_task(compiler, stage, next_stage);
2252          break;
2253       case MESA_SHADER_MESH:
2254          anv_pipeline_link_mesh(compiler, stage, next_stage);
2255          break;
2256       case MESA_SHADER_FRAGMENT:
2257          anv_pipeline_link_fs(compiler, stage, state->rp);
2258          break;
2259       default:
2260          unreachable("Invalid graphics shader stage");
2261       }
2262 
2263       next_stage = stage;
2264    }
2265 
2266    bool use_primitive_replication = false;
2267    if (devinfo->ver >= 12 && view_mask != 0) {
2268       /* For some pipelines HW Primitive Replication can be used instead of
2269        * instancing to implement Multiview.  This depends on how viewIndex is
2270        * used in all the active shaders, so this check can't be done per
2271        * individual shader.
2272        */
2273       nir_shader *shaders[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
2274       for (unsigned s = 0; s < ARRAY_SIZE(shaders); s++)
2275          shaders[s] = stages[s].nir;
2276 
2277       use_primitive_replication =
2278          anv_check_for_primitive_replication(device,
2279                                              pipeline->base.active_stages,
2280                                              shaders, view_mask);
2281    }
2282 
2283    struct anv_pipeline_stage *prev_stage = NULL;
2284    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
2285       gl_shader_stage s = graphics_shader_order[i];
2286       if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2287                                                     link_optimize, s))
2288          continue;
2289 
2290       struct anv_pipeline_stage *stage = &stages[s];
2291 
2292       int64_t stage_start = os_time_get_nano();
2293 
2294       anv_pipeline_lower_nir(&pipeline->base, tmp_ctx, stage,
2295                              &pipeline->base.layout, view_mask,
2296                              use_primitive_replication);
2297 
2298       struct shader_info *cur_info = &stage->nir->info;
2299 
2300       if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
2301          struct shader_info *prev_info = &prev_stage->nir->info;
2302 
2303          prev_info->outputs_written |= cur_info->inputs_read &
2304                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
2305          cur_info->inputs_read |= prev_info->outputs_written &
2306                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
2307          prev_info->patch_outputs_written |= cur_info->patch_inputs_read;
2308          cur_info->patch_inputs_read |= prev_info->patch_outputs_written;
2309       }
2310 
2311       anv_fixup_subgroup_size(device, cur_info);
2312 
2313       stage->feedback.duration += os_time_get_nano() - stage_start;
2314 
2315       prev_stage = stage;
2316    }
2317 
2318    /* When the platform can write the primitive shading rate variable and
2319     * KHR_fragment_shading_rate is enabled, and either:
2320     *    - there can be a fragment shader but we don't have it yet, or
2321     *    - the fragment shader needs the fragment shading rate,
2322     *
2323     * figure out the last geometry stage that should write the primitive
2324     * shading rate, and ensure it is marked as used there. The backend will
2325     * write a default value if the shader doesn't actually write it.
2326     *
2327     * We iterate backwards through the stages and stop at the first shader
2328     * that can set the value.
2329     *
2330     * Don't apply this to MESH stages, as this is a per-primitive thing.
2331     */
2332    if (devinfo->has_coarse_pixel_primitive_and_cb &&
2333        device->vk.enabled_extensions.KHR_fragment_shading_rate &&
2334        pipeline_has_coarse_pixel(state->dynamic, state->ms, state->fsr) &&
2335        (!stages[MESA_SHADER_FRAGMENT].info ||
2336         stages[MESA_SHADER_FRAGMENT].key.wm.coarse_pixel) &&
2337        stages[MESA_SHADER_MESH].nir == NULL) {
2338       struct anv_pipeline_stage *last_psr = NULL;
2339 
2340       for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
2341          gl_shader_stage s =
2342             graphics_shader_order[ARRAY_SIZE(graphics_shader_order) - i - 1];
2343 
2344          if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages,
2345                                                        link_optimize, s) ||
2346              !gl_shader_stage_can_set_fragment_shading_rate(s))
2347             continue;
2348 
2349          last_psr = &stages[s];
2350          break;
2351       }
2352 
2353       /* Only set primitive shading rate if there is a pre-rasterization
2354        * shader in this pipeline/pipeline-library.
2355        */
2356       if (last_psr)
2357          last_psr->nir->info.outputs_written |= VARYING_BIT_PRIMITIVE_SHADING_RATE;
2358    }
2359 
2360    prev_stage = NULL;
2361    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
2362       gl_shader_stage s = graphics_shader_order[i];
2363       struct anv_pipeline_stage *stage = &stages[s];
2364 
2365       if (anv_graphics_pipeline_skip_shader_compile(pipeline, stages, link_optimize, s))
2366          continue;
2367 
2368       int64_t stage_start = os_time_get_nano();
2369 
2370       void *stage_ctx = ralloc_context(NULL);
2371       char *error_str = NULL;
2372 
2373       switch (s) {
2374       case MESA_SHADER_VERTEX:
2375          anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
2376                                  stage, view_mask, &error_str);
2377          break;
2378       case MESA_SHADER_TESS_CTRL:
2379          anv_pipeline_compile_tcs(compiler, stage_ctx, device,
2380                                   stage, prev_stage, &error_str);
2381          break;
2382       case MESA_SHADER_TESS_EVAL:
2383          anv_pipeline_compile_tes(compiler, stage_ctx, device,
2384                                   stage, prev_stage, &error_str);
2385          break;
2386       case MESA_SHADER_GEOMETRY:
2387          anv_pipeline_compile_gs(compiler, stage_ctx, device,
2388                                  stage, prev_stage, &error_str);
2389          break;
2390       case MESA_SHADER_TASK:
2391          anv_pipeline_compile_task(compiler, stage_ctx, device,
2392                                    stage, &error_str);
2393          break;
2394       case MESA_SHADER_MESH:
2395          anv_pipeline_compile_mesh(compiler, stage_ctx, device,
2396                                    stage, prev_stage, &error_str);
2397          break;
2398       case MESA_SHADER_FRAGMENT:
2399          anv_pipeline_compile_fs(compiler, stage_ctx, device,
2400                                  stage, prev_stage, pipeline,
2401                                  view_mask, use_primitive_replication,
2402                                  &error_str);
2403          break;
2404       default:
2405          unreachable("Invalid graphics shader stage");
2406       }
2407       if (stage->code == NULL) {
2408          if (error_str)
2409             result = vk_errorf(pipeline, VK_ERROR_UNKNOWN, "%s", error_str);
2410          else
2411             result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2412          ralloc_free(stage_ctx);
2413          goto fail;
2414       }
2415 
2416       anv_nir_validate_push_layout(device->physical, &stage->prog_data.base,
2417                                    &stage->bind_map);
2418 
2419       struct anv_shader_upload_params upload_params = {
2420          .stage               = s,
2421          .key_data            = &stage->cache_key,
2422          .key_size            = sizeof(stage->cache_key),
2423          .kernel_data         = stage->code,
2424          .kernel_size         = stage->prog_data.base.program_size,
2425          .prog_data           = &stage->prog_data.base,
2426          .prog_data_size      = brw_prog_data_size(s),
2427          .stats               = stage->stats,
2428          .num_stats           = stage->num_stats,
2429          .xfb_info            = stage->nir->xfb_info,
2430          .bind_map            = &stage->bind_map,
2431          .push_desc_info      = &stage->push_desc_info,
2432          .dynamic_push_values = stage->dynamic_push_values,
2433       };
2434 
2435       stage->bin =
2436          anv_device_upload_kernel(device, cache, &upload_params);
2437       if (!stage->bin) {
2438          ralloc_free(stage_ctx);
2439          result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2440          goto fail;
2441       }
2442 
2443       anv_pipeline_add_executables(&pipeline->base, stage);
2444       pipeline->source_hashes[s] = stage->source_hash;
2445       pipeline->shaders[s] = stage->bin;
2446 
2447       ralloc_free(stage_ctx);
2448 
2449       stage->feedback.duration += os_time_get_nano() - stage_start;
2450 
2451       prev_stage = stage;
2452    }
2453 
2454    /* Finally add the imported shaders that were not compiled as part of this
2455     * step.
2456     */
2457    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2458       if (!anv_pipeline_base_has_stage(pipeline, s))
2459          continue;
2460 
2461       if (pipeline->shaders[s] != NULL)
2462          continue;
2463 
2464       /* We should have recompiled everything with link optimization. */
2465       assert(!link_optimize);
2466 
2467       struct anv_pipeline_stage *stage = &stages[s];
2468 
2469       pipeline->source_hashes[s] = stage->source_hash;
2470       pipeline->shaders[s] = anv_shader_bin_ref(stage->imported.bin);
2471    }
2472 
2473    ralloc_free(tmp_ctx);
2474 
2475 done:
2476 
2477    /* Write the feedback index into the pipeline */
2478    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2479       if (!anv_pipeline_base_has_stage(pipeline, s))
2480          continue;
2481 
2482       struct anv_pipeline_stage *stage = &stages[s];
2483       pipeline->feedback_index[s] = stage->feedback_idx;
2484       pipeline->robust_flags[s] = stage->robust_flags;
2485 
2486       anv_pipeline_account_shader(&pipeline->base, pipeline->shaders[s]);
2487    }
2488 
2489    pipeline_feedback->duration = os_time_get_nano() - pipeline_start;
2490 
2491    if (pipeline->shaders[MESA_SHADER_FRAGMENT]) {
2492       pipeline->fragment_dynamic =
2493          anv_graphics_pipeline_stage_fragment_dynamic(
2494             &stages[MESA_SHADER_FRAGMENT]);
2495    }
2496 
2497    return VK_SUCCESS;
2498 
2499 fail:
2500    ralloc_free(tmp_ctx);
2501 
2502    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
2503       if (pipeline->shaders[s])
2504          anv_shader_bin_unref(device, pipeline->shaders[s]);
2505    }
2506 
2507    return result;
2508 }
2509 
2510 static VkResult
2511 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
2512                         struct vk_pipeline_cache *cache,
2513                         const VkComputePipelineCreateInfo *info)
2514 {
2515    ASSERTED const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
2516    assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
2517 
2518    VkPipelineCreationFeedback pipeline_feedback = {
2519       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2520    };
2521    int64_t pipeline_start = os_time_get_nano();
2522 
2523    struct anv_device *device = pipeline->base.device;
2524    const struct brw_compiler *compiler = device->physical->compiler;
2525 
2526    struct anv_pipeline_stage stage = {
2527       .stage = MESA_SHADER_COMPUTE,
2528       .info = &info->stage,
2529       .pipeline_flags = pipeline->base.flags,
2530       .pipeline_pNext = info->pNext,
2531       .cache_key = {
2532          .stage = MESA_SHADER_COMPUTE,
2533       },
2534       .feedback = {
2535          .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2536       },
2537    };
2538    anv_stage_write_shader_hash(&stage, device);
2539 
2540    populate_cs_prog_key(&stage, device);
2541 
2542    const bool skip_cache_lookup =
2543       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2544 
2545    anv_pipeline_hash_compute(pipeline, &stage, stage.cache_key.sha1);
2546 
2547    bool cache_hit = false;
2548    if (!skip_cache_lookup) {
2549       stage.bin = anv_device_search_for_kernel(device, cache,
2550                                                &stage.cache_key,
2551                                                sizeof(stage.cache_key),
2552                                                &cache_hit);
2553    }
2554 
2555    if (stage.bin == NULL &&
2556        (pipeline->base.flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
2557       return VK_PIPELINE_COMPILE_REQUIRED;
2558 
2559    void *mem_ctx = ralloc_context(NULL);
2560    if (stage.bin == NULL) {
2561       int64_t stage_start = os_time_get_nano();
2562 
2563       anv_stage_allocate_bind_map_tables(&pipeline->base, &stage, mem_ctx);
2564 
2565       VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2566                                                    mem_ctx, &stage);
2567       if (result != VK_SUCCESS) {
2568          ralloc_free(mem_ctx);
2569          return result;
2570       }
2571 
2572       anv_pipeline_nir_preprocess(&pipeline->base, &stage);
2573 
2574       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage,
2575                              &pipeline->base.layout, 0 /* view_mask */,
2576                              false /* use_primitive_replication */);
2577 
2578       anv_fixup_subgroup_size(device, &stage.nir->info);
2579 
2580       stage.num_stats = 1;
2581 
2582       struct brw_compile_cs_params params = {
2583          .base = {
2584             .nir = stage.nir,
2585             .stats = stage.stats,
2586             .log_data = device,
2587             .mem_ctx = mem_ctx,
2588             .source_hash = stage.source_hash,
2589          },
2590          .key = &stage.key.cs,
2591          .prog_data = &stage.prog_data.cs,
2592       };
2593 
2594       stage.code = brw_compile_cs(compiler, &params);
2595       if (stage.code == NULL) {
2596          VkResult result;
2597 
2598          if (params.base.error_str)
2599             result = vk_errorf(pipeline, VK_ERROR_UNKNOWN, "%s", params.base.error_str);
2600          else
2601             result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2602 
2603          ralloc_free(mem_ctx);
2604          return result;
2605       }
2606 
2607       anv_nir_validate_push_layout(device->physical, &stage.prog_data.base,
2608                                    &stage.bind_map);
2609 
2610       struct anv_shader_upload_params upload_params = {
2611          .stage               = MESA_SHADER_COMPUTE,
2612          .key_data            = &stage.cache_key,
2613          .key_size            = sizeof(stage.cache_key),
2614          .kernel_data         = stage.code,
2615          .kernel_size         = stage.prog_data.base.program_size,
2616          .prog_data           = &stage.prog_data.base,
2617          .prog_data_size      = sizeof(stage.prog_data.cs),
2618          .stats               = stage.stats,
2619          .num_stats           = stage.num_stats,
2620          .bind_map            = &stage.bind_map,
2621          .push_desc_info      = &stage.push_desc_info,
2622          .dynamic_push_values = stage.dynamic_push_values,
2623       };
2624 
2625       stage.bin = anv_device_upload_kernel(device, cache, &upload_params);
2626       if (!stage.bin) {
2627          ralloc_free(mem_ctx);
2628          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2629       }
2630 
2631       stage.feedback.duration = os_time_get_nano() - stage_start;
2632    }
2633 
2634    anv_pipeline_account_shader(&pipeline->base, stage.bin);
2635    anv_pipeline_add_executables(&pipeline->base, &stage);
2636    pipeline->source_hash = stage.source_hash;
2637 
2638    ralloc_free(mem_ctx);
2639 
2640    if (cache_hit) {
2641       stage.feedback.flags |=
2642          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2643       pipeline_feedback.flags |=
2644          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
2645    }
2646    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2647 
2648    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
2649       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
2650    if (create_feedback) {
2651       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2652 
2653       if (create_feedback->pipelineStageCreationFeedbackCount) {
2654          assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
2655          create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
2656       }
2657    }
2658 
2659    pipeline->cs = stage.bin;
2660 
2661    return VK_SUCCESS;
2662 }
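/* For illustration only (application-side sketch, not driver code): the
 * feedback filled in above is consumed by chaining a
 * VkPipelineCreationFeedbackCreateInfo into VkComputePipelineCreateInfo::pNext.
 * Variable names below are hypothetical:
 *
 *    VkPipelineCreationFeedback pipeline_fb = {0}, stage_fb = {0};
 *    const VkPipelineCreationFeedbackCreateInfo fb_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO,
 *       .pPipelineCreationFeedback = &pipeline_fb,
 *       .pipelineStageCreationFeedbackCount = 1,
 *       .pPipelineStageCreationFeedbacks = &stage_fb,
 *    };
 *    // chain &fb_info into the VkComputePipelineCreateInfo, then:
 *    // vkCreateComputePipelines(device, cache, 1, &create_info, NULL, &pipe);
 *    if (pipeline_fb.flags &
 *        VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT) {
 *       // the shader came straight out of the pipeline cache
 *    }
 */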
2663 
2664 static VkResult
2665 anv_compute_pipeline_create(struct anv_device *device,
2666                             struct vk_pipeline_cache *cache,
2667                             const VkComputePipelineCreateInfo *pCreateInfo,
2668                             const VkAllocationCallbacks *pAllocator,
2669                             VkPipeline *pPipeline)
2670 {
2671    struct anv_compute_pipeline *pipeline;
2672    VkResult result;
2673 
2674    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
2675 
2676    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
2677                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2678    if (pipeline == NULL)
2679       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2680 
2681    result = anv_pipeline_init(&pipeline->base, device,
2682                               ANV_PIPELINE_COMPUTE,
2683                               vk_compute_pipeline_create_flags(pCreateInfo),
2684                               pAllocator);
2685    if (result != VK_SUCCESS) {
2686       vk_free2(&device->vk.alloc, pAllocator, pipeline);
2687       return result;
2688    }
2689 
2690 
2691    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
2692    anv_pipeline_init_layout(&pipeline->base, pipeline_layout);
2693 
2694    pipeline->base.active_stages = VK_SHADER_STAGE_COMPUTE_BIT;
2695 
2696    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2697                          pipeline->batch_data, sizeof(pipeline->batch_data));
2698 
2699    result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
2700    if (result != VK_SUCCESS) {
2701       anv_pipeline_finish(&pipeline->base, device);
2702       vk_free2(&device->vk.alloc, pAllocator, pipeline);
2703       return result;
2704    }
2705 
2706    anv_genX(device->info, compute_pipeline_emit)(pipeline);
2707 
2708    ANV_RMV(compute_pipeline_create, device, pipeline, false);
2709 
2710    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
2711 
2712    return pipeline->base.batch.status;
2713 }
2714 
2715 VkResult anv_CreateComputePipelines(
2716     VkDevice                                    _device,
2717     VkPipelineCache                             pipelineCache,
2718     uint32_t                                    count,
2719     const VkComputePipelineCreateInfo*          pCreateInfos,
2720     const VkAllocationCallbacks*                pAllocator,
2721     VkPipeline*                                 pPipelines)
2722 {
2723    ANV_FROM_HANDLE(anv_device, device, _device);
2724    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
2725 
2726    VkResult result = VK_SUCCESS;
2727 
2728    unsigned i;
2729    for (i = 0; i < count; i++) {
2730       const VkPipelineCreateFlags2KHR flags =
2731          vk_compute_pipeline_create_flags(&pCreateInfos[i]);
2732       VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
2733                                                  &pCreateInfos[i],
2734                                                  pAllocator, &pPipelines[i]);
2735 
2736       if (res != VK_SUCCESS) {
2737          result = res;
2738          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
2739             break;
2740          pPipelines[i] = VK_NULL_HANDLE;
2741       }
2742    }
2743 
2744    for (; i < count; i++)
2745       pPipelines[i] = VK_NULL_HANDLE;
2746 
2747    return result;
2748 }
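/* For illustration only (application-side sketch, not driver code): the
 * VK_PIPELINE_COMPILE_REQUIRED and early-return handling above supports the
 * usual "probe the cache first, compile later" pattern.  Variable names are
 * hypothetical:
 *
 *    create_info.flags |= VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT |
 *                         VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT;
 *    VkResult r = vkCreateComputePipelines(device, cache, 1, &create_info,
 *                                          NULL, &pipe);
 *    if (r == VK_PIPELINE_COMPILE_REQUIRED) {
 *       // Not in the cache: drop the flags and compile for real, e.g. on a
 *       // background thread.
 *       create_info.flags &= ~(VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT |
 *                              VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT);
 *       r = vkCreateComputePipelines(device, cache, 1, &create_info, NULL, &pipe);
 *    }
 */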
2749 
2750 /**
2751  * Calculate the desired L3 partitioning based on the current state of the
2752  * pipeline.  For now this simply returns the conservative defaults calculated
2753  * by intel_get_default_l3_weights(), but we could probably do better by
2754  * gathering more statistics from the pipeline state (e.g. a guess of expected
2755  * URB usage and bound surfaces), or by using feedback from performance counters.
2756  */
2757 void
2758 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2759 {
2760    const struct intel_device_info *devinfo = pipeline->device->info;
2761 
2762    const struct intel_l3_weights w =
2763       intel_get_default_l3_weights(devinfo, true, needs_slm);
2764 
2765    pipeline->l3_config = intel_get_l3_config(devinfo, w);
2766 }
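/* A sketch of how a caller might pick needs_slm (the exact call sites live in
 * the genX emit code; get_cs_prog_data() and total_shared are assumed here):
 * compute-style pipelines want the SLM-friendly partitioning whenever the
 * compiled shader actually uses shared local memory:
 *
 *    const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
 *    anv_pipeline_setup_l3_config(&pipeline->base,
 *                                 cs_prog_data->base.total_shared > 0);
 */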
2767 
2768 static uint32_t
2769 get_vs_input_elements(const struct brw_vs_prog_data *vs_prog_data)
2770 {
2771    /* Pull inputs_read out of the VS prog data */
2772    const uint64_t inputs_read = vs_prog_data->inputs_read;
2773    const uint64_t double_inputs_read =
2774       vs_prog_data->double_inputs_read & inputs_read;
2775    assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
2776    const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
2777    const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
2778 
2779    return __builtin_popcount(elements) -
2780           __builtin_popcount(elements_double) / 2;
2781 }
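/* A worked example (illustrative, and assuming that a 64-bit attribute
 * occupies two GENERIC slots with both slots also flagged in
 * double_inputs_read): a VS reading a vec4 at GENERIC0 plus a dvec4 at
 * GENERIC1/GENERIC2 gives popcount(elements) == 3 and
 * popcount(elements_double) == 2, so we program 3 - 2/2 = 2 vertex elements.
 */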
2782 
2783 static void
2784 anv_graphics_pipeline_emit(struct anv_graphics_pipeline *pipeline,
2785                            const struct vk_graphics_pipeline_state *state)
2786 {
2787    pipeline->view_mask = state->rp->view_mask;
2788 
2789    anv_pipeline_setup_l3_config(&pipeline->base.base, false);
2790 
2791    if (anv_pipeline_is_primitive(pipeline)) {
2792       const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
2793 
2794       /* The total number of vertex elements we need to program. We might need
2795        * a couple more to implement some of the draw parameters.
2796        */
2797       pipeline->svgs_count =
2798          (vs_prog_data->uses_vertexid ||
2799           vs_prog_data->uses_instanceid ||
2800           vs_prog_data->uses_firstvertex ||
2801           vs_prog_data->uses_baseinstance) + vs_prog_data->uses_drawid;
2802 
2803       pipeline->vs_input_elements = get_vs_input_elements(vs_prog_data);
2804 
2805       pipeline->vertex_input_elems =
2806          (BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI) ?
2807           0 : pipeline->vs_input_elements) + pipeline->svgs_count;
2808 
2809       /* Our implementation of VK_KHR_multiview uses instancing to draw the
2810        * different views when primitive replication cannot be used.  If the
2811        * client asks for instancing, we need to multiply the client's instance
2812        * count at draw time and the instance divisors in the vertex bindings by
2813        * the number of views to ensure that we repeat the client's per-instance
2814        * data once for each view.
2815        */
2816       const bool uses_primitive_replication =
2817          anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map.num_pos_slots > 1;
2818       pipeline->instance_multiplier = 1;
2819       if (pipeline->view_mask && !uses_primitive_replication)
2820          pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
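      /* For example (illustrative numbers only): with view_mask = 0x5 (views 0
       * and 2) and no primitive replication, instance_multiplier becomes 2, so
       * a client draw with instanceCount = 3 is emitted as 6 hardware instances
       * and the per-instance vertex data is repeated once per view.
       */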
2821    } else {
2822       assert(anv_pipeline_is_mesh(pipeline));
2823       /* TODO(mesh): Mesh vs. Multiview with Instancing. */
2824    }
2825 
2826 
2827    pipeline->dynamic_patch_control_points =
2828       anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) &&
2829       BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS) &&
2830       (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->dynamic_push_values &
2831        ANV_DYNAMIC_PUSH_INPUT_VERTICES);
2832 
2833    if (pipeline->base.shaders[MESA_SHADER_FRAGMENT] && state->ms) {
2834       pipeline->sample_shading_enable = state->ms->sample_shading_enable;
2835       pipeline->min_sample_shading = state->ms->min_sample_shading;
2836    }
2837 
2838    /* Mark all color output as unused by default */
2839    memset(pipeline->color_output_mapping,
2840           ANV_COLOR_OUTPUT_UNUSED,
2841           sizeof(pipeline->color_output_mapping));
2842 
2843    if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
2844       /* Count the number of color attachments in the binding table */
2845       const struct anv_pipeline_bind_map *bind_map =
2846          &pipeline->base.shaders[MESA_SHADER_FRAGMENT]->bind_map;
2847 
2848       if (state->cal != NULL) {
2849          /* Build a map of fragment color output to attachment */
2850          uint8_t rt_to_att[MAX_RTS];
2851          memset(rt_to_att, ANV_COLOR_OUTPUT_DISABLED, MAX_RTS);
2852          for (uint32_t i = 0; i < MAX_RTS; i++) {
2853             if (state->cal->color_map[i] != MESA_VK_ATTACHMENT_UNUSED)
2854                rt_to_att[state->cal->color_map[i]] = i;
2855          }
2856 
2857          /* For each fragment shader output that is not unused, apply the
2858           * remapping to pipeline->color_output_mapping.
2859           */
2860          unsigned i;
2861          for (i = 0; i < MIN2(bind_map->surface_count, MAX_RTS); i++) {
2862             if (bind_map->surface_to_descriptor[i].set !=
2863                 ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
2864                break;
2865 
2866             uint32_t index = bind_map->surface_to_descriptor[i].index;
2867             if (index >= MAX_RTS) {
2868                assert(index <= 0xff);
2869                pipeline->color_output_mapping[i] = index;
2870             } else {
2871                pipeline->color_output_mapping[i] = rt_to_att[i];
2872             }
2873          }
2874          pipeline->num_color_outputs = i;
2875       }
2876    }
2877 
2878    const struct anv_device *device = pipeline->base.base.device;
2879    const struct intel_device_info *devinfo = device->info;
2880    anv_genX(devinfo, graphics_pipeline_emit)(pipeline, state);
2881 }
2882 
2883 static void
2884 anv_graphics_pipeline_import_layout(struct anv_graphics_base_pipeline *pipeline,
2885                                     struct anv_pipeline_sets_layout *layout)
2886 {
2887    pipeline->base.layout.independent_sets |= layout->independent_sets;
2888 
2889    for (uint32_t s = 0; s < layout->num_sets; s++) {
2890       if (layout->set[s].layout == NULL)
2891          continue;
2892 
2893       anv_pipeline_sets_layout_add(&pipeline->base.layout, s,
2894                                    layout->set[s].layout);
2895    }
2896 }
2897 
2898 static void
2899 anv_graphics_pipeline_import_lib(struct anv_graphics_base_pipeline *pipeline,
2900                                  bool link_optimize,
2901                                  bool retain_shaders,
2902                                  struct anv_pipeline_stage *stages,
2903                                  struct anv_graphics_lib_pipeline *lib)
2904 {
2905    struct anv_pipeline_sets_layout *lib_layout =
2906       &lib->base.base.layout;
2907    anv_graphics_pipeline_import_layout(pipeline, lib_layout);
2908 
2909    /* We can't have shaders specified twice through libraries. */
2910    assert((pipeline->base.active_stages & lib->base.base.active_stages) == 0);
2911 
2912    /* VK_EXT_graphics_pipeline_library:
2913     *
2914     *    "To perform link time optimizations,
2915     *     VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT must
2916     *     be specified on all pipeline libraries that are being linked
2917     *     together. Implementations should retain any additional information
2918     *     needed to perform optimizations at the final link step when this bit
2919     *     is present."
2920     */
2921    assert(!link_optimize || lib->retain_shaders);
2922 
2923    pipeline->base.active_stages |= lib->base.base.active_stages;
2924 
2925    /* Propagate the fragment dynamic flag, unless we're doing link
2926     * optimization, in which case we'll have all the state information and this
2927     * will never be dynamic.
2928     */
2929    if (!link_optimize) {
2930       if (lib->base.fragment_dynamic) {
2931          assert(lib->base.base.active_stages & VK_SHADER_STAGE_FRAGMENT_BIT);
2932          pipeline->fragment_dynamic = true;
2933       }
2934    }
2935 
2936    uint32_t shader_count = anv_graphics_pipeline_imported_shader_count(stages);
2937    for (uint32_t s = 0; s < ARRAY_SIZE(lib->base.shaders); s++) {
2938       if (lib->base.shaders[s] == NULL)
2939          continue;
2940 
2941       stages[s].stage = s;
2942       stages[s].pipeline_flags = pipeline->base.flags;
2943       stages[s].feedback_idx = shader_count + lib->base.feedback_index[s];
2944       stages[s].robust_flags = lib->base.robust_flags[s];
2945 
2946       /* Always import the shader sha1; it will be used for cache lookups. */
2947       memcpy(stages[s].shader_sha1, lib->retained_shaders[s].shader_sha1,
2948              sizeof(stages[s].shader_sha1));
2949       stages[s].source_hash = lib->base.source_hashes[s];
2950 
2951       stages[s].subgroup_size_type = lib->retained_shaders[s].subgroup_size_type;
2952       stages[s].imported.nir = lib->retained_shaders[s].nir;
2953       stages[s].imported.bin = lib->base.shaders[s];
2954    }
2955 
2956    /* When not link optimizing, import the executables (shader descriptions
2957     * for VK_KHR_pipeline_executable_properties). With link optimization there
2958     * is a chance it'll produce different binaries, so we'll add the optimized
2959     * version later.
2960     */
2961    if (!link_optimize) {
2962       util_dynarray_foreach(&lib->base.base.executables,
2963                             struct anv_pipeline_executable, exe) {
2964          util_dynarray_append(&pipeline->base.executables,
2965                               struct anv_pipeline_executable, *exe);
2966       }
2967    }
2968 }
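/* For illustration only (application-side sketch, not driver code): the
 * libraries imported above are created and linked roughly as follows, using
 * standard VK_KHR_pipeline_library / VK_EXT_graphics_pipeline_library usage
 * with hypothetical variable names:
 *
 *    // 1) Create a library holding a subset of state/shaders:
 *    lib_info.flags |= VK_PIPELINE_CREATE_LIBRARY_BIT_KHR |
 *                      VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT;
 *    vkCreateGraphicsPipelines(device, cache, 1, &lib_info, NULL, &vs_lib);
 *
 *    // 2) Link libraries into a complete pipeline, optionally optimized:
 *    const VkPipelineLibraryCreateInfoKHR link = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,
 *       .libraryCount = 2,
 *       .pLibraries = (VkPipeline[]) { vs_lib, fs_lib },
 *    };
 *    // chain &link into the final VkGraphicsPipelineCreateInfo and set
 *    // VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT in its flags.
 */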
2969 
2970 static void
2971 anv_graphics_lib_validate_shaders(struct anv_graphics_lib_pipeline *lib,
2972                                   bool retained_shaders)
2973 {
2974    for (uint32_t s = 0; s < ARRAY_SIZE(lib->retained_shaders); s++) {
2975       if (anv_pipeline_base_has_stage(&lib->base, s)) {
2976          assert(!retained_shaders || lib->retained_shaders[s].nir != NULL);
2977          assert(lib->base.shaders[s] != NULL);
2978       }
2979    }
2980 }
2981 
2982 static VkResult
2983 anv_graphics_lib_pipeline_create(struct anv_device *device,
2984                                  struct vk_pipeline_cache *cache,
2985                                  const VkGraphicsPipelineCreateInfo *pCreateInfo,
2986                                  const VkAllocationCallbacks *pAllocator,
2987                                  VkPipeline *pPipeline)
2988 {
2989    struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
2990    VkPipelineCreationFeedback pipeline_feedback = {
2991       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
2992    };
2993    int64_t pipeline_start = os_time_get_nano();
2994 
2995    struct anv_graphics_lib_pipeline *pipeline;
2996    VkResult result;
2997 
2998    const VkPipelineCreateFlags2KHR flags =
2999       vk_graphics_pipeline_create_flags(pCreateInfo);
3000    assert(flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR);
3001 
3002    const VkPipelineLibraryCreateInfoKHR *libs_info =
3003       vk_find_struct_const(pCreateInfo->pNext,
3004                            PIPELINE_LIBRARY_CREATE_INFO_KHR);
3005 
3006    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
3007                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3008    if (pipeline == NULL)
3009       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3010 
3011    result = anv_pipeline_init(&pipeline->base.base, device,
3012                               ANV_PIPELINE_GRAPHICS_LIB, flags,
3013                               pAllocator);
3014    if (result != VK_SUCCESS) {
3015       vk_free2(&device->vk.alloc, pAllocator, pipeline);
3016       if (result == VK_PIPELINE_COMPILE_REQUIRED)
3017          *pPipeline = VK_NULL_HANDLE;
3018       return result;
3019    }
3020 
3021    /* Capture the retain state before we compile/load any shader. */
3022    pipeline->retain_shaders =
3023       (flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT) != 0;
3024 
3025    /* If we have libraries, import them first. */
3026    if (libs_info) {
3027       for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3028          ANV_FROM_HANDLE(anv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3029          struct anv_graphics_lib_pipeline *gfx_pipeline_lib =
3030             anv_pipeline_to_graphics_lib(pipeline_lib);
3031 
3032          vk_graphics_pipeline_state_merge(&pipeline->state, &gfx_pipeline_lib->state);
3033          anv_graphics_pipeline_import_lib(&pipeline->base,
3034                                           false /* link_optimize */,
3035                                           pipeline->retain_shaders,
3036                                           stages, gfx_pipeline_lib);
3037       }
3038    }
3039 
3040    result = vk_graphics_pipeline_state_fill(&device->vk,
3041                                             &pipeline->state, pCreateInfo,
3042                                             NULL /* driver_rp */,
3043                                             0 /* driver_rp_flags */,
3044                                             &pipeline->all_state, NULL, 0, NULL);
3045    if (result != VK_SUCCESS) {
3046       anv_pipeline_finish(&pipeline->base.base, device);
3047       vk_free2(&device->vk.alloc, pAllocator, pipeline);
3048       return result;
3049    }
3050 
3051    pipeline->base.base.active_stages = pipeline->state.shader_stages;
3052 
3053    /* After we've imported all the libraries' layouts, import the pipeline
3054     * layout and hash the whole lot.
3055     */
3056    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3057    if (pipeline_layout != NULL) {
3058       anv_graphics_pipeline_import_layout(&pipeline->base,
3059                                           &pipeline_layout->sets_layout);
3060    }
3061 
3062    anv_pipeline_sets_layout_hash(&pipeline->base.base.layout);
3063 
3064    /* Compile the shaders.  We can skip this if there are no active stages
3065     * in the pipeline.
3066     */
3067    if (pipeline->base.base.active_stages != 0) {
3068       result = anv_graphics_pipeline_compile(&pipeline->base, stages,
3069                                              cache, &pipeline_feedback,
3070                                              pCreateInfo, &pipeline->state);
3071       if (result != VK_SUCCESS) {
3072          anv_pipeline_finish(&pipeline->base.base, device);
3073          vk_free2(&device->vk.alloc, pAllocator, pipeline);
3074          return result;
3075       }
3076    }
3077 
3078    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3079 
3080    anv_fill_pipeline_creation_feedback(&pipeline->base, &pipeline_feedback,
3081                                        pCreateInfo, stages);
3082 
3083    anv_graphics_lib_validate_shaders(
3084       pipeline,
3085       flags & VK_PIPELINE_CREATE_2_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT);
3086 
3087    *pPipeline = anv_pipeline_to_handle(&pipeline->base.base);
3088 
3089    return VK_SUCCESS;
3090 }
3091 
3092 static VkResult
3093 anv_graphics_pipeline_create(struct anv_device *device,
3094                              struct vk_pipeline_cache *cache,
3095                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
3096                              const VkAllocationCallbacks *pAllocator,
3097                              VkPipeline *pPipeline)
3098 {
3099    struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
3100    VkPipelineCreationFeedback pipeline_feedback = {
3101       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3102    };
3103    int64_t pipeline_start = os_time_get_nano();
3104 
3105    struct anv_graphics_pipeline *pipeline;
3106    VkResult result;
3107 
3108    const VkPipelineCreateFlags2KHR flags =
3109       vk_graphics_pipeline_create_flags(pCreateInfo);
3110    assert((flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) == 0);
3111 
3112    const VkPipelineLibraryCreateInfoKHR *libs_info =
3113       vk_find_struct_const(pCreateInfo->pNext,
3114                            PIPELINE_LIBRARY_CREATE_INFO_KHR);
3115 
3116    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
3117                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
3118    if (pipeline == NULL)
3119       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3120 
3121    /* Initialize some information required by shaders */
3122    result = anv_pipeline_init(&pipeline->base.base, device,
3123                               ANV_PIPELINE_GRAPHICS, flags,
3124                               pAllocator);
3125    if (result != VK_SUCCESS) {
3126       vk_free2(&device->vk.alloc, pAllocator, pipeline);
3127       return result;
3128    }
3129 
3130    const bool link_optimize =
3131       (flags & VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT) != 0;
3132 
3133    struct vk_graphics_pipeline_all_state all;
3134    struct vk_graphics_pipeline_state state = { };
3135 
3136    /* If we have libraries, import them first. */
3137    if (libs_info) {
3138       for (uint32_t i = 0; i < libs_info->libraryCount; i++) {
3139          ANV_FROM_HANDLE(anv_pipeline, pipeline_lib, libs_info->pLibraries[i]);
3140          struct anv_graphics_lib_pipeline *gfx_pipeline_lib =
3141             anv_pipeline_to_graphics_lib(pipeline_lib);
3142 
3143          /* If we have link time optimization, all libraries must be created
3144           * with
3145           * VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT.
3146           */
3147          assert(!link_optimize || gfx_pipeline_lib->retain_shaders);
3148 
3149          vk_graphics_pipeline_state_merge(&state, &gfx_pipeline_lib->state);
3150          anv_graphics_pipeline_import_lib(&pipeline->base,
3151                                           link_optimize,
3152                                           false /* retain_shaders */,
3153                                           stages,
3154                                           gfx_pipeline_lib);
3155       }
3156    }
3157 
3158    result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
3159                                             NULL /* driver_rp */,
3160                                             0 /* driver_rp_flags */,
3161                                             &all, NULL, 0, NULL);
3162    if (result != VK_SUCCESS) {
3163       anv_pipeline_finish(&pipeline->base.base, device);
3164       vk_free2(&device->vk.alloc, pAllocator, pipeline);
3165       return result;
3166    }
3167 
3168    pipeline->dynamic_state.vi = &pipeline->vertex_input;
3169    pipeline->dynamic_state.ms.sample_locations = &pipeline->base.sample_locations;
3170    vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, &state);
3171 
3172    pipeline->base.base.active_stages = state.shader_stages;
3173 
3174    /* Sanity check on the shaders */
3175    assert(pipeline->base.base.active_stages & VK_SHADER_STAGE_VERTEX_BIT ||
3176           pipeline->base.base.active_stages & VK_SHADER_STAGE_MESH_BIT_EXT);
3177 
3178    if (anv_pipeline_is_mesh(pipeline)) {
3179       assert(device->physical->vk.supported_extensions.EXT_mesh_shader);
3180    }
3181 
3182    /* After we've imported all the libraries' layouts, import the pipeline
3183     * layout and hash the whole lot.
3184     */
3185    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3186    if (pipeline_layout != NULL) {
3187       anv_graphics_pipeline_import_layout(&pipeline->base,
3188                                           &pipeline_layout->sets_layout);
3189    }
3190 
3191    anv_pipeline_sets_layout_hash(&pipeline->base.base.layout);
3192 
3193    /* Compile the shaders; all required information should have been copied
3194     * in the previous steps.  We can skip this if there are no active stages
3195     * in the pipeline.
3196     */
3197    result = anv_graphics_pipeline_compile(&pipeline->base, stages,
3198                                           cache, &pipeline_feedback,
3199                                           pCreateInfo, &state);
3200    if (result != VK_SUCCESS) {
3201       anv_pipeline_finish(&pipeline->base.base, device);
3202       vk_free2(&device->vk.alloc, pAllocator, pipeline);
3203       return result;
3204    }
3205 
3206    /* Prepare a batch for the commands and emit all the non-dynamic ones.
3207     */
3208    anv_batch_set_storage(&pipeline->base.base.batch, ANV_NULL_ADDRESS,
3209                          pipeline->batch_data, sizeof(pipeline->batch_data));
3210 
3211    if (pipeline->base.base.active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
3212       pipeline->base.base.active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
3213 
3214    if (anv_pipeline_is_mesh(pipeline))
3215       assert(device->physical->vk.supported_extensions.EXT_mesh_shader);
3216 
3217    anv_graphics_pipeline_emit(pipeline, &state);
3218 
3219    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3220 
3221    anv_fill_pipeline_creation_feedback(&pipeline->base, &pipeline_feedback,
3222                                        pCreateInfo, stages);
3223 
3224    ANV_RMV(graphics_pipeline_create, device, pipeline, false);
3225 
3226    *pPipeline = anv_pipeline_to_handle(&pipeline->base.base);
3227 
3228    return pipeline->base.base.batch.status;
3229 }
3230 
3231 VkResult anv_CreateGraphicsPipelines(
3232     VkDevice                                    _device,
3233     VkPipelineCache                             pipelineCache,
3234     uint32_t                                    count,
3235     const VkGraphicsPipelineCreateInfo*         pCreateInfos,
3236     const VkAllocationCallbacks*                pAllocator,
3237     VkPipeline*                                 pPipelines)
3238 {
3239    ANV_FROM_HANDLE(anv_device, device, _device);
3240    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
3241 
3242    VkResult result = VK_SUCCESS;
3243 
3244    unsigned i;
3245    for (i = 0; i < count; i++) {
3246       assert(pCreateInfos[i].sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
3247 
3248       const VkPipelineCreateFlags2KHR flags =
3249          vk_graphics_pipeline_create_flags(&pCreateInfos[i]);
3250       VkResult res;
3251       if (flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) {
3252          res = anv_graphics_lib_pipeline_create(device, pipeline_cache,
3253                                                 &pCreateInfos[i],
3254                                                 pAllocator,
3255                                                 &pPipelines[i]);
3256       } else {
3257          res = anv_graphics_pipeline_create(device,
3258                                             pipeline_cache,
3259                                             &pCreateInfos[i],
3260                                             pAllocator, &pPipelines[i]);
3261       }
3262 
3263       if (res != VK_SUCCESS) {
3264          result = res;
3265          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
3266             break;
3267          pPipelines[i] = VK_NULL_HANDLE;
3268       }
3269    }
3270 
3271    for (; i < count; i++)
3272       pPipelines[i] = VK_NULL_HANDLE;
3273 
3274    return result;
3275 }
3276 
3277 static bool
3278 should_remat_cb(nir_instr *instr, void *data)
3279 {
3280    if (instr->type != nir_instr_type_intrinsic)
3281       return false;
3282 
3283    return nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
3284 }
3285 
3286 static VkResult
3287 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
3288                          struct vk_pipeline_cache *cache,
3289                          nir_shader *nir,
3290                          struct anv_pipeline_stage *stage,
3291                          void *mem_ctx)
3292 {
3293    const struct brw_compiler *compiler =
3294       pipeline->base.device->physical->compiler;
3295    const struct intel_device_info *devinfo = compiler->devinfo;
3296 
3297    nir_shader **resume_shaders = NULL;
3298    uint32_t num_resume_shaders = 0;
3299    if (nir->info.stage != MESA_SHADER_COMPUTE) {
3300       const nir_lower_shader_calls_options opts = {
3301          .address_format = nir_address_format_64bit_global,
3302          .stack_alignment = BRW_BTD_STACK_ALIGN,
3303          .localized_loads = true,
3304          .vectorizer_callback = brw_nir_should_vectorize_mem,
3305          .vectorizer_data = NULL,
3306          .should_remat_callback = should_remat_cb,
3307       };
3308 
3309       NIR_PASS(_, nir, nir_lower_shader_calls, &opts,
3310                &resume_shaders, &num_resume_shaders, mem_ctx);
3311       NIR_PASS(_, nir, brw_nir_lower_shader_calls, &stage->key.bs);
3312       NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
3313    }
3314 
3315    for (unsigned i = 0; i < num_resume_shaders; i++) {
3316       NIR_PASS(_, resume_shaders[i], brw_nir_lower_shader_calls, &stage->key.bs);
3317       NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
3318    }
3319 
3320    struct brw_compile_bs_params params = {
3321       .base = {
3322          .nir = nir,
3323          .stats = stage->stats,
3324          .log_data = pipeline->base.device,
3325          .mem_ctx = mem_ctx,
3326          .source_hash = stage->source_hash,
3327       },
3328       .key = &stage->key.bs,
3329       .prog_data = &stage->prog_data.bs,
3330       .num_resume_shaders = num_resume_shaders,
3331       .resume_shaders = resume_shaders,
3332    };
3333 
3334    stage->code = brw_compile_bs(compiler, &params);
3335    if (stage->code == NULL) {
3336       VkResult result;
3337 
3338       if (params.base.error_str)
3339          result = vk_errorf(pipeline, VK_ERROR_UNKNOWN, "%s", params.base.error_str);
3340       else
3341          result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
3342 
3343       return result;
3344    }
3345 
3346    struct anv_shader_upload_params upload_params = {
3347       .stage               = stage->stage,
3348       .key_data            = &stage->cache_key,
3349       .key_size            = sizeof(stage->cache_key),
3350       .kernel_data         = stage->code,
3351       .kernel_size         = stage->prog_data.base.program_size,
3352       .prog_data           = &stage->prog_data.base,
3353       .prog_data_size      = brw_prog_data_size(stage->stage),
3354       .stats               = stage->stats,
3355       .num_stats           = 1,
3356       .bind_map            = &stage->bind_map,
3357       .push_desc_info      = &stage->push_desc_info,
3358       .dynamic_push_values = stage->dynamic_push_values,
3359    };
3360 
3361    stage->bin =
3362       anv_device_upload_kernel(pipeline->base.device, cache, &upload_params);
3363    if (stage->bin == NULL)
3364       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
3365 
3366    anv_pipeline_add_executables(&pipeline->base, stage);
3367 
3368    return VK_SUCCESS;
3369 }
3370 
3371 static bool
3372 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
3373 {
3374    if (info->pDynamicState == NULL)
3375       return false;
3376 
3377    for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
3378       if (info->pDynamicState->pDynamicStates[i] ==
3379           VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
3380          return true;
3381    }
3382 
3383    return false;
3384 }
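/* For illustration only (application-side sketch, not driver code): when the
 * dynamic state checked above is present, stack_size stays 0 and the
 * application provides a size at record time.  Names below are hypothetical:
 *
 *    const VkDynamicState dyn_state =
 *       VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR;
 *    // create the RT pipeline with a VkPipelineDynamicStateCreateInfo
 *    // listing dyn_state, then at record time:
 *    VkDeviceSize rgen_size =
 *       vkGetRayTracingShaderGroupStackSizeKHR(device, pipeline,
 *                                              0, // raygen group index
 *                                              VK_SHADER_GROUP_SHADER_GENERAL_KHR);
 *    vkCmdSetRayTracingPipelineStackSizeKHR(cmd_buffer,
 *                                           (uint32_t)rgen_size); // + recursion terms
 */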
3385 
3386 static void
3387 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
3388                                         const VkRayTracingPipelineCreateInfoKHR *info,
3389                                         uint32_t *stack_max)
3390 {
3391    if (is_rt_stack_size_dynamic(info)) {
3392       pipeline->stack_size = 0; /* 0 means dynamic */
3393    } else {
3394       /* From the Vulkan spec:
3395        *
3396        *    "If the stack size is not set explicitly, the stack size for a
3397        *    pipeline is:
3398        *
3399        *       rayGenStackMax +
3400        *       min(1, maxPipelineRayRecursionDepth) ×
3401        *       max(closestHitStackMax, missStackMax,
3402        *           intersectionStackMax + anyHitStackMax) +
3403        *       max(0, maxPipelineRayRecursionDepth-1) ×
3404        *       max(closestHitStackMax, missStackMax) +
3405        *       2 × callableStackMax"
3406        */
3407       pipeline->stack_size =
3408          stack_max[MESA_SHADER_RAYGEN] +
3409          MIN2(1, info->maxPipelineRayRecursionDepth) *
3410          MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
3411               stack_max[MESA_SHADER_MISS],
3412               stack_max[MESA_SHADER_INTERSECTION],
3413               stack_max[MESA_SHADER_ANY_HIT]) +
3414          MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
3415          MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
3416               stack_max[MESA_SHADER_MISS]) +
3417          2 * stack_max[MESA_SHADER_CALLABLE];
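      /* For example (illustrative numbers only): with
       * maxPipelineRayRecursionDepth = 2, rayGenStackMax = 64,
       * closestHitStackMax = 128, missStackMax = 32, intersectionStackMax = 48,
       * anyHitStackMax = 16 and callableStackMax = 0, this evaluates to
       * 64 + 1 * max(128, 32, 48 + 16) + 1 * max(128, 32) + 2 * 0 = 320.
       */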
3418 
3419       /* This is an extremely unlikely case but we need to set it to some
3420        * non-zero value so that we don't accidentally think it's dynamic.
3421        * Our minimum stack size is 2KB anyway, so we could set it to any
3422        * small value we like.
3423        */
3424       if (pipeline->stack_size == 0)
3425          pipeline->stack_size = 1;
3426    }
3427 }
3428 
3429 static enum brw_rt_ray_flags
3430 anv_pipeline_get_pipeline_ray_flags(VkPipelineCreateFlags2KHR flags)
3431 {
3432    uint32_t ray_flags = 0;
3433 
3434    const bool rt_skip_triangles =
3435       flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR;
3436    const bool rt_skip_aabbs =
3437       flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_AABBS_BIT_KHR;
3438    assert(!(rt_skip_triangles && rt_skip_aabbs));
3439 
3440    if (rt_skip_triangles)
3441       ray_flags |= BRW_RT_RAY_FLAG_SKIP_TRIANGLES;
3442    else if (rt_skip_aabbs)
3443       ray_flags |= BRW_RT_RAY_FLAG_SKIP_AABBS;
3444 
3445    return ray_flags;
3446 }
3447 
3448 static struct anv_pipeline_stage *
3449 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
3450                                      const VkRayTracingPipelineCreateInfoKHR *info,
3451                                      void *tmp_pipeline_ctx)
3452 {
3453    struct anv_device *device = pipeline->base.device;
3454    /* Create enough stage entries for all shader modules plus potential
3455     * combinations in the groups.
3456     */
3457    struct anv_pipeline_stage *stages =
3458       rzalloc_array(tmp_pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
3459 
3460    enum brw_rt_ray_flags ray_flags =
3461       anv_pipeline_get_pipeline_ray_flags(pipeline->base.flags);
3462 
3463    for (uint32_t i = 0; i < info->stageCount; i++) {
3464       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
3465       if (vk_pipeline_shader_stage_is_null(sinfo))
3466          continue;
3467 
3468       int64_t stage_start = os_time_get_nano();
3469 
3470       stages[i] = (struct anv_pipeline_stage) {
3471          .stage = vk_to_mesa_shader_stage(sinfo->stage),
3472          .pipeline_flags = pipeline->base.flags,
3473          .pipeline_pNext = info->pNext,
3474          .info = sinfo,
3475          .cache_key = {
3476             .stage = vk_to_mesa_shader_stage(sinfo->stage),
3477          },
3478          .feedback = {
3479             .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3480          },
3481       };
3482 
3483       anv_stage_allocate_bind_map_tables(&pipeline->base, &stages[i],
3484                                          tmp_pipeline_ctx);
3485 
3486       pipeline->base.active_stages |= sinfo->stage;
3487 
3488       anv_stage_write_shader_hash(&stages[i], device);
3489 
3490       populate_bs_prog_key(&stages[i],
3491                            pipeline->base.device,
3492                            ray_flags);
3493 
3494       if (stages[i].stage != MESA_SHADER_INTERSECTION) {
3495          anv_pipeline_hash_ray_tracing_shader(pipeline, &stages[i],
3496                                               stages[i].cache_key.sha1);
3497       }
3498 
3499       stages[i].feedback.duration += os_time_get_nano() - stage_start;
3500    }
3501 
3502    for (uint32_t i = 0; i < info->groupCount; i++) {
3503       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
3504 
3505       if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
3506          continue;
3507 
3508       int64_t stage_start = os_time_get_nano();
3509 
3510       uint32_t intersection_idx = ginfo->intersectionShader;
3511       assert(intersection_idx < info->stageCount);
3512 
3513       uint32_t any_hit_idx = ginfo->anyHitShader;
3514       if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
3515          assert(any_hit_idx < info->stageCount);
3516          anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
3517                                                        &stages[intersection_idx],
3518                                                        &stages[any_hit_idx],
3519                                                        stages[intersection_idx].cache_key.sha1);
3520       } else {
3521          anv_pipeline_hash_ray_tracing_shader(pipeline,
3522                                               &stages[intersection_idx],
3523                                               stages[intersection_idx].cache_key.sha1);
3524       }
3525 
3526       stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
3527    }
3528 
3529    return stages;
3530 }
3531 
3532 static bool
3533 anv_ray_tracing_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
3534                                              struct vk_pipeline_cache *cache,
3535                                              const VkRayTracingPipelineCreateInfoKHR *info,
3536                                              struct anv_pipeline_stage *stages,
3537                                              VkPipelineCreationFeedback *pipeline_feedback)
3538 {
3539    uint32_t shaders = 0, found = 0, cache_hits = 0;
3540    for (uint32_t i = 0; i < info->stageCount; i++) {
3541       if (stages[i].info == NULL)
3542          continue;
3543 
3544       shaders++;
3545 
3546       int64_t stage_start = os_time_get_nano();
3547 
3548       bool cache_hit;
3549       stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
3550                                                    &stages[i].cache_key,
3551                                                    sizeof(stages[i].cache_key),
3552                                                    &cache_hit);
3553       if (cache_hit) {
3554          cache_hits++;
3555          stages[i].feedback.flags |=
3556             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3557       }
3558 
3559       if (stages[i].bin != NULL) {
3560          found++;
3561          anv_pipeline_add_executables(&pipeline->base, &stages[i]);
3562       }
3563 
3564       stages[i].feedback.duration += os_time_get_nano() - stage_start;
3565    }
3566 
3567    if (cache_hits == shaders) {
3568       pipeline_feedback->flags |=
3569          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
3570    }
3571 
3572    return found == shaders;
3573 }
3574 
3575 static VkResult
3576 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
3577                                  void *tmp_pipeline_ctx,
3578                                  struct anv_pipeline_stage *stages,
3579                                  struct vk_pipeline_cache *cache,
3580                                  const VkRayTracingPipelineCreateInfoKHR *info)
3581 {
3582    const struct intel_device_info *devinfo = pipeline->base.device->info;
3583    VkResult result;
3584 
3585    VkPipelineCreationFeedback pipeline_feedback = {
3586       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
3587    };
3588    int64_t pipeline_start = os_time_get_nano();
3589 
3590    const bool skip_cache_lookup =
3591       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
3592 
3593    if (!skip_cache_lookup &&
3594        anv_ray_tracing_pipeline_load_cached_shaders(pipeline, cache, info, stages,
3595                                                     &pipeline_feedback)) {
3596       goto done;
3597    }
3598 
3599    if (pipeline->base.flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
3600       return VK_PIPELINE_COMPILE_REQUIRED;
3601 
3602    for (uint32_t i = 0; i < info->stageCount; i++) {
3603       if (stages[i].info == NULL)
3604          continue;
3605 
3606       /* Intersection and any-hit shaders always need to fetch their NIR so
3607        * that they can be handled correctly below in the group section.  For
3608        * the other stages, if we found them in the cache, skip this part.
3609        */
3610       if (!(stages[i].stage == MESA_SHADER_INTERSECTION ||
3611             stages[i].stage == MESA_SHADER_ANY_HIT) &&
3612           stages[i].bin != NULL)
3613          continue;
3614 
3615       int64_t stage_start = os_time_get_nano();
3616 
3617       VkResult result = anv_pipeline_stage_get_nir(&pipeline->base, cache,
3618                                                    tmp_pipeline_ctx,
3619                                                    &stages[i]);
3620       if (result != VK_SUCCESS)
3621          return result;
3622 
3623       anv_pipeline_nir_preprocess(&pipeline->base, &stages[i]);
3624 
3625       anv_pipeline_lower_nir(&pipeline->base, tmp_pipeline_ctx, &stages[i],
3626                              &pipeline->base.layout, 0 /* view_mask */,
3627                              false /* use_primitive_replication */);
3628 
3629       stages[i].feedback.duration += os_time_get_nano() - stage_start;
3630    }
3631 
3632    for (uint32_t i = 0; i < info->stageCount; i++) {
3633       if (stages[i].info == NULL)
3634          continue;
3635 
3636       /* Shader found in cache already. */
3637       if (stages[i].bin != NULL)
3638          continue;
3639 
3640       /* We handle intersection shaders as part of the group */
3641       if (stages[i].stage == MESA_SHADER_INTERSECTION)
3642          continue;
3643 
3644       int64_t stage_start = os_time_get_nano();
3645 
3646       void *tmp_stage_ctx = ralloc_context(tmp_pipeline_ctx);
3647 
3648       nir_shader *nir = nir_shader_clone(tmp_stage_ctx, stages[i].nir);
3649       switch (stages[i].stage) {
3650       case MESA_SHADER_RAYGEN:
3651          brw_nir_lower_raygen(nir);
3652          break;
3653 
3654       case MESA_SHADER_ANY_HIT:
3655          brw_nir_lower_any_hit(nir, devinfo);
3656          break;
3657 
3658       case MESA_SHADER_CLOSEST_HIT:
3659          brw_nir_lower_closest_hit(nir);
3660          break;
3661 
3662       case MESA_SHADER_MISS:
3663          brw_nir_lower_miss(nir);
3664          break;
3665 
3666       case MESA_SHADER_INTERSECTION:
3667          unreachable("These are handled later");
3668 
3669       case MESA_SHADER_CALLABLE:
3670          brw_nir_lower_callable(nir);
3671          break;
3672 
3673       default:
3674          unreachable("Invalid ray-tracing shader stage");
3675       }
3676 
3677       result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
3678                                         tmp_stage_ctx);
3679       if (result != VK_SUCCESS) {
3680          ralloc_free(tmp_stage_ctx);
3681          return result;
3682       }
3683 
3684       ralloc_free(tmp_stage_ctx);
3685 
3686       stages[i].feedback.duration += os_time_get_nano() - stage_start;
3687    }
3688 
3689  done:
3690    for (uint32_t i = 0; i < info->groupCount; i++) {
3691       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
3692       struct anv_rt_shader_group *group = &pipeline->groups[i];
3693       group->type = ginfo->type;
3694       switch (ginfo->type) {
3695       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
3696          assert(ginfo->generalShader < info->stageCount);
3697          group->general = stages[ginfo->generalShader].bin;
3698          break;
3699 
3700       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
3701          if (ginfo->anyHitShader < info->stageCount)
3702             group->any_hit = stages[ginfo->anyHitShader].bin;
3703 
3704          if (ginfo->closestHitShader < info->stageCount)
3705             group->closest_hit = stages[ginfo->closestHitShader].bin;
3706          break;
3707 
3708       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
3709          if (ginfo->closestHitShader < info->stageCount)
3710             group->closest_hit = stages[ginfo->closestHitShader].bin;
3711 
3712          uint32_t intersection_idx = info->pGroups[i].intersectionShader;
3713          assert(intersection_idx < info->stageCount);
3714 
3715          /* Only compile this stage if not already found in the cache. */
3716          if (stages[intersection_idx].bin == NULL) {
3717             /* The any-hit and intersection shader have to be combined */
3718             uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
3719             const nir_shader *any_hit = NULL;
3720             if (any_hit_idx < info->stageCount)
3721                any_hit = stages[any_hit_idx].nir;
3722 
3723             void *tmp_group_ctx = ralloc_context(tmp_pipeline_ctx);
3724             nir_shader *intersection =
3725                nir_shader_clone(tmp_group_ctx, stages[intersection_idx].nir);
3726 
3727             brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
3728                                                         devinfo);
3729 
3730             result = compile_upload_rt_shader(pipeline, cache,
3731                                               intersection,
3732                                               &stages[intersection_idx],
3733                                               tmp_group_ctx);
3734             ralloc_free(tmp_group_ctx);
3735             if (result != VK_SUCCESS)
3736                return result;
3737          }
3738 
3739          group->intersection = stages[intersection_idx].bin;
3740          break;
3741       }
3742 
3743       default:
3744          unreachable("Invalid ray tracing shader group type");
3745       }
3746    }
3747 
3748    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
3749 
3750    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
3751       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
3752    if (create_feedback) {
3753       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
3754 
3755       uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
3756       assert(stage_count == 0 || info->stageCount == stage_count);
3757       for (uint32_t i = 0; i < stage_count; i++) {
3758          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
3759          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
3760       }
3761    }
3762 
3763    return VK_SUCCESS;
3764 }
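/* For example (illustrative only): with pStages = { raygen, miss, closest-hit,
 * any-hit, intersection } and a procedural hit group declaring
 * closestHitShader = 2, anyHitShader = 3 and intersectionShader = 4, the loop
 * above stores stages[2].bin in group->closest_hit and compiles a clone of
 * stages[4].nir combined with stages[3].nir into the single binary stored in
 * group->intersection.
 */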
3765 
3766 VkResult
3767 anv_device_init_rt_shaders(struct anv_device *device)
3768 {
3769    device->bvh_build_method = ANV_BVH_BUILD_METHOD_NEW_SAH;
3770 
3771    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3772       return VK_SUCCESS;
3773 
3774    bool cache_hit;
3775 
3776    struct anv_push_descriptor_info empty_push_desc_info = {};
3777    struct anv_pipeline_bind_map empty_bind_map = {};
3778    struct brw_rt_trampoline {
3779       char name[16];
3780       struct brw_cs_prog_key key;
3781    } trampoline_key = {
3782       .name = "rt-trampoline",
3783    };
3784    device->rt_trampoline =
3785       anv_device_search_for_kernel(device, device->internal_cache,
3786                                    &trampoline_key, sizeof(trampoline_key),
3787                                    &cache_hit);
3788    if (device->rt_trampoline == NULL) {
3789 
3790       void *tmp_ctx = ralloc_context(NULL);
3791       nir_shader *trampoline_nir =
3792          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
3793 
3794       if (device->info->ver >= 20)
3795          trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_16;
3796       else
3797          trampoline_nir->info.subgroup_size = SUBGROUP_SIZE_REQUIRE_8;
3798 
3799       struct brw_cs_prog_data trampoline_prog_data = {
3800          .uses_btd_stack_ids = true,
3801       };
3802       struct brw_compile_cs_params params = {
3803          .base = {
3804             .nir = trampoline_nir,
3805             .log_data = device,
3806             .mem_ctx = tmp_ctx,
3807          },
3808          .key = &trampoline_key.key,
3809          .prog_data = &trampoline_prog_data,
3810       };
3811       const unsigned *tramp_data =
3812          brw_compile_cs(device->physical->compiler, &params);
3813 
3814       struct anv_shader_upload_params upload_params = {
3815          .stage               = MESA_SHADER_COMPUTE,
3816          .key_data            = &trampoline_key,
3817          .key_size            = sizeof(trampoline_key),
3818          .kernel_data         = tramp_data,
3819          .kernel_size         = trampoline_prog_data.base.program_size,
3820          .prog_data           = &trampoline_prog_data.base,
3821          .prog_data_size      = sizeof(trampoline_prog_data),
3822          .bind_map            = &empty_bind_map,
3823          .push_desc_info      = &empty_push_desc_info,
3824       };
3825 
3826       device->rt_trampoline =
3827          anv_device_upload_kernel(device, device->internal_cache,
3828                                   &upload_params);
3829 
3830       ralloc_free(tmp_ctx);
3831 
3832       if (device->rt_trampoline == NULL)
3833          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3834    }
3835 
3836    /* The cache already has a reference and it's not going anywhere so there
3837     * is no need to hold a second reference.
3838     */
3839    anv_shader_bin_unref(device, device->rt_trampoline);
3840 
3841    struct brw_rt_trivial_return {
3842       char name[16];
3843       struct brw_bs_prog_key key;
3844    } return_key = {
3845       .name = "rt-trivial-ret",
3846    };
3847    device->rt_trivial_return =
3848       anv_device_search_for_kernel(device, device->internal_cache,
3849                                    &return_key, sizeof(return_key),
3850                                    &cache_hit);
3851    if (device->rt_trivial_return == NULL) {
3852       void *tmp_ctx = ralloc_context(NULL);
3853       nir_shader *trivial_return_nir =
3854          brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
3855 
3856       NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, device->info);
3857 
3858       struct brw_bs_prog_data return_prog_data = { 0, };
3859       struct brw_compile_bs_params params = {
3860          .base = {
3861             .nir = trivial_return_nir,
3862             .log_data = device,
3863             .mem_ctx = tmp_ctx,
3864          },
3865          .key = &return_key.key,
3866          .prog_data = &return_prog_data,
3867       };
3868       const unsigned *return_data =
3869          brw_compile_bs(device->physical->compiler, &params);
3870 
3871       struct anv_shader_upload_params upload_params = {
3872          .stage               = MESA_SHADER_CALLABLE,
3873          .key_data            = &return_key,
3874          .key_size            = sizeof(return_key),
3875          .kernel_data         = return_data,
3876          .kernel_size         = return_prog_data.base.program_size,
3877          .prog_data           = &return_prog_data.base,
3878          .prog_data_size      = sizeof(return_prog_data),
3879          .bind_map            = &empty_bind_map,
3880          .push_desc_info      = &empty_push_desc_info,
3881       };
3882 
3883       device->rt_trivial_return =
3884          anv_device_upload_kernel(device, device->internal_cache,
3885                                   &upload_params);
3886 
3887       ralloc_free(tmp_ctx);
3888 
3889       if (device->rt_trivial_return == NULL)
3890          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3891    }
3892 
3893    /* The cache already has a reference and it's not going anywhere, so
3894     * there is no need to hold a second reference.
3895     */
3896    anv_shader_bin_unref(device, device->rt_trivial_return);
3897 
3898    return VK_SUCCESS;
3899 }
3900 
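/* The trampoline and trivial-return binaries are owned by the device's
 * internal pipeline cache (the local references were dropped right after
 * upload above), so there is nothing left to destroy here.
 */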
3901 void
3902 anv_device_finish_rt_shaders(struct anv_device *device)
3903 {
3904    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3905       return;
3906 }
3907 
3908 static void
3909 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3910                               struct anv_device *device,
3911                               struct vk_pipeline_cache *cache,
3912                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3913                               const VkAllocationCallbacks *alloc)
3914 {
3915    util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
3916 
3917    ANV_FROM_HANDLE(anv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
3918    anv_pipeline_init_layout(&pipeline->base, pipeline_layout);
3919 
3920    anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
3921 }
3922 
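/* Debug-only sanity check: a group's stage index must either be
 * VK_SHADER_UNUSED_KHR or refer to exactly one stage of an allowed type.
 */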
3923 static void
3924 assert_rt_stage_index_valid(const VkRayTracingPipelineCreateInfoKHR* pCreateInfo,
3925                             uint32_t stage_idx,
3926                             VkShaderStageFlags valid_stages)
3927 {
3928    if (stage_idx == VK_SHADER_UNUSED_KHR)
3929       return;
3930 
3931    assert(stage_idx < pCreateInfo->stageCount);
3932    assert(util_bitcount(pCreateInfo->pStages[stage_idx].stage) == 1);
3933    assert(pCreateInfo->pStages[stage_idx].stage & valid_stages);
3934 }
3935 
3936 static VkResult
3937 anv_ray_tracing_pipeline_create(
3938     VkDevice                                    _device,
3939     struct vk_pipeline_cache *                  cache,
3940     const VkRayTracingPipelineCreateInfoKHR*    pCreateInfo,
3941     const VkAllocationCallbacks*                pAllocator,
3942     VkPipeline*                                 pPipeline)
3943 {
3944    ANV_FROM_HANDLE(anv_device, device, _device);
3945    VkResult result;
3946 
3947    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR);
3948 
3949    uint32_t group_count = pCreateInfo->groupCount;
3950    if (pCreateInfo->pLibraryInfo) {
3951       for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) {
3952          ANV_FROM_HANDLE(anv_pipeline, library,
3953                          pCreateInfo->pLibraryInfo->pLibraries[l]);
3954          struct anv_ray_tracing_pipeline *rt_library =
3955             anv_pipeline_to_ray_tracing(library);
3956          group_count += rt_library->group_count;
3957       }
3958    }
3959 
3960    VK_MULTIALLOC(ma);
3961    VK_MULTIALLOC_DECL(&ma, struct anv_ray_tracing_pipeline, pipeline, 1);
3962    VK_MULTIALLOC_DECL(&ma, struct anv_rt_shader_group, groups, group_count);
3963    if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
3964                               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
3965       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3966 
3967    result = anv_pipeline_init(&pipeline->base, device,
3968                               ANV_PIPELINE_RAY_TRACING,
3969                               vk_rt_pipeline_create_flags(pCreateInfo),
3970                               pAllocator);
3971    if (result != VK_SUCCESS) {
3972       vk_free2(&device->vk.alloc, pAllocator, pipeline);
3973       return result;
3974    }
3975 
3976    pipeline->group_count = group_count;
3977    pipeline->groups = groups;
3978 
3979    ASSERTED const VkShaderStageFlags ray_tracing_stages =
3980       VK_SHADER_STAGE_RAYGEN_BIT_KHR |
3981       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
3982       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
3983       VK_SHADER_STAGE_MISS_BIT_KHR |
3984       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
3985       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
3986 
3987    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
3988       assert((pCreateInfo->pStages[i].stage & ~ray_tracing_stages) == 0);
3989 
3990    for (uint32_t i = 0; i < pCreateInfo->groupCount; i++) {
3991       const VkRayTracingShaderGroupCreateInfoKHR *ginfo =
3992          &pCreateInfo->pGroups[i];
3993       assert_rt_stage_index_valid(pCreateInfo, ginfo->generalShader,
3994                                   VK_SHADER_STAGE_RAYGEN_BIT_KHR |
3995                                   VK_SHADER_STAGE_MISS_BIT_KHR |
3996                                   VK_SHADER_STAGE_CALLABLE_BIT_KHR);
3997       assert_rt_stage_index_valid(pCreateInfo, ginfo->closestHitShader,
3998                                   VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR);
3999       assert_rt_stage_index_valid(pCreateInfo, ginfo->anyHitShader,
4000                                   VK_SHADER_STAGE_ANY_HIT_BIT_KHR);
4001       assert_rt_stage_index_valid(pCreateInfo, ginfo->intersectionShader,
4002                                   VK_SHADER_STAGE_INTERSECTION_BIT_KHR);
4003       switch (ginfo->type) {
4004       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
4005          assert(ginfo->generalShader < pCreateInfo->stageCount);
4006          assert(ginfo->anyHitShader == VK_SHADER_UNUSED_KHR);
4007          assert(ginfo->closestHitShader == VK_SHADER_UNUSED_KHR);
4008          assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
4009          break;
4010 
4011       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
4012          assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
4013          assert(ginfo->intersectionShader == VK_SHADER_UNUSED_KHR);
4014          break;
4015 
4016       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
4017          assert(ginfo->generalShader == VK_SHADER_UNUSED_KHR);
4018          break;
4019 
4020       default:
4021          unreachable("Invalid ray-tracing shader group type");
4022       }
4023    }
4024 
4025    anv_ray_tracing_pipeline_init(pipeline, device, cache,
4026                                  pCreateInfo, pAllocator);
4027 
4028    void *tmp_ctx = ralloc_context(NULL);
4029 
4030    struct anv_pipeline_stage *stages =
4031       anv_pipeline_init_ray_tracing_stages(pipeline, pCreateInfo, tmp_ctx);
4032 
4033    result = anv_pipeline_compile_ray_tracing(pipeline, tmp_ctx, stages,
4034                                              cache, pCreateInfo);
4035    if (result != VK_SUCCESS) {
4036       for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
4037          if (stages[i].bin != NULL)
4038             anv_shader_bin_unref(device, stages[i].bin);
4039       }
4040       ralloc_free(tmp_ctx);
4041       anv_pipeline_finish(&pipeline->base, device);
4042       vk_free2(&device->vk.alloc, pAllocator, pipeline);
4043       return result;
4044    }
4045 
4046    /* Compute the size of the scratch BO (for register spilling) by taking the
4047     * max stack size across all the shaders in the pipeline. Also add the
4048     * shaders to the pipeline's shader list.
4049     */
4050    uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
4051    for (uint32_t s = 0; s < pCreateInfo->stageCount; s++) {
4052       util_dynarray_append(&pipeline->shaders,
4053                            struct anv_shader_bin *,
4054                            stages[s].bin);
4055 
4056       uint32_t stack_size =
4057          brw_bs_prog_data_const(stages[s].bin->prog_data)->max_stack_size;
4058       stack_max[stages[s].stage] = MAX2(stack_max[stages[s].stage], stack_size);
4059 
4060       anv_pipeline_account_shader(&pipeline->base, stages[s].bin);
4061    }
4062 
4063    anv_pipeline_compute_ray_tracing_stacks(pipeline, pCreateInfo, stack_max);
4064 
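   /* Merge in any pipeline libraries: their groups are appended after this
    * pipeline's own groups, and their shaders and executables are referenced
    * so that stack-size, statistics and group-handle queries cover them too.
    */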
4065    if (pCreateInfo->pLibraryInfo) {
4066       uint32_t g = pCreateInfo->groupCount;
4067       for (uint32_t l = 0; l < pCreateInfo->pLibraryInfo->libraryCount; l++) {
4068          ANV_FROM_HANDLE(anv_pipeline, library,
4069                          pCreateInfo->pLibraryInfo->pLibraries[l]);
4070          struct anv_ray_tracing_pipeline *rt_library =
4071             anv_pipeline_to_ray_tracing(library);
4072          for (uint32_t lg = 0; lg < rt_library->group_count; lg++) {
4073             pipeline->groups[g] = rt_library->groups[lg];
4074             pipeline->groups[g].imported = true;
4075             g++;
4076          }
4077 
4078          /* Account for shaders in the library. */
4079          util_dynarray_foreach(&rt_library->shaders,
4080                                struct anv_shader_bin *, shader) {
4081             util_dynarray_append(&pipeline->shaders,
4082                                  struct anv_shader_bin *,
4083                                  anv_shader_bin_ref(*shader));
4084             anv_pipeline_account_shader(&pipeline->base, *shader);
4085          }
4086 
4087          /* Add the library shaders to this pipeline's executables. */
4088          util_dynarray_foreach(&rt_library->base.executables,
4089                                struct anv_pipeline_executable, exe) {
4090             util_dynarray_append(&pipeline->base.executables,
4091                                  struct anv_pipeline_executable, *exe);
4092          }
4093 
4094          pipeline->base.active_stages |= rt_library->base.active_stages;
4095       }
4096    }
4097 
4098    anv_genX(device->info, ray_tracing_pipeline_emit)(pipeline);
4099 
4100    ralloc_free(tmp_ctx);
4101 
4102    ANV_RMV(rt_pipeline_create, device, pipeline, false);
4103 
4104    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
4105 
4106    return pipeline->base.batch.status;
4107 }
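
/* Illustrative usage sketch (not driver code, kept out of the build): a
 * minimal vkCreateRayTracingPipelinesKHR call with a single raygen stage in
 * one general group, matching the stage/group validation above.  The
 * function, raygen_module, rt_layout and out_pipeline names are hypothetical.
 */
#if 0
static VkResult
example_create_raygen_only_pipeline(VkDevice dev, VkShaderModule raygen_module,
                                    VkPipelineLayout rt_layout,
                                    VkPipeline *out_pipeline)
{
   const VkPipelineShaderStageCreateInfo stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_RAYGEN_BIT_KHR,
      .module = raygen_module,
      .pName = "main",
   };
   const VkRayTracingShaderGroupCreateInfoKHR group = {
      .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
      .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR,
      .generalShader = 0, /* index into pStages */
      .closestHitShader = VK_SHADER_UNUSED_KHR,
      .anyHitShader = VK_SHADER_UNUSED_KHR,
      .intersectionShader = VK_SHADER_UNUSED_KHR,
   };
   const VkRayTracingPipelineCreateInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR,
      .stageCount = 1,
      .pStages = &stage,
      .groupCount = 1,
      .pGroups = &group,
      .maxPipelineRayRecursionDepth = 1,
      .layout = rt_layout,
   };
   return vkCreateRayTracingPipelinesKHR(dev, VK_NULL_HANDLE, VK_NULL_HANDLE,
                                         1, &info, NULL, out_pipeline);
}
#endif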
4108 
4109 VkResult
4110 anv_CreateRayTracingPipelinesKHR(
4111     VkDevice                                    _device,
4112     VkDeferredOperationKHR                      deferredOperation,
4113     VkPipelineCache                             pipelineCache,
4114     uint32_t                                    createInfoCount,
4115     const VkRayTracingPipelineCreateInfoKHR*    pCreateInfos,
4116     const VkAllocationCallbacks*                pAllocator,
4117     VkPipeline*                                 pPipelines)
4118 {
4119    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
4120 
4121    VkResult result = VK_SUCCESS;
4122 
4123    unsigned i;
4124    for (i = 0; i < createInfoCount; i++) {
4125       const VkPipelineCreateFlags2KHR flags =
4126          vk_rt_pipeline_create_flags(&pCreateInfos[i]);
4127       VkResult res = anv_ray_tracing_pipeline_create(_device, pipeline_cache,
4128                                                      &pCreateInfos[i],
4129                                                      pAllocator, &pPipelines[i]);
4130 
4131       if (res != VK_SUCCESS) {
4132          result = res;
4133          if (flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
4134             break;
4135          pPipelines[i] = VK_NULL_HANDLE;
4136       }
4137    }
4138 
4139    for (; i < createInfoCount; i++)
4140       pPipelines[i] = VK_NULL_HANDLE;
4141 
4142    return result;
4143 }
4144 
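/* Format a string into a fixed-size char-array field, asserting in debug
 * builds that the result was not truncated.
 */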
4145 #define WRITE_STR(field, ...) ({                               \
4146    memset(field, 0, sizeof(field));                            \
4147    UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
4148    assert(i > 0 && i < sizeof(field));                         \
4149 })
4150 
4151 VkResult anv_GetPipelineExecutablePropertiesKHR(
4152     VkDevice                                    device,
4153     const VkPipelineInfoKHR*                    pPipelineInfo,
4154     uint32_t*                                   pExecutableCount,
4155     VkPipelineExecutablePropertiesKHR*          pProperties)
4156 {
4157    ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
4158    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
4159                           pProperties, pExecutableCount);
4160 
4161    util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
4162       vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
4163          gl_shader_stage stage = exe->stage;
4164          props->stages = mesa_to_vk_shader_stage(stage);
4165 
4166          unsigned simd_width = exe->stats.dispatch_width;
4167          if (stage == MESA_SHADER_FRAGMENT) {
4168             if (exe->stats.max_polygons > 1)
4169                WRITE_STR(props->name, "SIMD%dx%d %s",
4170                          exe->stats.max_polygons,
4171                          simd_width / exe->stats.max_polygons,
4172                          _mesa_shader_stage_to_string(stage));
4173             else
4174                WRITE_STR(props->name, "%s%d %s",
4175                          simd_width ? "SIMD" : "vec",
4176                          simd_width ? simd_width : 4,
4177                          _mesa_shader_stage_to_string(stage));
4178          } else {
4179             WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
4180          }
4181          WRITE_STR(props->description, "%s%d %s shader",
4182                    simd_width ? "SIMD" : "vec",
4183                    simd_width ? simd_width : 4,
4184                    _mesa_shader_stage_to_string(stage));
4185 
4186          /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
4187           * wants a subgroup size of 1.
4188           */
4189          props->subgroupSize = MAX2(simd_width, 1);
4190       }
4191    }
4192 
4193    return vk_outarray_status(&out);
4194 }
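
/* Illustrative usage sketch (not driver code, kept out of the build): the
 * standard two-call idiom an application uses with this entry point.  The
 * function name and the fixed-size local array are hypothetical; real code
 * would allocate pExecutableCount entries.
 */
#if 0
static void
example_enumerate_executables(VkDevice dev, VkPipeline pipe)
{
   const VkPipelineInfoKHR pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
      .pipeline = pipe,
   };

   /* First call: query how many executables the pipeline exposes. */
   uint32_t count = 0;
   vkGetPipelineExecutablePropertiesKHR(dev, &pipeline_info, &count, NULL);

   /* Second call: fill the properties (sType must be set on each element). */
   VkPipelineExecutablePropertiesKHR props[8] = { 0 };
   if (count > 8)
      count = 8;
   for (uint32_t i = 0; i < count; i++)
      props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
   vkGetPipelineExecutablePropertiesKHR(dev, &pipeline_info, &count, props);
}
#endif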
4195 
4196 static const struct anv_pipeline_executable *
4197 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
4198 {
4199    assert(index < util_dynarray_num_elements(&pipeline->executables,
4200                                              struct anv_pipeline_executable));
4201    return util_dynarray_element(
4202       &pipeline->executables, struct anv_pipeline_executable, index);
4203 }
4204 
4205 VkResult anv_GetPipelineExecutableStatisticsKHR(
4206     VkDevice                                    device,
4207     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
4208     uint32_t*                                   pStatisticCount,
4209     VkPipelineExecutableStatisticKHR*           pStatistics)
4210 {
4211    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
4212    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
4213                           pStatistics, pStatisticCount);
4214 
4215    const struct anv_pipeline_executable *exe =
4216       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
4217 
4218    const struct brw_stage_prog_data *prog_data;
4219    switch (pipeline->type) {
4220    case ANV_PIPELINE_GRAPHICS:
4221    case ANV_PIPELINE_GRAPHICS_LIB: {
4222       prog_data = anv_pipeline_to_graphics_base(pipeline)->shaders[exe->stage]->prog_data;
4223       break;
4224    }
4225    case ANV_PIPELINE_COMPUTE: {
4226       prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
4227       break;
4228    }
4229    case ANV_PIPELINE_RAY_TRACING: {
4230       struct anv_shader_bin **shader =
4231          util_dynarray_element(&anv_pipeline_to_ray_tracing(pipeline)->shaders,
4232                                struct anv_shader_bin *,
4233                                pExecutableInfo->executableIndex);
4234       prog_data = (*shader)->prog_data;
4235       break;
4236    }
4237    default:
4238       unreachable("invalid pipeline type");
4239    }
4240 
4241    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4242       WRITE_STR(stat->name, "Instruction Count");
4243       WRITE_STR(stat->description,
4244                 "Number of GEN instructions in the final generated "
4245                 "shader executable.");
4246       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4247       stat->value.u64 = exe->stats.instructions;
4248    }
4249 
4250    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4251       WRITE_STR(stat->name, "SEND Count");
4252       WRITE_STR(stat->description,
4253                 "Number of instructions in the final generated shader "
4254                 "executable which access external units such as the "
4255                 "constant cache or the sampler.");
4256       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4257       stat->value.u64 = exe->stats.sends;
4258    }
4259 
4260    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4261       WRITE_STR(stat->name, "Loop Count");
4262       WRITE_STR(stat->description,
4263                 "Number of loops (not unrolled) in the final generated "
4264                 "shader executable.");
4265       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4266       stat->value.u64 = exe->stats.loops;
4267    }
4268 
4269    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4270       WRITE_STR(stat->name, "Cycle Count");
4271       WRITE_STR(stat->description,
4272                 "Estimate of the number of EU cycles required to execute "
4273                 "the final generated executable.  This is an estimate only "
4274                 "and may vary greatly from actual run-time performance.");
4275       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4276       stat->value.u64 = exe->stats.cycles;
4277    }
4278 
4279    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4280       WRITE_STR(stat->name, "Spill Count");
4281       WRITE_STR(stat->description,
4282                 "Number of scratch spill operations.  This gives a rough "
4283                 "estimate of the cost incurred due to spilling temporary "
4284                 "values to memory.  If this is non-zero, you may want to "
4285                 "adjust your shader to reduce register pressure.");
4286       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4287       stat->value.u64 = exe->stats.spills;
4288    }
4289 
4290    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4291       WRITE_STR(stat->name, "Fill Count");
4292       WRITE_STR(stat->description,
4293                 "Number of scratch fill operations.  This gives a rough "
4294                 "estimate of the cost incurred due to spilling temporary "
4295                 "values to memory.  If this is non-zero, you may want to "
4296                 "adjust your shader to reduce register pressure.");
4297       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4298       stat->value.u64 = exe->stats.fills;
4299    }
4300 
4301    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4302       WRITE_STR(stat->name, "Scratch Memory Size");
4303       WRITE_STR(stat->description,
4304                 "Number of bytes of scratch memory required by the "
4305                 "generated shader executable.  If this is non-zero, you "
4306                 "may want to adjust your shader to reduce register "
4307                 "pressure.");
4308       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4309       stat->value.u64 = prog_data->total_scratch;
4310    }
4311 
4312    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4313       WRITE_STR(stat->name, "Max dispatch width");
4314       WRITE_STR(stat->description,
4315                 "Largest SIMD dispatch width.");
4316       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4317       /* Report the max dispatch width only on the smallest SIMD variant */
4318       if (exe->stage != MESA_SHADER_FRAGMENT || exe->stats.dispatch_width == 8)
4319          stat->value.u64 = exe->stats.max_dispatch_width;
4320       else
4321          stat->value.u64 = 0;
4322    }
4323 
4324    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4325       WRITE_STR(stat->name, "Max live registers");
4326       WRITE_STR(stat->description,
4327                 "Maximum number of registers used across the entire shader.");
4328       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4329       stat->value.u64 = exe->stats.max_live_registers;
4330    }
4331 
4332    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4333       WRITE_STR(stat->name, "Workgroup Memory Size");
4334       WRITE_STR(stat->description,
4335                 "Number of bytes of workgroup shared memory used by this "
4336                 "shader including any padding.");
4337       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4338       if (gl_shader_stage_uses_workgroup(exe->stage))
4339          stat->value.u64 = prog_data->total_shared;
4340       else
4341          stat->value.u64 = 0;
4342    }
4343 
4344    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4345       uint32_t hash = pipeline->type == ANV_PIPELINE_COMPUTE ?
4346                       anv_pipeline_to_compute(pipeline)->source_hash :
4347                       (pipeline->type == ANV_PIPELINE_GRAPHICS_LIB ||
4348                        pipeline->type == ANV_PIPELINE_GRAPHICS) ?
4349                       anv_pipeline_to_graphics_base(pipeline)->source_hashes[exe->stage] :
4350                       0 /* No source hash for ray tracing */;
4351       WRITE_STR(stat->name, "Source hash");
4352       WRITE_STR(stat->description,
4353                 "hash = 0x%08x. Hash generated from shader source.", hash);
4354       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4355       stat->value.u64 = hash;
4356    }
4357 
4358    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
4359       WRITE_STR(stat->name, "Non SSA regs after NIR");
4360       WRITE_STR(stat->description, "Non SSA regs after NIR translation to BRW.");
4361       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
4362       stat->value.u64 = exe->stats.non_ssa_registers_after_nir;
4363    }
4364 
4365    return vk_outarray_status(&out);
4366 }
4367 
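/* Copy an IR string into the caller's buffer following the usual Vulkan
 * two-call idiom: when pData is NULL, only the required size is returned;
 * otherwise the text is copied and false is returned on truncation so the
 * caller can report VK_INCOMPLETE.
 */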
4368 static bool
4369 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
4370               const char *data)
4371 {
4372    ir->isText = VK_TRUE;
4373 
4374    size_t data_len = strlen(data) + 1;
4375 
4376    if (ir->pData == NULL) {
4377       ir->dataSize = data_len;
4378       return true;
4379    }
4380 
4381    strncpy(ir->pData, data, ir->dataSize);
4382    if (ir->dataSize < data_len)
4383       return false;
4384 
4385    ir->dataSize = data_len;
4386    return true;
4387 }
4388 
4389 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
4390     VkDevice                                    device,
4391     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
4392     uint32_t*                                   pInternalRepresentationCount,
4393     VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
4394 {
4395    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
4396    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
4397                           pInternalRepresentations, pInternalRepresentationCount);
4398    bool incomplete_text = false;
4399 
4400    const struct anv_pipeline_executable *exe =
4401       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
4402 
4403    if (exe->nir) {
4404       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
4405          WRITE_STR(ir->name, "Final NIR");
4406          WRITE_STR(ir->description,
4407                    "Final NIR before going into the back-end compiler");
4408 
4409          if (!write_ir_text(ir, exe->nir))
4410             incomplete_text = true;
4411       }
4412    }
4413 
4414    if (exe->disasm) {
4415       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
4416          WRITE_STR(ir->name, "GEN Assembly");
4417          WRITE_STR(ir->description,
4418                    "Final GEN assembly for the generated shader binary");
4419 
4420          if (!write_ir_text(ir, exe->disasm))
4421             incomplete_text = true;
4422       }
4423    }
4424 
4425    return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
4426 }
4427 
4428 VkResult
4429 anv_GetRayTracingShaderGroupHandlesKHR(
4430     VkDevice                                    _device,
4431     VkPipeline                                  _pipeline,
4432     uint32_t                                    firstGroup,
4433     uint32_t                                    groupCount,
4434     size_t                                      dataSize,
4435     void*                                       pData)
4436 {
4437    ANV_FROM_HANDLE(anv_device, device, _device);
4438    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
4439 
4440    if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
4441       return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
4442 
4443    struct anv_ray_tracing_pipeline *rt_pipeline =
4444       anv_pipeline_to_ray_tracing(pipeline);
4445 
4446    assert(firstGroup + groupCount <= rt_pipeline->group_count);
4447    for (uint32_t i = 0; i < groupCount; i++) {
4448       struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
4449       memcpy(pData, group->handle, sizeof(group->handle));
4450       pData += sizeof(group->handle);
4451    }
4452 
4453    return VK_SUCCESS;
4454 }
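
/* Illustrative usage sketch (not driver code, kept out of the build): how an
 * application typically pulls the group handles to build a shader binding
 * table.  handle_size would come from
 * VkPhysicalDeviceRayTracingPipelinePropertiesKHR::shaderGroupHandleSize;
 * the function and parameter names are hypothetical.
 */
#if 0
static VkResult
example_fetch_group_handles(VkDevice dev, VkPipeline rt_pipeline,
                            uint32_t group_count, uint32_t handle_size,
                            void *sbt_staging)
{
   /* One tightly packed handle per group; a real SBT must also honor
    * shaderGroupHandleAlignment / shaderGroupBaseAlignment when laying the
    * handles out in the buffer.
    */
   return vkGetRayTracingShaderGroupHandlesKHR(dev, rt_pipeline,
                                               0 /* firstGroup */, group_count,
                                               (size_t)group_count * handle_size,
                                               sbt_staging);
}
#endif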
4455 
4456 VkResult
4457 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
4458     VkDevice                                    _device,
4459     VkPipeline                                  pipeline,
4460     uint32_t                                    firstGroup,
4461     uint32_t                                    groupCount,
4462     size_t                                      dataSize,
4463     void*                                       pData)
4464 {
4465    ANV_FROM_HANDLE(anv_device, device, _device);
4466    unreachable("Unimplemented");
4467    return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
4468 }
4469 
4470 VkDeviceSize
4471 anv_GetRayTracingShaderGroupStackSizeKHR(
4472     VkDevice                                    device,
4473     VkPipeline                                  _pipeline,
4474     uint32_t                                    group,
4475     VkShaderGroupShaderKHR                      groupShader)
4476 {
4477    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
4478    assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
4479 
4480    struct anv_ray_tracing_pipeline *rt_pipeline =
4481       anv_pipeline_to_ray_tracing(pipeline);
4482 
4483    assert(group < rt_pipeline->group_count);
4484 
4485    struct anv_shader_bin *bin;
4486    switch (groupShader) {
4487    case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
4488       bin = rt_pipeline->groups[group].general;
4489       break;
4490 
4491    case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
4492       bin = rt_pipeline->groups[group].closest_hit;
4493       break;
4494 
4495    case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
4496       bin = rt_pipeline->groups[group].any_hit;
4497       break;
4498 
4499    case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
4500       bin = rt_pipeline->groups[group].intersection;
4501       break;
4502 
4503    default:
4504       unreachable("Invalid VkShaderGroupShader enum");
4505    }
4506 
4507    if (bin == NULL)
4508       return 0;
4509 
4510    return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
4511 }
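
/* Applications typically feed these per-group stack sizes into the stack
 * size formula from the VK_KHR_ray_tracing_pipeline spec and then call
 * vkCmdSetRayTracingPipelineStackSizeKHR when they want a tighter stack
 * than the default computed at pipeline creation time.
 */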
4512