1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_sample_positions.h"
34 #include "compiler/elk/elk_disasm.h"
35 #include "anv_private.h"
36 #include "compiler/elk/elk_nir.h"
37 #include "compiler/intel_nir.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_pipeline.h"
42 #include "vk_render_pass.h"
43 #include "vk_util.h"
44 
45 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
46  * we can't do that yet because we don't have the ability to copy nir.
47  */
48 static nir_shader *
49 anv_shader_stage_to_nir(struct anv_device *device,
50                         const VkPipelineShaderStageCreateInfo *stage_info,
51                         enum elk_robustness_flags robust_flags,
52                         void *mem_ctx)
53 {
54    const struct anv_physical_device *pdevice = device->physical;
55    const struct anv_instance *instance = pdevice->instance;
56    const struct elk_compiler *compiler = pdevice->compiler;
57    gl_shader_stage stage = vk_to_mesa_shader_stage(stage_info->stage);
58    const nir_shader_compiler_options *nir_options =
59       compiler->nir_options[stage];
60 
61    const struct spirv_to_nir_options spirv_options = {
62       .caps = {
63          .demote_to_helper_invocation = true,
64          .derivative_group = true,
65          .descriptor_array_dynamic_indexing = true,
66          .descriptor_array_non_uniform_indexing = true,
67          .descriptor_indexing = true,
68          .device_group = true,
69          .draw_parameters = true,
70          .float16 = pdevice->info.ver >= 8,
71          .float32_atomic_add = pdevice->info.has_lsc,
72          .float32_atomic_min_max = pdevice->info.ver >= 9,
73          .float64 = pdevice->info.ver >= 8,
74          .float64_atomic_min_max = pdevice->info.has_lsc,
75          .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
76          .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
77          .geometry_streams = true,
78          /* When Vulkan 1.3 is used or KHR_format_feature_flags2 is enabled,
79           * read/write without format support is reported per format, so just
80           * report true here.  It's up to the application to check.
81           */
82          .image_read_without_format = instance->vk.app_info.api_version >= VK_API_VERSION_1_3 || device->vk.enabled_extensions.KHR_format_feature_flags2,
83          .image_write_without_format = true,
84          .int8 = pdevice->info.ver >= 8,
85          .int16 = pdevice->info.ver >= 8,
86          .int64 = pdevice->info.ver >= 8,
87          .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
88          .integer_functions2 = pdevice->info.ver >= 8,
89          .min_lod = true,
90          .multiview = true,
91          .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
92          .post_depth_coverage = pdevice->info.ver >= 9,
93          .runtime_descriptor_array = true,
94          .float_controls = true,
95          .shader_clock = true,
96          .shader_viewport_index_layer = true,
97          .stencil_export = pdevice->info.ver >= 9,
98          .storage_8bit = pdevice->info.ver >= 8,
99          .storage_16bit = pdevice->info.ver >= 8,
100          .subgroup_arithmetic = true,
101          .subgroup_basic = true,
102          .subgroup_ballot = true,
103          .subgroup_dispatch = true,
104          .subgroup_quad = true,
105          .subgroup_uniform_control_flow = true,
106          .subgroup_shuffle = true,
107          .subgroup_vote = true,
108          .tessellation = true,
109          .transform_feedback = true,
110          .variable_pointers = true,
111          .vk_memory_model = true,
112          .vk_memory_model_device_scope = true,
113          .workgroup_memory_explicit_layout = true,
114       },
115       .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_flags),
116       .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_flags),
117       .phys_ssbo_addr_format = nir_address_format_64bit_global,
118       .push_const_addr_format = nir_address_format_logical,
119 
120       /* TODO: Consider changing this to an address format that has the NULL
121        * pointer equal to 0.  That might be a better format to play nice
122        * with certain code / code generators.
123        */
124       .shared_addr_format = nir_address_format_32bit_offset,
125 
126       .min_ubo_alignment = ANV_UBO_ALIGNMENT,
127       .min_ssbo_alignment = ANV_SSBO_ALIGNMENT,
128    };
129 
130    nir_shader *nir;
131    VkResult result =
132       vk_pipeline_shader_stage_to_nir(&device->vk, stage_info,
133                                       &spirv_options, nir_options,
134                                       mem_ctx, &nir);
135    if (result != VK_SUCCESS)
136       return NULL;
137 
138    if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
139       fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
140               gl_shader_stage_name(stage));
141       nir_print_shader(nir, stderr);
142    }
143 
144    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
145               nir_shader_get_entrypoint(nir), true, false);
146 
147    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
148       .point_coord = true,
149    };
150    NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
151 
152    const nir_opt_access_options opt_access_options = {
153       .is_vulkan = true,
154    };
155    NIR_PASS(_, nir, nir_opt_access, &opt_access_options);
156 
157    /* Vulkan uses the separate-shader linking model */
158    nir->info.separate_shader = true;
159 
160    struct elk_nir_compiler_opts opts = {};
161 
162    elk_preprocess_nir(compiler, nir, &opts);
163 
164    return nir;
165 }
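/* A rough sketch of the flow above: the SPIR-V module referenced by the
 * VkPipelineShaderStageCreateInfo is translated to NIR by
 * vk_pipeline_shader_stage_to_nir() using the capability bits declared in
 * spirv_options, optionally dumped when the matching INTEL_DEBUG stage flag
 * is set, and then run through a few generic lowering passes and
 * elk_preprocess_nir() before any pipeline-specific lowering happens in
 * anv_pipeline_lower_nir() below.
 */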
166 
167 VkResult
168 anv_pipeline_init(struct anv_pipeline *pipeline,
169                   struct anv_device *device,
170                   enum anv_pipeline_type type,
171                   VkPipelineCreateFlags flags,
172                   const VkAllocationCallbacks *pAllocator)
173 {
174    VkResult result;
175 
176    memset(pipeline, 0, sizeof(*pipeline));
177 
178    vk_object_base_init(&device->vk, &pipeline->base,
179                        VK_OBJECT_TYPE_PIPELINE);
180    pipeline->device = device;
181 
182    /* It's the job of the child class to provide actual backing storage for
183     * the batch by setting batch.start, batch.next, and batch.end.
184     */
185    pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
186    pipeline->batch.relocs = &pipeline->batch_relocs;
187    pipeline->batch.status = VK_SUCCESS;
188 
189    result = anv_reloc_list_init(&pipeline->batch_relocs,
190                                 pipeline->batch.alloc);
191    if (result != VK_SUCCESS)
192       return result;
193 
194    pipeline->mem_ctx = ralloc_context(NULL);
195 
196    pipeline->type = type;
197    pipeline->flags = flags;
198 
199    util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
200 
201    return VK_SUCCESS;
202 }
203 
204 void
205 anv_pipeline_finish(struct anv_pipeline *pipeline,
206                     struct anv_device *device,
207                     const VkAllocationCallbacks *pAllocator)
208 {
209    anv_reloc_list_finish(&pipeline->batch_relocs,
210                          pAllocator ? pAllocator : &device->vk.alloc);
211    ralloc_free(pipeline->mem_ctx);
212    vk_object_base_finish(&pipeline->base);
213 }
214 
215 void anv_DestroyPipeline(
216     VkDevice                                    _device,
217     VkPipeline                                  _pipeline,
218     const VkAllocationCallbacks*                pAllocator)
219 {
220    ANV_FROM_HANDLE(anv_device, device, _device);
221    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
222 
223    if (!pipeline)
224       return;
225 
226    switch (pipeline->type) {
227    case ANV_PIPELINE_GRAPHICS: {
228       struct anv_graphics_pipeline *gfx_pipeline =
229          anv_pipeline_to_graphics(pipeline);
230 
231       for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
232          if (gfx_pipeline->shaders[s])
233             anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
234       }
235       break;
236    }
237 
238    case ANV_PIPELINE_COMPUTE: {
239       struct anv_compute_pipeline *compute_pipeline =
240          anv_pipeline_to_compute(pipeline);
241 
242       if (compute_pipeline->cs)
243          anv_shader_bin_unref(device, compute_pipeline->cs);
244 
245       break;
246    }
247 
248    default:
249       unreachable("invalid pipeline type");
250    }
251 
252    anv_pipeline_finish(pipeline, device, pAllocator);
253    vk_free2(&device->vk.alloc, pAllocator, pipeline);
254 }
255 
256 static void
257 populate_sampler_prog_key(const struct intel_device_info *devinfo,
258                           struct elk_sampler_prog_key_data *key)
259 {
260    /* XXX: Handle texture swizzle Pre-HSW */
261 }
262 
263 static void
264 populate_base_prog_key(const struct anv_device *device,
265                        enum elk_robustness_flags robust_flags,
266                        struct elk_base_prog_key *key)
267 {
268    key->robust_flags = robust_flags;
269    key->limit_trig_input_range =
270       device->physical->instance->limit_trig_input_range;
271 
272    populate_sampler_prog_key(device->info, &key->tex);
273 }
274 
275 static void
276 populate_vs_prog_key(const struct anv_device *device,
277                      enum elk_robustness_flags robust_flags,
278                      struct elk_vs_prog_key *key)
279 {
280    memset(key, 0, sizeof(*key));
281 
282    populate_base_prog_key(device, robust_flags, &key->base);
283 
284    /* XXX: Handle vertex input work-arounds */
285 
286    /* XXX: Handle sampler_prog_key */
287 }
288 
289 static void
290 populate_tcs_prog_key(const struct anv_device *device,
291                       enum elk_robustness_flags robust_flags,
292                       unsigned input_vertices,
293                       struct elk_tcs_prog_key *key)
294 {
295    memset(key, 0, sizeof(*key));
296 
297    populate_base_prog_key(device, robust_flags, &key->base);
298 
299    key->input_vertices = input_vertices;
300 }
301 
302 static void
303 populate_tes_prog_key(const struct anv_device *device,
304                       enum elk_robustness_flags robust_flags,
305                       struct elk_tes_prog_key *key)
306 {
307    memset(key, 0, sizeof(*key));
308 
309    populate_base_prog_key(device, robust_flags, &key->base);
310 }
311 
312 static void
313 populate_gs_prog_key(const struct anv_device *device,
314                      enum elk_robustness_flags robust_flags,
315                      struct elk_gs_prog_key *key)
316 {
317    memset(key, 0, sizeof(*key));
318 
319    populate_base_prog_key(device, robust_flags, &key->base);
320 }
321 
322 static void
323 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
324                      enum elk_robustness_flags robust_flags,
325                      const BITSET_WORD *dynamic,
326                      const struct vk_multisample_state *ms,
327                      const struct vk_render_pass_state *rp,
328                      struct elk_wm_prog_key *key)
329 {
330    const struct anv_device *device = pipeline->base.device;
331 
332    memset(key, 0, sizeof(*key));
333 
334    populate_base_prog_key(device, robust_flags, &key->base);
335 
336    /* We set this to 0 here and set it to the actual value before we call
337     * elk_compile_fs.
338     */
339    key->input_slots_valid = 0;
340 
341    /* XXX Vulkan doesn't appear to specify */
342    key->clamp_fragment_color = false;
343 
344    key->ignore_sample_mask_out = false;
345 
346    assert(rp->color_attachment_count <= MAX_RTS);
347    /* Consider all inputs as valid until we look at the NIR variables. */
348    key->color_outputs_valid = (1u << rp->color_attachment_count) - 1;
349    key->nr_color_regions = rp->color_attachment_count;
350 
351    /* To reduce possible shader recompilations we would need to know if
352     * there is a SampleMask output variable to compute if we should emit
353     * code to work around the issue that hardware disables alpha to coverage
354     * when there is a SampleMask output.
355     */
356    key->alpha_to_coverage = ms != NULL && ms->alpha_to_coverage_enable ?
357       ELK_ALWAYS : ELK_NEVER;
358 
359    /* Vulkan doesn't support fixed-function alpha test */
360    key->alpha_test_replicate_alpha = false;
361 
362    if (ms != NULL) {
363       /* We should probably pull this out of the shader, but it's fairly
364        * harmless to compute it and then let dead-code take care of it.
365        */
366       if (ms->rasterization_samples > 1) {
367          key->persample_interp =
368             (ms->sample_shading_enable &&
369              (ms->min_sample_shading * ms->rasterization_samples) > 1) ?
370             ELK_ALWAYS : ELK_NEVER;
371          key->multisample_fbo = ELK_ALWAYS;
372       }
373 
374       if (device->physical->instance->sample_mask_out_opengl_behaviour)
375          key->ignore_sample_mask_out = !key->multisample_fbo;
376    }
377 }
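/* Worked example for the persample_interp computation above: with
 * rasterization_samples = 4, sample_shading_enable = true and
 * min_sample_shading = 0.5, the product 0.5 * 4 = 2 is greater than 1, so
 * persample_interp = ELK_ALWAYS and multisample_fbo = ELK_ALWAYS.  With
 * min_sample_shading = 0.25 the product is exactly 1, the condition fails,
 * and persample_interp stays ELK_NEVER.
 */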
378 
379 static void
380 populate_cs_prog_key(const struct anv_device *device,
381                      enum elk_robustness_flags robust_flags,
382                      struct elk_cs_prog_key *key)
383 {
384    memset(key, 0, sizeof(*key));
385 
386    populate_base_prog_key(device, robust_flags, &key->base);
387 }
388 
389 struct anv_pipeline_stage {
390    gl_shader_stage stage;
391 
392    const VkPipelineShaderStageCreateInfo *info;
393 
394    unsigned char shader_sha1[20];
395 
396    union elk_any_prog_key key;
397 
398    struct {
399       gl_shader_stage stage;
400       unsigned char sha1[20];
401    } cache_key;
402 
403    nir_shader *nir;
404 
405    struct anv_pipeline_binding surface_to_descriptor[256];
406    struct anv_pipeline_binding sampler_to_descriptor[256];
407    struct anv_pipeline_bind_map bind_map;
408 
409    union elk_any_prog_data prog_data;
410 
411    uint32_t num_stats;
412    struct elk_compile_stats stats[3];
413    char *disasm[3];
414 
415    VkPipelineCreationFeedback feedback;
416 
417    const unsigned *code;
418 
419    struct anv_shader_bin *bin;
420 };
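/* The structure above is transient compilation state: it carries the
 * per-stage create info and SPIR-V hash, the program key, the NIR after
 * lowering, the binding table (bind_map), the compiled prog_data / stats /
 * disassembly, creation feedback, and finally the anv_shader_bin that ends
 * up in the pipeline.  It only lives for the duration of pipeline creation.
 */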
421 
422 static void
423 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
424                            struct anv_pipeline_layout *layout,
425                            struct anv_pipeline_stage *stages,
426                            unsigned char *sha1_out)
427 {
428    struct mesa_sha1 ctx;
429    _mesa_sha1_init(&ctx);
430 
431    _mesa_sha1_update(&ctx, &pipeline->view_mask,
432                      sizeof(pipeline->view_mask));
433 
434    if (layout)
435       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
436 
437    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
438       if (stages[s].info) {
439          _mesa_sha1_update(&ctx, stages[s].shader_sha1,
440                            sizeof(stages[s].shader_sha1));
441          _mesa_sha1_update(&ctx, &stages[s].key, elk_prog_key_size(s));
442       }
443    }
444 
445    _mesa_sha1_final(&ctx, sha1_out);
446 }
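/* The resulting SHA-1 covers the view mask, the pipeline layout hash, and,
 * for every active stage, both the SPIR-V hash and the program key.  It is
 * copied into each stage's cache_key.sha1 by anv_graphics_pipeline_compile()
 * and used as the lookup key for the pipeline cache.
 */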
447 
448 static void
449 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
450                           struct anv_pipeline_layout *layout,
451                           struct anv_pipeline_stage *stage,
452                           unsigned char *sha1_out)
453 {
454    struct mesa_sha1 ctx;
455    _mesa_sha1_init(&ctx);
456 
457    if (layout)
458       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
459 
460    const struct anv_device *device = pipeline->base.device;
461 
462    const bool rba = device->vk.enabled_features.robustBufferAccess;
463    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
464 
465    const uint8_t afs = device->physical->instance->assume_full_subgroups;
466    _mesa_sha1_update(&ctx, &afs, sizeof(afs));
467 
468    _mesa_sha1_update(&ctx, stage->shader_sha1,
469                      sizeof(stage->shader_sha1));
470    _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
471 
472    _mesa_sha1_final(&ctx, sha1_out);
473 }
474 
475 static nir_shader *
476 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
477                            struct vk_pipeline_cache *cache,
478                            void *mem_ctx,
479                            struct anv_pipeline_stage *stage)
480 {
481    const struct elk_compiler *compiler =
482       pipeline->device->physical->compiler;
483    const nir_shader_compiler_options *nir_options =
484       compiler->nir_options[stage->stage];
485    nir_shader *nir;
486 
487    nir = anv_device_search_for_nir(pipeline->device, cache,
488                                    nir_options,
489                                    stage->shader_sha1,
490                                    mem_ctx);
491    if (nir) {
492       assert(nir->info.stage == stage->stage);
493       return nir;
494    }
495 
496    nir = anv_shader_stage_to_nir(pipeline->device, stage->info,
497                                  stage->key.base.robust_flags, mem_ctx);
498    if (nir) {
499       anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
500       return nir;
501    }
502 
503    return NULL;
504 }
505 
506 static void
507 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
508 {
509    assert(glsl_type_is_vector_or_scalar(type));
510 
511    uint32_t comp_size = glsl_type_is_boolean(type)
512       ? 4 : glsl_get_bit_size(type) / 8;
513    unsigned length = glsl_get_vector_elements(type);
514    *size = comp_size * length;
515    *align = comp_size * (length == 3 ? 4 : length);
516 }
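/* Example of the size/alignment rules implemented above, assuming 32-bit
 * components: a scalar float gives size = 4, align = 4; a vec3 gives
 * size = 12 but align = 16 (three-component vectors are padded to a
 * four-component alignment); booleans count as 4 bytes regardless of their
 * bit size.
 */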
517 
518 static void
519 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
520                        void *mem_ctx,
521                        struct anv_pipeline_stage *stage,
522                        struct anv_pipeline_layout *layout)
523 {
524    const struct anv_physical_device *pdevice = pipeline->device->physical;
525    const struct elk_compiler *compiler = pdevice->compiler;
526 
527    struct elk_stage_prog_data *prog_data = &stage->prog_data.base;
528    nir_shader *nir = stage->nir;
529 
530    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
531       NIR_PASS(_, nir, nir_lower_wpos_center);
532       NIR_PASS(_, nir, nir_lower_input_attachments,
533                &(nir_input_attachment_options) {
534                    .use_fragcoord_sysval = true,
535                    .use_layer_id_sysval = true,
536                });
537    }
538 
539    NIR_PASS(_, nir, anv_nir_lower_ycbcr_textures, layout);
540 
541    if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
542       struct anv_graphics_pipeline *gfx_pipeline =
543          anv_pipeline_to_graphics(pipeline);
544       NIR_PASS(_, nir, anv_nir_lower_multiview, gfx_pipeline->view_mask);
545    }
546 
547    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
548 
549    NIR_PASS(_, nir, elk_nir_lower_storage_image,
550             &(struct elk_nir_lower_storage_image_opts) {
551                .devinfo = compiler->devinfo,
552                .lower_loads = true,
553                .lower_stores = true,
554                .lower_atomics = true,
555                .lower_get_size = true,
556             });
557 
558    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global,
559             nir_address_format_64bit_global);
560    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const,
561             nir_address_format_32bit_offset);
562 
563    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
564    NIR_PASS_V(nir, anv_nir_apply_pipeline_layout,
565               pdevice, stage->key.base.robust_flags,
566               layout, &stage->bind_map);
567 
568    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo,
569             anv_nir_ubo_addr_format(pdevice, stage->key.base.robust_flags));
570    NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ssbo,
571             anv_nir_ssbo_addr_format(pdevice, stage->key.base.robust_flags));
572 
573    /* First run copy-prop to get rid of all of the vec() that address
574     * calculations often create and then constant-fold so that, when we
575     * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
576     */
577    NIR_PASS(_, nir, nir_copy_prop);
578    NIR_PASS(_, nir, nir_opt_constant_folding);
579 
580    NIR_PASS(_, nir, anv_nir_lower_ubo_loads);
581 
582    enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
583       nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access;
584 
585    /* In practice, most shaders do not have non-uniform-qualified
586     * accesses (see
587     * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
588     * so a cheaper check, which usually finds nothing, is run first.
589     */
590    if (nir_has_non_uniform_access(nir, lower_non_uniform_access_types)) {
591       NIR_PASS(_, nir, nir_opt_non_uniform_access);
592 
593       /* We don't support non-uniform UBOs, and non-uniform SSBO access is
594        * handled naturally by falling back to A64 messages.
595        */
596       NIR_PASS(_, nir, nir_lower_non_uniform_access,
597                &(nir_lower_non_uniform_access_options) {
598                   .types = lower_non_uniform_access_types,
599                   .callback = NULL,
600                });
601    }
602 
603    NIR_PASS_V(nir, anv_nir_compute_push_layout,
604               pdevice, stage->key.base.robust_flags,
605               prog_data, &stage->bind_map, mem_ctx);
606 
607    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
608       if (!nir->info.shared_memory_explicit_layout) {
609          NIR_PASS(_, nir, nir_lower_vars_to_explicit_types,
610                   nir_var_mem_shared, shared_type_info);
611       }
612 
613       NIR_PASS(_, nir, nir_lower_explicit_io,
614                nir_var_mem_shared, nir_address_format_32bit_offset);
615 
616       if (nir->info.zero_initialize_shared_memory &&
617           nir->info.shared_size > 0) {
618          /* The effective Shared Local Memory size is at least 1024 bytes and
619           * is always rounded to a power of two, so it is OK to align the size
620           * used by the shader to chunk_size -- which does simplify the logic.
621           */
622          const unsigned chunk_size = 16;
623          const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
624          assert(shared_size <=
625                 elk_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
626 
627          NIR_PASS(_, nir, nir_zero_initialize_shared_memory,
628                   shared_size, chunk_size);
629       }
630    }
631 
632    if (gl_shader_stage_is_compute(nir->info.stage)) {
633       NIR_PASS(_, nir, elk_nir_lower_cs_intrinsics, compiler->devinfo,
634                &stage->prog_data.cs);
635    }
636 
637    stage->nir = nir;
638 }
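/* Roughly, the ordering above matters: descriptors are flattened by
 * anv_nir_apply_pipeline_layout() before UBO/SSBO access is lowered to
 * explicit address arithmetic, copy-prop and constant folding run before
 * anv_nir_lower_ubo_loads() so constant offsets are visible to it, and the
 * push-constant layout is computed only after all of that, right before the
 * shared-memory and compute-specific lowering.
 */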
639 
640 static void
641 anv_pipeline_link_vs(const struct elk_compiler *compiler,
642                      struct anv_pipeline_stage *vs_stage,
643                      struct anv_pipeline_stage *next_stage)
644 {
645    if (next_stage)
646       elk_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
647 }
648 
649 static void
650 anv_pipeline_compile_vs(const struct elk_compiler *compiler,
651                         void *mem_ctx,
652                         struct anv_graphics_pipeline *pipeline,
653                         struct anv_pipeline_stage *vs_stage)
654 {
655    /* When using Primitive Replication for multiview, each view gets its own
656     * position slot.
657     */
658    uint32_t pos_slots =
659       (vs_stage->nir->info.per_view_outputs & VARYING_BIT_POS) ?
660       MAX2(1, util_bitcount(pipeline->view_mask)) : 1;
661 
662    /* Only position is allowed to be per-view */
663    assert(!(vs_stage->nir->info.per_view_outputs & ~VARYING_BIT_POS));
664 
665    elk_compute_vue_map(compiler->devinfo,
666                        &vs_stage->prog_data.vs.base.vue_map,
667                        vs_stage->nir->info.outputs_written,
668                        vs_stage->nir->info.separate_shader,
669                        pos_slots);
670 
671    vs_stage->num_stats = 1;
672 
673    struct elk_compile_vs_params params = {
674       .base = {
675          .nir = vs_stage->nir,
676          .stats = vs_stage->stats,
677          .log_data = pipeline->base.device,
678          .mem_ctx = mem_ctx,
679       },
680       .key = &vs_stage->key.vs,
681       .prog_data = &vs_stage->prog_data.vs,
682    };
683 
684    vs_stage->code = elk_compile_vs(compiler, &params);
685 }
686 
687 static void
688 merge_tess_info(struct shader_info *tes_info,
689                 const struct shader_info *tcs_info)
690 {
691    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
692     *
693     *    "PointMode. Controls generation of points rather than triangles
694     *     or lines. This functionality defaults to disabled, and is
695     *     enabled if either shader stage includes the execution mode."
696     *
697     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
698     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
699     * and OutputVertices, it says:
700     *
701     *    "One mode must be set in at least one of the tessellation
702     *     shader stages."
703     *
704     * So, the fields can be set in either the TCS or TES, but they must
705     * agree if set in both.  Our backend looks at TES, so bitwise-or in
706     * the values from the TCS.
707     */
708    assert(tcs_info->tess.tcs_vertices_out == 0 ||
709           tes_info->tess.tcs_vertices_out == 0 ||
710           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
711    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
712 
713    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
714           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
715           tcs_info->tess.spacing == tes_info->tess.spacing);
716    tes_info->tess.spacing |= tcs_info->tess.spacing;
717 
718    assert(tcs_info->tess._primitive_mode == 0 ||
719           tes_info->tess._primitive_mode == 0 ||
720           tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
721    tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
722    tes_info->tess.ccw |= tcs_info->tess.ccw;
723    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
724 }
725 
726 static void
727 anv_pipeline_link_tcs(const struct elk_compiler *compiler,
728                       struct anv_pipeline_stage *tcs_stage,
729                       struct anv_pipeline_stage *tes_stage)
730 {
731    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
732 
733    elk_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
734 
735    nir_lower_patch_vertices(tes_stage->nir,
736                             tcs_stage->nir->info.tess.tcs_vertices_out,
737                             NULL);
738 
739    /* Copy TCS info into the TES info */
740    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
741 
742    /* Whacking the key after cache lookup is a bit sketchy, but all of
743     * this comes from the SPIR-V, which is part of the hash used for the
744     * pipeline cache.  So it should be safe.
745     */
746    tcs_stage->key.tcs._tes_primitive_mode =
747       tes_stage->nir->info.tess._primitive_mode;
748    tcs_stage->key.tcs.quads_workaround =
749       compiler->devinfo->ver < 9 &&
750       tes_stage->nir->info.tess._primitive_mode == TESS_PRIMITIVE_QUADS &&
751       tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
752 }
753 
754 static void
755 anv_pipeline_compile_tcs(const struct elk_compiler *compiler,
756                          void *mem_ctx,
757                          struct anv_device *device,
758                          struct anv_pipeline_stage *tcs_stage,
759                          struct anv_pipeline_stage *prev_stage)
760 {
761    tcs_stage->key.tcs.outputs_written =
762       tcs_stage->nir->info.outputs_written;
763    tcs_stage->key.tcs.patch_outputs_written =
764       tcs_stage->nir->info.patch_outputs_written;
765 
766    tcs_stage->num_stats = 1;
767 
768    struct elk_compile_tcs_params params = {
769       .base = {
770          .nir = tcs_stage->nir,
771          .stats = tcs_stage->stats,
772          .log_data = device,
773          .mem_ctx = mem_ctx,
774       },
775       .key = &tcs_stage->key.tcs,
776       .prog_data = &tcs_stage->prog_data.tcs,
777    };
778 
779    tcs_stage->code = elk_compile_tcs(compiler, &params);
780 }
781 
782 static void
783 anv_pipeline_link_tes(const struct elk_compiler *compiler,
784                       struct anv_pipeline_stage *tes_stage,
785                       struct anv_pipeline_stage *next_stage)
786 {
787    if (next_stage)
788       elk_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
789 }
790 
791 static void
792 anv_pipeline_compile_tes(const struct elk_compiler *compiler,
793                          void *mem_ctx,
794                          struct anv_device *device,
795                          struct anv_pipeline_stage *tes_stage,
796                          struct anv_pipeline_stage *tcs_stage)
797 {
798    tes_stage->key.tes.inputs_read =
799       tcs_stage->nir->info.outputs_written;
800    tes_stage->key.tes.patch_inputs_read =
801       tcs_stage->nir->info.patch_outputs_written;
802 
803    tes_stage->num_stats = 1;
804 
805    struct elk_compile_tes_params params = {
806       .base = {
807          .nir = tes_stage->nir,
808          .stats = tes_stage->stats,
809          .log_data = device,
810          .mem_ctx = mem_ctx,
811       },
812       .key = &tes_stage->key.tes,
813       .prog_data = &tes_stage->prog_data.tes,
814       .input_vue_map = &tcs_stage->prog_data.tcs.base.vue_map,
815    };
816 
817    tes_stage->code = elk_compile_tes(compiler, &params);
818 }
819 
820 static void
821 anv_pipeline_link_gs(const struct elk_compiler *compiler,
822                      struct anv_pipeline_stage *gs_stage,
823                      struct anv_pipeline_stage *next_stage)
824 {
825    if (next_stage)
826       elk_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
827 }
828 
829 static void
830 anv_pipeline_compile_gs(const struct elk_compiler *compiler,
831                         void *mem_ctx,
832                         struct anv_device *device,
833                         struct anv_pipeline_stage *gs_stage,
834                         struct anv_pipeline_stage *prev_stage)
835 {
836    elk_compute_vue_map(compiler->devinfo,
837                        &gs_stage->prog_data.gs.base.vue_map,
838                        gs_stage->nir->info.outputs_written,
839                        gs_stage->nir->info.separate_shader, 1);
840 
841    gs_stage->num_stats = 1;
842 
843    struct elk_compile_gs_params params = {
844       .base = {
845          .nir = gs_stage->nir,
846          .stats = gs_stage->stats,
847          .log_data = device,
848          .mem_ctx = mem_ctx,
849       },
850       .key = &gs_stage->key.gs,
851       .prog_data = &gs_stage->prog_data.gs,
852    };
853 
854    gs_stage->code = elk_compile_gs(compiler, &params);
855 }
856 
857 static void
858 anv_pipeline_link_fs(const struct elk_compiler *compiler,
859                      struct anv_pipeline_stage *stage,
860                      const struct vk_render_pass_state *rp)
861 {
862    /* Initially color_outputs_valid marks all possible render targets as
863     * valid (see populate_wm_prog_key()), before we look at the shader
864     * variables. Here we look at the shader's output variables and compute
865     * the correct number of render target outputs.
866     */
867    stage->key.wm.color_outputs_valid = 0;
868    nir_foreach_shader_out_variable_safe(var, stage->nir) {
869       if (var->data.location < FRAG_RESULT_DATA0)
870          continue;
871 
872       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
873       const unsigned array_len =
874          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
875       assert(rt + array_len <= MAX_RTS);
876 
877       stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
878    }
879    stage->key.wm.color_outputs_valid &=
880       (1u << rp->color_attachment_count) - 1;
881    stage->key.wm.nr_color_regions =
882       util_last_bit(stage->key.wm.color_outputs_valid);
883 
884    unsigned num_rt_bindings;
885    struct anv_pipeline_binding rt_bindings[MAX_RTS];
886    if (stage->key.wm.nr_color_regions > 0) {
887       assert(stage->key.wm.nr_color_regions <= MAX_RTS);
888       for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
889          if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
890             rt_bindings[rt] = (struct anv_pipeline_binding) {
891                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
892                .index = rt,
893             };
894          } else {
895             /* Setup a null render target */
896             rt_bindings[rt] = (struct anv_pipeline_binding) {
897                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
898                .index = UINT32_MAX,
899             };
900          }
901       }
902       num_rt_bindings = stage->key.wm.nr_color_regions;
903    } else {
904       /* Setup a null render target */
905       rt_bindings[0] = (struct anv_pipeline_binding) {
906          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
907          .index = UINT32_MAX,
908       };
909       num_rt_bindings = 1;
910    }
911 
912    assert(num_rt_bindings <= MAX_RTS);
913    assert(stage->bind_map.surface_count == 0);
914    typed_memcpy(stage->bind_map.surface_to_descriptor,
915                 rt_bindings, num_rt_bindings);
916    stage->bind_map.surface_count += num_rt_bindings;
917 }
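/* Example: a fragment shader writing only FRAG_RESULT_DATA0 and
 * FRAG_RESULT_DATA2 with rp->color_attachment_count = 3 ends up with
 * color_outputs_valid = 0b101 and nr_color_regions = 3; render target 1 is
 * then given a null binding (index = UINT32_MAX) so the binding table still
 * has one entry per color region.
 */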
918 
919 static void
920 anv_pipeline_compile_fs(const struct elk_compiler *compiler,
921                         void *mem_ctx,
922                         struct anv_device *device,
923                         struct anv_pipeline_stage *fs_stage,
924                         struct anv_pipeline_stage *prev_stage)
925 {
926    /* TODO: we could set this to 0 based on the information in nir_shader, but
927     * we need this before we call spirv_to_nir.
928     */
929    assert(prev_stage);
930 
931    struct elk_compile_fs_params params = {
932       .base = {
933          .nir = fs_stage->nir,
934          .stats = fs_stage->stats,
935          .log_data = device,
936          .mem_ctx = mem_ctx,
937       },
938       .key = &fs_stage->key.wm,
939       .prog_data = &fs_stage->prog_data.wm,
940 
941       .allow_spilling = true,
942    };
943 
944    fs_stage->key.wm.input_slots_valid =
945       prev_stage->prog_data.vue.vue_map.slots_valid;
946 
947    fs_stage->code = elk_compile_fs(compiler, &params);
948 
949    fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
950                          (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
951                          (uint32_t)fs_stage->prog_data.wm.dispatch_32;
952 }
953 
954 static void
955 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
956                             struct anv_pipeline_stage *stage,
957                             struct elk_compile_stats *stats,
958                             uint32_t code_offset)
959 {
960    char *nir = NULL;
961    if (stage->nir &&
962        (pipeline->flags &
963         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
964       nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
965    }
966 
967    char *disasm = NULL;
968    if (stage->code &&
969        (pipeline->flags &
970         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
971       char *stream_data = NULL;
972       size_t stream_size = 0;
973       FILE *stream = open_memstream(&stream_data, &stream_size);
974 
975       uint32_t push_size = 0;
976       for (unsigned i = 0; i < 4; i++)
977          push_size += stage->bind_map.push_ranges[i].length;
978       if (push_size > 0) {
979          fprintf(stream, "Push constant ranges:\n");
980          for (unsigned i = 0; i < 4; i++) {
981             if (stage->bind_map.push_ranges[i].length == 0)
982                continue;
983 
984             fprintf(stream, "    RANGE%d (%dB): ", i,
985                     stage->bind_map.push_ranges[i].length * 32);
986 
987             switch (stage->bind_map.push_ranges[i].set) {
988             case ANV_DESCRIPTOR_SET_NULL:
989                fprintf(stream, "NULL");
990                break;
991 
992             case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
993                fprintf(stream, "Vulkan push constants and API params");
994                break;
995 
996             case ANV_DESCRIPTOR_SET_DESCRIPTORS:
997                fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
998                        stage->bind_map.push_ranges[i].index,
999                        stage->bind_map.push_ranges[i].start * 32);
1000                break;
1001 
1002             case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1003                unreachable("gl_NumWorkgroups is never pushed");
1004 
1005             case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1006                fprintf(stream, "Inline shader constant data (start=%dB)",
1007                        stage->bind_map.push_ranges[i].start * 32);
1008                break;
1009 
1010             case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1011                unreachable("Color attachments can't be pushed");
1012 
1013             default:
1014                fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1015                        stage->bind_map.push_ranges[i].set,
1016                        stage->bind_map.push_ranges[i].index,
1017                        stage->bind_map.push_ranges[i].start * 32);
1018                break;
1019             }
1020             fprintf(stream, "\n");
1021          }
1022          fprintf(stream, "\n");
1023       }
1024 
1025       /* Creating this is far cheaper than it looks.  It's perfectly fine to
1026        * do it for every binary.
1027        */
1028       elk_disassemble_with_errors(&pipeline->device->physical->compiler->isa,
1029                                   stage->code, code_offset, stream);
1030 
1031       fclose(stream);
1032 
1033       /* Copy it to a ralloc'd thing */
1034       disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1035       memcpy(disasm, stream_data, stream_size);
1036       disasm[stream_size] = 0;
1037 
1038       free(stream_data);
1039    }
1040 
1041    const struct anv_pipeline_executable exe = {
1042       .stage = stage->stage,
1043       .stats = *stats,
1044       .nir = nir,
1045       .disasm = disasm,
1046    };
1047    util_dynarray_append(&pipeline->executables,
1048                         struct anv_pipeline_executable, exe);
1049 }
1050 
1051 static void
1052 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1053                              struct anv_pipeline_stage *stage,
1054                              struct anv_shader_bin *bin)
1055 {
1056    if (stage->stage == MESA_SHADER_FRAGMENT) {
1057       /* We pull the prog data and stats out of the anv_shader_bin because
1058        * the anv_pipeline_stage may not be fully populated if we successfully
1059        * looked up the shader in a cache.
1060        */
1061       const struct elk_wm_prog_data *wm_prog_data =
1062          (const struct elk_wm_prog_data *)bin->prog_data;
1063       struct elk_compile_stats *stats = bin->stats;
1064 
1065       if (wm_prog_data->dispatch_8) {
1066          anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1067       }
1068 
1069       if (wm_prog_data->dispatch_16) {
1070          anv_pipeline_add_executable(pipeline, stage, stats++,
1071                                      wm_prog_data->prog_offset_16);
1072       }
1073 
1074       if (wm_prog_data->dispatch_32) {
1075          anv_pipeline_add_executable(pipeline, stage, stats++,
1076                                      wm_prog_data->prog_offset_32);
1077       }
1078    } else {
1079       anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1080    }
1081 }
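/* A fragment shader can have up to three SIMD variants (SIMD8, SIMD16 and
 * SIMD32), each living in the same binary at a different offset, which is
 * why it gets one executable per enabled dispatch width while every other
 * stage gets exactly one.
 */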
1082 
1083 static enum elk_robustness_flags
1084 anv_device_get_robust_flags(const struct anv_device *device)
1085 {
1086    return device->robust_buffer_access ?
1087           (ELK_ROBUSTNESS_UBO | ELK_ROBUSTNESS_SSBO) : 0;
1088 }
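/* These flags feed populate_base_prog_key() as well as the
 * anv_nir_ubo_addr_format() / anv_nir_ssbo_addr_format() calls in
 * anv_shader_stage_to_nir() and anv_pipeline_lower_nir(), so enabling
 * robustBufferAccess affects both the program keys and the address formats
 * used when lowering buffer access.
 */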
1089 
1090 static void
1091 anv_graphics_pipeline_init_keys(struct anv_graphics_pipeline *pipeline,
1092                                 const struct vk_graphics_pipeline_state *state,
1093                                 struct anv_pipeline_stage *stages)
1094 {
1095    for (uint32_t s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1096       if (!stages[s].info)
1097          continue;
1098 
1099       int64_t stage_start = os_time_get_nano();
1100 
1101       vk_pipeline_hash_shader_stage(stages[s].info, NULL, stages[s].shader_sha1);
1102 
1103       const struct anv_device *device = pipeline->base.device;
1104       enum elk_robustness_flags robust_flags = anv_device_get_robust_flags(device);
1105       switch (stages[s].stage) {
1106       case MESA_SHADER_VERTEX:
1107          populate_vs_prog_key(device,
1108                               robust_flags,
1109                               &stages[s].key.vs);
1110          break;
1111       case MESA_SHADER_TESS_CTRL:
1112          populate_tcs_prog_key(device,
1113                                robust_flags,
1114                                state->ts->patch_control_points,
1115                                &stages[s].key.tcs);
1116          break;
1117       case MESA_SHADER_TESS_EVAL:
1118          populate_tes_prog_key(device,
1119                                robust_flags,
1120                                &stages[s].key.tes);
1121          break;
1122       case MESA_SHADER_GEOMETRY:
1123          populate_gs_prog_key(device,
1124                               robust_flags,
1125                               &stages[s].key.gs);
1126          break;
1127       case MESA_SHADER_FRAGMENT: {
1128          populate_wm_prog_key(pipeline,
1129                               robust_flags,
1130                               state->dynamic, state->ms, state->rp,
1131                               &stages[s].key.wm);
1132          break;
1133       }
1134       default:
1135          unreachable("Invalid graphics shader stage");
1136       }
1137 
1138       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1139       stages[s].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
1140    }
1141 
1142    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1143 }
1144 
1145 static bool
1146 anv_graphics_pipeline_load_cached_shaders(struct anv_graphics_pipeline *pipeline,
1147                                           struct vk_pipeline_cache *cache,
1148                                           struct anv_pipeline_stage *stages,
1149                                           VkPipelineCreationFeedback *pipeline_feedback)
1150 {
1151    unsigned found = 0;
1152    unsigned cache_hits = 0;
1153    for (unsigned s = 0; s < ANV_GRAPHICS_SHADER_STAGE_COUNT; s++) {
1154       if (!stages[s].info)
1155          continue;
1156 
1157       int64_t stage_start = os_time_get_nano();
1158 
1159       bool cache_hit;
1160       struct anv_shader_bin *bin =
1161          anv_device_search_for_kernel(pipeline->base.device, cache,
1162                                       &stages[s].cache_key,
1163                                       sizeof(stages[s].cache_key), &cache_hit);
1164       if (bin) {
1165          found++;
1166          pipeline->shaders[s] = bin;
1167       }
1168 
1169       if (cache_hit) {
1170          cache_hits++;
1171          stages[s].feedback.flags |=
1172             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1173       }
1174       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1175    }
1176 
1177    if (found == __builtin_popcount(pipeline->active_stages)) {
1178       if (cache_hits == found) {
1179          pipeline_feedback->flags |=
1180             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1181       }
1182       /* We found all our shaders in the cache.  We're done. */
1183       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1184          if (!stages[s].info)
1185             continue;
1186 
1187          anv_pipeline_add_executables(&pipeline->base, &stages[s],
1188                                       pipeline->shaders[s]);
1189       }
1190       return true;
1191    } else if (found > 0) {
1192       /* We found some but not all of our shaders. This shouldn't happen most
1193        * of the time but it can if we have a partially populated pipeline
1194        * cache.
1195        */
1196       assert(found < __builtin_popcount(pipeline->active_stages));
1197 
1198       vk_perf(VK_LOG_OBJS(cache ? &cache->base :
1199                                   &pipeline->base.device->vk.base),
1200               "Found a partial pipeline in the cache.  This is "
1201               "most likely caused by an incomplete pipeline cache "
1202               "import or export");
1203 
1204       /* We're going to have to recompile anyway, so just throw away our
1205        * references to the shaders in the cache.  We'll get them out of the
1206        * cache again as part of the compilation process.
1207        */
1208       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1209          stages[s].feedback.flags = 0;
1210          if (pipeline->shaders[s]) {
1211             anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1212             pipeline->shaders[s] = NULL;
1213          }
1214       }
1215    }
1216 
1217    return false;
1218 }
1219 
1220 static const gl_shader_stage graphics_shader_order[] = {
1221    MESA_SHADER_VERTEX,
1222    MESA_SHADER_TESS_CTRL,
1223    MESA_SHADER_TESS_EVAL,
1224    MESA_SHADER_GEOMETRY,
1225 
1226    MESA_SHADER_FRAGMENT,
1227 };
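/* The array above is walked backwards for linking (each stage is linked
 * against the next enabled stage) and forwards for lowering and compilation
 * (each stage can consume interface information from the previous one), as
 * seen in anv_graphics_pipeline_compile() below.
 */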
1228 
1229 static VkResult
1230 anv_graphics_pipeline_load_nir(struct anv_graphics_pipeline *pipeline,
1231                                struct vk_pipeline_cache *cache,
1232                                struct anv_pipeline_stage *stages,
1233                                void *pipeline_ctx)
1234 {
1235    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1236       gl_shader_stage s = graphics_shader_order[i];
1237       if (!stages[s].info)
1238          continue;
1239 
1240       int64_t stage_start = os_time_get_nano();
1241 
1242       assert(stages[s].stage == s);
1243       assert(pipeline->shaders[s] == NULL);
1244 
1245       stages[s].bind_map = (struct anv_pipeline_bind_map) {
1246          .surface_to_descriptor = stages[s].surface_to_descriptor,
1247          .sampler_to_descriptor = stages[s].sampler_to_descriptor
1248       };
1249 
1250       stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1251                                                  pipeline_ctx,
1252                                                  &stages[s]);
1253       if (stages[s].nir == NULL) {
1254          return vk_error(pipeline, VK_ERROR_UNKNOWN);
1255       }
1256 
1257       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1258    }
1259 
1260    return VK_SUCCESS;
1261 }
1262 
1263 static VkResult
1264 anv_graphics_pipeline_compile(struct anv_graphics_pipeline *pipeline,
1265                               struct vk_pipeline_cache *cache,
1266                               const VkGraphicsPipelineCreateInfo *info,
1267                               const struct vk_graphics_pipeline_state *state)
1268 {
1269    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1270    VkResult result;
1271 
1272    VkPipelineCreationFeedback pipeline_feedback = {
1273       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1274    };
1275    int64_t pipeline_start = os_time_get_nano();
1276 
1277    const struct elk_compiler *compiler = pipeline->base.device->physical->compiler;
1278    struct anv_pipeline_stage stages[ANV_GRAPHICS_SHADER_STAGE_COUNT] = {};
1279    for (uint32_t i = 0; i < info->stageCount; i++) {
1280       gl_shader_stage stage = vk_to_mesa_shader_stage(info->pStages[i].stage);
1281       stages[stage].stage = stage;
1282       stages[stage].info = &info->pStages[i];
1283    }
1284 
1285    anv_graphics_pipeline_init_keys(pipeline, state, stages);
1286 
1287    unsigned char sha1[20];
1288    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1289 
1290    for (unsigned s = 0; s < ARRAY_SIZE(stages); s++) {
1291       if (!stages[s].info)
1292          continue;
1293 
1294       stages[s].cache_key.stage = s;
1295       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1296    }
1297 
1298    const bool skip_cache_lookup =
1299       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1300    if (!skip_cache_lookup) {
1301       bool found_all_shaders =
1302          anv_graphics_pipeline_load_cached_shaders(pipeline, cache, stages,
1303                                                    &pipeline_feedback);
1304       if (found_all_shaders)
1305          goto done;
1306    }
1307 
1308    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
1309       return VK_PIPELINE_COMPILE_REQUIRED;
1310 
1311    void *pipeline_ctx = ralloc_context(NULL);
1312 
1313    result = anv_graphics_pipeline_load_nir(pipeline, cache, stages,
1314                                            pipeline_ctx);
1315    if (result != VK_SUCCESS)
1316       goto fail;
1317 
1318    /* Walk backwards to link */
1319    struct anv_pipeline_stage *next_stage = NULL;
1320    for (int i = ARRAY_SIZE(graphics_shader_order) - 1; i >= 0; i--) {
1321       gl_shader_stage s = graphics_shader_order[i];
1322       if (!stages[s].info)
1323          continue;
1324 
1325       switch (s) {
1326       case MESA_SHADER_VERTEX:
1327          anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1328          break;
1329       case MESA_SHADER_TESS_CTRL:
1330          anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1331          break;
1332       case MESA_SHADER_TESS_EVAL:
1333          anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1334          break;
1335       case MESA_SHADER_GEOMETRY:
1336          anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1337          break;
1338       case MESA_SHADER_FRAGMENT:
1339          anv_pipeline_link_fs(compiler, &stages[s], state->rp);
1340          break;
1341       default:
1342          unreachable("Invalid graphics shader stage");
1343       }
1344 
1345       next_stage = &stages[s];
1346    }
1347 
1348    struct anv_pipeline_stage *prev_stage = NULL;
1349    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1350       gl_shader_stage s = graphics_shader_order[i];
1351       if (!stages[s].info)
1352          continue;
1353 
1354       int64_t stage_start = os_time_get_nano();
1355 
1356       void *stage_ctx = ralloc_context(NULL);
1357 
1358       anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1359 
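      /* For backends that want unified interfaces, make the producer's
       * written outputs and this stage's read inputs agree, leaving the
       * tessellation levels alone.
       */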
1360       if (prev_stage && compiler->nir_options[s]->unify_interfaces) {
1361          prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1362                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1363          stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1364                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1365          prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1366          stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1367       }
1368 
1369       ralloc_free(stage_ctx);
1370 
1371       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1372 
1373       prev_stage = &stages[s];
1374    }
1375 
1376    prev_stage = NULL;
1377    for (unsigned i = 0; i < ARRAY_SIZE(graphics_shader_order); i++) {
1378       gl_shader_stage s = graphics_shader_order[i];
1379       if (!stages[s].info)
1380          continue;
1381 
1382       int64_t stage_start = os_time_get_nano();
1383 
1384       void *stage_ctx = ralloc_context(NULL);
1385 
1386       switch (s) {
1387       case MESA_SHADER_VERTEX:
1388          anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1389                                  &stages[s]);
1390          break;
1391       case MESA_SHADER_TESS_CTRL:
1392          anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1393                                   &stages[s], prev_stage);
1394          break;
1395       case MESA_SHADER_TESS_EVAL:
1396          anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1397                                   &stages[s], prev_stage);
1398          break;
1399       case MESA_SHADER_GEOMETRY:
1400          anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1401                                  &stages[s], prev_stage);
1402          break;
1403       case MESA_SHADER_FRAGMENT:
1404          anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1405                                  &stages[s], prev_stage);
1406          break;
1407       default:
1408          unreachable("Invalid graphics shader stage");
1409       }
1410       if (stages[s].code == NULL) {
1411          ralloc_free(stage_ctx);
1412          result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1413          goto fail;
1414       }
1415 
1416       anv_nir_validate_push_layout(&stages[s].prog_data.base,
1417                                    &stages[s].bind_map);
1418 
1419       struct anv_shader_bin *bin =
1420          anv_device_upload_kernel(pipeline->base.device, cache, s,
1421                                   &stages[s].cache_key,
1422                                   sizeof(stages[s].cache_key),
1423                                   stages[s].code,
1424                                   stages[s].prog_data.base.program_size,
1425                                   &stages[s].prog_data.base,
1426                                   elk_prog_data_size(s),
1427                                   stages[s].stats, stages[s].num_stats,
1428                                   stages[s].nir->xfb_info,
1429                                   &stages[s].bind_map);
1430       if (!bin) {
1431          ralloc_free(stage_ctx);
1432          result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1433          goto fail;
1434       }
1435 
1436       anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1437 
1438       pipeline->shaders[s] = bin;
1439       ralloc_free(stage_ctx);
1440 
1441       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1442 
1443       prev_stage = &stages[s];
1444    }
1445 
1446    ralloc_free(pipeline_ctx);
1447 
1448 done:
1449 
1450    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1451 
1452    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1453       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1454    if (create_feedback) {
1455       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1456 
1457       uint32_t stage_count = create_feedback->pipelineStageCreationFeedbackCount;
1458       assert(stage_count == 0 || info->stageCount == stage_count);
1459       for (uint32_t i = 0; i < stage_count; i++) {
1460          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1461          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1462       }
1463    }
1464 
1465    return VK_SUCCESS;
1466 
1467 fail:
1468    ralloc_free(pipeline_ctx);
1469 
1470    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1471       if (pipeline->shaders[s])
1472          anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1473    }
1474 
1475    return result;
1476 }
1477 
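/* Compile (or fetch from the pipeline cache) the single compute shader of a
 * compute pipeline: hash the stage, look it up in the cache unless internal
 * representations were requested, and otherwise run it through spirv-to-nir,
 * lowering and the backend compiler before uploading the binary.
 */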
1478 static VkResult
1479 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1480                         struct vk_pipeline_cache *cache,
1481                         const VkComputePipelineCreateInfo *info)
1482 {
1483    const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;
1484    assert(sinfo->stage == VK_SHADER_STAGE_COMPUTE_BIT);
1485 
1486    VkPipelineCreationFeedback pipeline_feedback = {
1487       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1488    };
1489    int64_t pipeline_start = os_time_get_nano();
1490 
1491    struct anv_device *device = pipeline->base.device;
1492    const struct elk_compiler *compiler = device->physical->compiler;
1493 
1494    struct anv_pipeline_stage stage = {
1495       .stage = MESA_SHADER_COMPUTE,
1496       .info = &info->stage,
1497       .cache_key = {
1498          .stage = MESA_SHADER_COMPUTE,
1499       },
1500       .feedback = {
1501          .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
1502       },
1503    };
1504    vk_pipeline_hash_shader_stage(&info->stage, NULL, stage.shader_sha1);
1505 
1506    struct anv_shader_bin *bin = NULL;
1507 
1508    populate_cs_prog_key(device,
1509                         anv_device_get_robust_flags(device),
1510                         &stage.key.cs);
1511 
1512    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1513 
1514    const bool skip_cache_lookup =
1515       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1516 
1517    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1518 
1519    bool cache_hit = false;
1520    if (!skip_cache_lookup) {
1521       bin = anv_device_search_for_kernel(device, cache,
1522                                          &stage.cache_key,
1523                                          sizeof(stage.cache_key),
1524                                          &cache_hit);
1525    }
1526 
1527    if (bin == NULL &&
1528        (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
1529       return VK_PIPELINE_COMPILE_REQUIRED;
1530 
1531    void *mem_ctx = ralloc_context(NULL);
1532    if (bin == NULL) {
1533       int64_t stage_start = os_time_get_nano();
1534 
1535       stage.bind_map = (struct anv_pipeline_bind_map) {
1536          .surface_to_descriptor = stage.surface_to_descriptor,
1537          .sampler_to_descriptor = stage.sampler_to_descriptor
1538       };
1539 
1540       /* Set up a binding for the gl_NumWorkGroups */
1541       stage.bind_map.surface_count = 1;
1542       stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1543          .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1544       };
1545 
1546       stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1547       if (stage.nir == NULL) {
1548          ralloc_free(mem_ctx);
1549          return vk_error(pipeline, VK_ERROR_UNKNOWN);
1550       }
1551 
1552       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1553 
1554       unsigned local_size = stage.nir->info.workgroup_size[0] *
1555                             stage.nir->info.workgroup_size[1] *
1556                             stage.nir->info.workgroup_size[2];
1557 
1558       /* Games don't always request full subgroups when they should,
1559        * which can cause bugs, as they may expect a bigger subgroup
1560        * size than the one we choose for execution.
1561        */
1562       if (device->physical->instance->assume_full_subgroups &&
1563           stage.nir->info.uses_wide_subgroup_intrinsics &&
1564           stage.nir->info.subgroup_size == SUBGROUP_SIZE_API_CONSTANT &&
1565           local_size &&
1566           local_size % ELK_SUBGROUP_SIZE == 0)
1567          stage.nir->info.subgroup_size = SUBGROUP_SIZE_FULL_SUBGROUPS;
1568 
1569       /* If the client requests that we dispatch full subgroups but doesn't
1570        * allow us to pick a subgroup size, we have to smash it to the API
1571        * value of 32.  Performance will likely be terrible in this case but
1572        * there's nothing we can do about that.  The client should have chosen
1573        * a size.
1574        */
1575       if (stage.nir->info.subgroup_size == SUBGROUP_SIZE_FULL_SUBGROUPS)
1576          stage.nir->info.subgroup_size =
1577             device->physical->instance->assume_full_subgroups != 0 ?
1578             device->physical->instance->assume_full_subgroups : ELK_SUBGROUP_SIZE;
1579 
1580       stage.num_stats = 1;
1581 
1582       struct elk_compile_cs_params params = {
1583          .base = {
1584             .nir = stage.nir,
1585             .stats = stage.stats,
1586             .log_data = device,
1587             .mem_ctx = mem_ctx,
1588          },
1589          .key = &stage.key.cs,
1590          .prog_data = &stage.prog_data.cs,
1591       };
1592 
1593       stage.code = elk_compile_cs(compiler, &params);
1594       if (stage.code == NULL) {
1595          ralloc_free(mem_ctx);
1596          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1597       }
1598 
1599       anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1600 
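      /* If the compiled shader never reads gl_NumWorkGroups, turn the surface
       * binding reserved for it above into a null descriptor.
       */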
1601       if (!stage.prog_data.cs.uses_num_work_groups) {
1602          assert(stage.bind_map.surface_to_descriptor[0].set ==
1603                 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1604          stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1605       }
1606 
1607       const unsigned code_size = stage.prog_data.base.program_size;
1608       bin = anv_device_upload_kernel(device, cache,
1609                                      MESA_SHADER_COMPUTE,
1610                                      &stage.cache_key, sizeof(stage.cache_key),
1611                                      stage.code, code_size,
1612                                      &stage.prog_data.base,
1613                                      sizeof(stage.prog_data.cs),
1614                                      stage.stats, stage.num_stats,
1615                                      NULL, &stage.bind_map);
1616       if (!bin) {
1617          ralloc_free(mem_ctx);
1618          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1619       }
1620 
1621       stage.feedback.duration = os_time_get_nano() - stage_start;
1622    }
1623 
1624    anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1625 
1626    ralloc_free(mem_ctx);
1627 
1628    if (cache_hit) {
1629       stage.feedback.flags |=
1630          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1631       pipeline_feedback.flags |=
1632          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
1633    }
1634    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1635 
1636    const VkPipelineCreationFeedbackCreateInfo *create_feedback =
1637       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
1638    if (create_feedback) {
1639       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1640 
1641       if (create_feedback->pipelineStageCreationFeedbackCount) {
1642          assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1643          create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1644       }
1645    }
1646 
1647    pipeline->cs = bin;
1648 
1649    return VK_SUCCESS;
1650 }
1651 
1652 static VkResult
1653 anv_compute_pipeline_create(struct anv_device *device,
1654                             struct vk_pipeline_cache *cache,
1655                             const VkComputePipelineCreateInfo *pCreateInfo,
1656                             const VkAllocationCallbacks *pAllocator,
1657                             VkPipeline *pPipeline)
1658 {
1659    struct anv_compute_pipeline *pipeline;
1660    VkResult result;
1661 
1662    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);
1663 
1664    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1665                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1666    if (pipeline == NULL)
1667       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1668 
1669    result = anv_pipeline_init(&pipeline->base, device,
1670                               ANV_PIPELINE_COMPUTE, pCreateInfo->flags,
1671                               pAllocator);
1672    if (result != VK_SUCCESS) {
1673       vk_free2(&device->vk.alloc, pAllocator, pipeline);
1674       return result;
1675    }
1676 
1677    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1678                          pipeline->batch_data, sizeof(pipeline->batch_data));
1679 
1680    result = anv_pipeline_compile_cs(pipeline, cache, pCreateInfo);
1681    if (result != VK_SUCCESS) {
1682       anv_pipeline_finish(&pipeline->base, device, pAllocator);
1683       vk_free2(&device->vk.alloc, pAllocator, pipeline);
1684       return result;
1685    }
1686 
1687    anv_genX(device->info, compute_pipeline_emit)(pipeline);
1688 
1689    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1690 
1691    return pipeline->base.batch.status;
1692 }
1693 
1694 VkResult anv_CreateComputePipelines(
1695     VkDevice                                    _device,
1696     VkPipelineCache                             pipelineCache,
1697     uint32_t                                    count,
1698     const VkComputePipelineCreateInfo*          pCreateInfos,
1699     const VkAllocationCallbacks*                pAllocator,
1700     VkPipeline*                                 pPipelines)
1701 {
1702    ANV_FROM_HANDLE(anv_device, device, _device);
1703    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
1704 
1705    VkResult result = VK_SUCCESS;
1706 
1707    unsigned i;
1708    for (i = 0; i < count; i++) {
1709       VkResult res = anv_compute_pipeline_create(device, pipeline_cache,
1710                                                  &pCreateInfos[i],
1711                                                  pAllocator, &pPipelines[i]);
1712 
1713       if (res == VK_SUCCESS)
1714          continue;
1715 
1716       /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED, as it
1717        * is not obvious which error should be reported when two different failures occur.
1718        */
1719       result = res;
1720       if (res != VK_PIPELINE_COMPILE_REQUIRED)
1721          break;
1722 
1723       pPipelines[i] = VK_NULL_HANDLE;
1724 
1725       if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1726          break;
1727    }
1728 
1729    for (; i < count; i++)
1730       pPipelines[i] = VK_NULL_HANDLE;
1731 
1732    return result;
1733 }
1734 
1735 /**
1736  * Calculate the desired L3 partitioning based on the current state of the
1737  * pipeline.  For now this simply returns the conservative defaults calculated
1738  * by intel_get_default_l3_weights(), but we could probably do better by gathering
1739  * more statistics from the pipeline state (e.g. a guess of the expected URB
1740  * usage and bound surfaces), or by using feedback from performance counters.
1741  */
1742 void
1743 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
1744 {
1745    const struct intel_device_info *devinfo = pipeline->device->info;
1746 
1747    const struct intel_l3_weights w =
1748       intel_get_default_l3_weights(devinfo, true, needs_slm);
1749 
1750    pipeline->l3_config = intel_get_l3_config(devinfo, w);
1751 }
1752 
1753 static VkResult
1754 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
1755                            struct anv_device *device,
1756                            struct vk_pipeline_cache *cache,
1757                            const struct VkGraphicsPipelineCreateInfo *pCreateInfo,
1758                            const struct vk_graphics_pipeline_state *state,
1759                            const VkAllocationCallbacks *alloc)
1760 {
1761    VkResult result;
1762 
1763    result = anv_pipeline_init(&pipeline->base, device,
1764                               ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
1765                               alloc);
1766    if (result != VK_SUCCESS)
1767       return result;
1768 
1769    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
1770                          pipeline->batch_data, sizeof(pipeline->batch_data));
1771 
1772    pipeline->active_stages = 0;
1773    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
1774       pipeline->active_stages |= pCreateInfo->pStages[i].stage;
1775 
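   /* A tessellation evaluation stage cannot be used without a tessellation
    * control stage, so mark the control stage as active too.
    */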
1776    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1777       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1778 
1779    pipeline->dynamic_state.ms.sample_locations = &pipeline->sample_locations;
1780    vk_dynamic_graphics_state_fill(&pipeline->dynamic_state, state);
1781 
1782    pipeline->depth_clamp_enable = state->rs->depth_clamp_enable;
1783    pipeline->depth_clip_enable =
1784       vk_rasterization_state_depth_clip_enable(state->rs);
1785    pipeline->view_mask = state->rp->view_mask;
1786 
1787    result = anv_graphics_pipeline_compile(pipeline, cache, pCreateInfo, state);
1788    if (result != VK_SUCCESS) {
1789       anv_pipeline_finish(&pipeline->base, device, alloc);
1790       return result;
1791    }
1792 
1793    anv_pipeline_setup_l3_config(&pipeline->base, false);
1794 
1795    const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
1796 
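   /* Only record vertex-buffer bindings that feed attributes the compiled
    * vertex shader actually reads.
    */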
1797    u_foreach_bit(a, state->vi->attributes_valid) {
1798       if (inputs_read & BITFIELD64_BIT(VERT_ATTRIB_GENERIC0 + a))
1799          pipeline->vb_used |= BITFIELD64_BIT(state->vi->attributes[a].binding);
1800    }
1801 
1802    u_foreach_bit(b, state->vi->bindings_valid) {
1803       pipeline->vb[b].stride = state->vi->bindings[b].stride;
1804       pipeline->vb[b].instanced = state->vi->bindings[b].input_rate ==
1805          VK_VERTEX_INPUT_RATE_INSTANCE;
1806       pipeline->vb[b].instance_divisor = state->vi->bindings[b].divisor;
1807    }
1808 
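   /* With multiview, each enabled view is drawn as its own instance, so the
    * instance count gets multiplied by the number of views in the mask.
    */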
1809    pipeline->instance_multiplier = 1;
1810    if (pipeline->view_mask)
1811       pipeline->instance_multiplier = util_bitcount(pipeline->view_mask);
1812 
1813    pipeline->negative_one_to_one =
1814       state->vp != NULL && state->vp->depth_clip_negative_one_to_one;
1815 
1816    /* Store the line mode, polygon mode and rasterization samples; these are
1817     * used for dynamic primitive topology.
1818     */
1819    pipeline->polygon_mode = state->rs->polygon_mode;
1820    pipeline->rasterization_samples =
1821       state->ms != NULL ? state->ms->rasterization_samples : 1;
1822    pipeline->line_mode = state->rs->line.mode;
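   /* Resolve the default line rasterization mode: rectangular lines when
    * multisampling is enabled, Bresenham lines otherwise.
    */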
1823    if (pipeline->line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
1824       if (pipeline->rasterization_samples > 1) {
1825          pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
1826       } else {
1827          pipeline->line_mode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
1828       }
1829    }
1830    pipeline->patch_control_points =
1831       state->ts != NULL ? state->ts->patch_control_points : 0;
1832 
1833    /* Store the color write masks, to be merged with color write enable if
1834     * dynamic.
1835     */
1836    if (state->cb != NULL) {
1837       for (unsigned i = 0; i < state->cb->attachment_count; i++)
1838          pipeline->color_comp_writes[i] = state->cb->attachments[i].write_mask;
1839    }
1840 
1841    return VK_SUCCESS;
1842 }
1843 
1844 static VkResult
1845 anv_graphics_pipeline_create(struct anv_device *device,
1846                              struct vk_pipeline_cache *cache,
1847                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
1848                              const VkAllocationCallbacks *pAllocator,
1849                              VkPipeline *pPipeline)
1850 {
1851    struct anv_graphics_pipeline *pipeline;
1852    VkResult result;
1853 
1854    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
1855 
1856    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
1857                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1858    if (pipeline == NULL)
1859       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1860 
1861    struct vk_graphics_pipeline_all_state all;
1862    struct vk_graphics_pipeline_state state = { };
1863    result = vk_graphics_pipeline_state_fill(&device->vk, &state, pCreateInfo,
1864                                             NULL /* driver_rp */,
1865                                             0 /* driver_rp_flags */,
1866                                             &all, NULL, 0, NULL);
1867    if (result != VK_SUCCESS) {
1868       vk_free2(&device->vk.alloc, pAllocator, pipeline);
1869       return result;
1870    }
1871 
1872    result = anv_graphics_pipeline_init(pipeline, device, cache,
1873                                        pCreateInfo, &state, pAllocator);
1874    if (result != VK_SUCCESS) {
1875       vk_free2(&device->vk.alloc, pAllocator, pipeline);
1876       return result;
1877    }
1878 
1879    anv_genX(device->info, graphics_pipeline_emit)(pipeline, &state);
1880 
1881    *pPipeline = anv_pipeline_to_handle(&pipeline->base);
1882 
1883    return pipeline->base.batch.status;
1884 }
1885 
1886 VkResult anv_CreateGraphicsPipelines(
1887     VkDevice                                    _device,
1888     VkPipelineCache                             pipelineCache,
1889     uint32_t                                    count,
1890     const VkGraphicsPipelineCreateInfo*         pCreateInfos,
1891     const VkAllocationCallbacks*                pAllocator,
1892     VkPipeline*                                 pPipelines)
1893 {
1894    ANV_FROM_HANDLE(anv_device, device, _device);
1895    ANV_FROM_HANDLE(vk_pipeline_cache, pipeline_cache, pipelineCache);
1896 
1897    VkResult result = VK_SUCCESS;
1898 
1899    unsigned i;
1900    for (i = 0; i < count; i++) {
1901       VkResult res = anv_graphics_pipeline_create(device,
1902                                                   pipeline_cache,
1903                                                   &pCreateInfos[i],
1904                                                   pAllocator, &pPipelines[i]);
1905 
1906       if (res == VK_SUCCESS)
1907          continue;
1908 
1909       /* Bail out on the first error != VK_PIPELINE_COMPILE_REQUIRED, as it
1910        * is not obvious which error should be reported when two different failures occur.
1911        */
1912       result = res;
1913       if (res != VK_PIPELINE_COMPILE_REQUIRED)
1914          break;
1915 
1916       pPipelines[i] = VK_NULL_HANDLE;
1917 
1918       if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
1919          break;
1920    }
1921 
1922    for (; i < count; i++)
1923       pPipelines[i] = VK_NULL_HANDLE;
1924 
1925    return result;
1926 }
1927 
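/* Format a string into a fixed-size field, zeroing it first and asserting
 * that the formatted text was not truncated.
 */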
1928 #define WRITE_STR(field, ...) ({                               \
1929    memset(field, 0, sizeof(field));                            \
1930    UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
1931    assert(i > 0 && i < sizeof(field));                         \
1932 })
1933 
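/* Applications consume this entry point with the usual Vulkan two-call idiom.
 * A minimal, hypothetical application-side sketch (not driver code):
 *
 *    VkPipelineInfoKHR info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
 *       .pipeline = pipeline,
 *    };
 *    uint32_t count = 0;
 *    vkGetPipelineExecutablePropertiesKHR(device, &info, &count, NULL);
 *    VkPipelineExecutablePropertiesKHR *props = calloc(count, sizeof(*props));
 *    for (uint32_t i = 0; i < count; i++)
 *       props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
 *    vkGetPipelineExecutablePropertiesKHR(device, &info, &count, props);
 */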
1934 VkResult anv_GetPipelineExecutablePropertiesKHR(
1935     VkDevice                                    device,
1936     const VkPipelineInfoKHR*                    pPipelineInfo,
1937     uint32_t*                                   pExecutableCount,
1938     VkPipelineExecutablePropertiesKHR*          pProperties)
1939 {
1940    ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
1941    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
1942                           pProperties, pExecutableCount);
1943 
1944    util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
1945       vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
1946          gl_shader_stage stage = exe->stage;
1947          props->stages = mesa_to_vk_shader_stage(stage);
1948 
1949          unsigned simd_width = exe->stats.dispatch_width;
1950          if (stage == MESA_SHADER_FRAGMENT) {
1951             WRITE_STR(props->name, "%s%d %s",
1952                       simd_width ? "SIMD" : "vec",
1953                       simd_width ? simd_width : 4,
1954                       _mesa_shader_stage_to_string(stage));
1955          } else {
1956             WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
1957          }
1958          WRITE_STR(props->description, "%s%d %s shader",
1959                    simd_width ? "SIMD" : "vec",
1960                    simd_width ? simd_width : 4,
1961                    _mesa_shader_stage_to_string(stage));
1962 
1963          /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
1964           * wants a subgroup size of 1.
1965           */
1966          props->subgroupSize = MAX2(simd_width, 1);
1967       }
1968    }
1969 
1970    return vk_outarray_status(&out);
1971 }
1972 
1973 static const struct anv_pipeline_executable *
1974 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
1975 {
1976    assert(index < util_dynarray_num_elements(&pipeline->executables,
1977                                              struct anv_pipeline_executable));
1978    return util_dynarray_element(
1979       &pipeline->executables, struct anv_pipeline_executable, index);
1980 }
1981 
1982 VkResult anv_GetPipelineExecutableStatisticsKHR(
1983     VkDevice                                    device,
1984     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
1985     uint32_t*                                   pStatisticCount,
1986     VkPipelineExecutableStatisticKHR*           pStatistics)
1987 {
1988    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
1989    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
1990                           pStatistics, pStatisticCount);
1991 
1992    const struct anv_pipeline_executable *exe =
1993       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
1994 
1995    const struct elk_stage_prog_data *prog_data;
1996    switch (pipeline->type) {
1997    case ANV_PIPELINE_GRAPHICS: {
1998       prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
1999       break;
2000    }
2001    case ANV_PIPELINE_COMPUTE: {
2002       prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
2003       break;
2004    }
2005    default:
2006       unreachable("invalid pipeline type");
2007    }
2008 
2009    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2010       WRITE_STR(stat->name, "Instruction Count");
2011       WRITE_STR(stat->description,
2012                 "Number of GEN instructions in the final generated "
2013                 "shader executable.");
2014       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2015       stat->value.u64 = exe->stats.instructions;
2016    }
2017 
2018    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2019       WRITE_STR(stat->name, "SEND Count");
2020       WRITE_STR(stat->description,
2021                 "Number of instructions in the final generated shader "
2022                 "executable which access external units such as the "
2023                 "constant cache or the sampler.");
2024       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2025       stat->value.u64 = exe->stats.sends;
2026    }
2027 
2028    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2029       WRITE_STR(stat->name, "Loop Count");
2030       WRITE_STR(stat->description,
2031                 "Number of loops (not unrolled) in the final generated "
2032                 "shader executable.");
2033       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2034       stat->value.u64 = exe->stats.loops;
2035    }
2036 
2037    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2038       WRITE_STR(stat->name, "Cycle Count");
2039       WRITE_STR(stat->description,
2040                 "Estimate of the number of EU cycles required to execute "
2041                 "the final generated executable.  This is an estimate only "
2042                 "and may vary greatly from actual run-time performance.");
2043       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2044       stat->value.u64 = exe->stats.cycles;
2045    }
2046 
2047    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2048       WRITE_STR(stat->name, "Spill Count");
2049       WRITE_STR(stat->description,
2050                 "Number of scratch spill operations.  This gives a rough "
2051                 "estimate of the cost incurred due to spilling temporary "
2052                 "values to memory.  If this is non-zero, you may want to "
2053                 "adjust your shader to reduce register pressure.");
2054       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2055       stat->value.u64 = exe->stats.spills;
2056    }
2057 
2058    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2059       WRITE_STR(stat->name, "Fill Count");
2060       WRITE_STR(stat->description,
2061                 "Number of scratch fill operations.  This gives a rough "
2062                 "estimate of the cost incurred due to spilling temporary "
2063                 "values to memory.  If this is non-zero, you may want to "
2064                 "adjust your shader to reduce register pressure.");
2065       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2066       stat->value.u64 = exe->stats.fills;
2067    }
2068 
2069    vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2070       WRITE_STR(stat->name, "Scratch Memory Size");
2071       WRITE_STR(stat->description,
2072                 "Number of bytes of scratch memory required by the "
2073                 "generated shader executable.  If this is non-zero, you "
2074                 "may want to adjust your shader to reduce register "
2075                 "pressure.");
2076       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2077       stat->value.u64 = prog_data->total_scratch;
2078    }
2079 
2080    if (gl_shader_stage_uses_workgroup(exe->stage)) {
2081       vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
2082          WRITE_STR(stat->name, "Workgroup Memory Size");
2083          WRITE_STR(stat->description,
2084                    "Number of bytes of workgroup shared memory used by this "
2085                    "shader including any padding.");
2086          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
2087          stat->value.u64 = prog_data->total_shared;
2088       }
2089    }
2090 
2091    return vk_outarray_status(&out);
2092 }
2093 
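/* Copy a NUL-terminated IR string into the client-provided buffer following
 * the Vulkan two-call idiom: with pData == NULL only the required size is
 * returned; otherwise the string is copied and false is returned if it had
 * to be truncated.
 */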
2094 static bool
2095 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
2096               const char *data)
2097 {
2098    ir->isText = VK_TRUE;
2099 
2100    size_t data_len = strlen(data) + 1;
2101 
2102    if (ir->pData == NULL) {
2103       ir->dataSize = data_len;
2104       return true;
2105    }
2106 
2107    strncpy(ir->pData, data, ir->dataSize);
2108    if (ir->dataSize < data_len)
2109       return false;
2110 
2111    ir->dataSize = data_len;
2112    return true;
2113 }
2114 
2115 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
2116     VkDevice                                    device,
2117     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
2118     uint32_t*                                   pInternalRepresentationCount,
2119     VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
2120 {
2121    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
2122    VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
2123                           pInternalRepresentations, pInternalRepresentationCount);
2124    bool incomplete_text = false;
2125 
2126    const struct anv_pipeline_executable *exe =
2127       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
2128 
2129    if (exe->nir) {
2130       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
2131          WRITE_STR(ir->name, "Final NIR");
2132          WRITE_STR(ir->description,
2133                    "Final NIR before going into the back-end compiler");
2134 
2135          if (!write_ir_text(ir, exe->nir))
2136             incomplete_text = true;
2137       }
2138    }
2139 
2140    if (exe->disasm) {
2141       vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
2142          WRITE_STR(ir->name, "GEN Assembly");
2143          WRITE_STR(ir->description,
2144                    "Final GEN assembly for the generated shader binary");
2145 
2146          if (!write_ir_text(ir, exe->disasm))
2147             incomplete_text = true;
2148       }
2149    }
2150 
2151    return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
2152 }
2153