1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_disasm.h"
34 #include "common/intel_sample_positions.h"
35 #include "anv_private.h"
36 #include "compiler/brw_nir.h"
37 #include "compiler/brw_nir_rt.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_util.h"
42 
43 /* Needed for SWIZZLE macros */
44 #include "program/prog_instruction.h"
45 
46 // Shader functions
47 #define SPIR_V_MAGIC_NUMBER 0x07230203
48 
49 struct anv_spirv_debug_data {
50    struct anv_device *device;
51    const struct vk_shader_module *module;
52 };
53 
54 static void anv_spirv_nir_debug(void *private_data,
55                                 enum nir_spirv_debug_level level,
56                                 size_t spirv_offset,
57                                 const char *message)
58 {
59    struct anv_spirv_debug_data *debug_data = private_data;
60 
61    switch (level) {
62    case NIR_SPIRV_DEBUG_LEVEL_INFO:
63       vk_logi(VK_LOG_OBJS(&debug_data->module->base),
64               "SPIR-V offset %lu: %s",
65               (unsigned long) spirv_offset, message);
66       break;
67    case NIR_SPIRV_DEBUG_LEVEL_WARNING:
68       vk_logw(VK_LOG_OBJS(&debug_data->module->base),
69               "SPIR-V offset %lu: %s",
70               (unsigned long) spirv_offset, message);
71       break;
72    case NIR_SPIRV_DEBUG_LEVEL_ERROR:
73       vk_loge(VK_LOG_OBJS(&debug_data->module->base),
74               "SPIR-V offset %lu: %s",
75               (unsigned long) spirv_offset, message);
76       break;
77    default:
78       break;
79    }
80 }
81 
82 /* Eventually, this will become part of anv_CreateShader.  Unfortunately,
83  * we can't do that yet because we don't have the ability to copy nir.
84  */
85 static nir_shader *
86 anv_shader_compile_to_nir(struct anv_device *device,
87                           void *mem_ctx,
88                           const struct vk_shader_module *module,
89                           const char *entrypoint_name,
90                           gl_shader_stage stage,
91                           const VkSpecializationInfo *spec_info)
92 {
93    const struct anv_physical_device *pdevice = device->physical;
94    const struct brw_compiler *compiler = pdevice->compiler;
95    const nir_shader_compiler_options *nir_options =
96       compiler->glsl_compiler_options[stage].NirOptions;
97 
98    uint32_t *spirv = (uint32_t *) module->data;
99    assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
100    assert(module->size % 4 == 0);
101 
102    uint32_t num_spec_entries = 0;
103    struct nir_spirv_specialization *spec_entries =
104       vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);
105 
106    struct anv_spirv_debug_data spirv_debug_data = {
107       .device = device,
108       .module = module,
109    };
110    struct spirv_to_nir_options spirv_options = {
111       .caps = {
112          .demote_to_helper_invocation = true,
113          .derivative_group = true,
114          .descriptor_array_dynamic_indexing = true,
115          .descriptor_array_non_uniform_indexing = true,
116          .descriptor_indexing = true,
117          .device_group = true,
118          .draw_parameters = true,
119          .float16 = pdevice->info.ver >= 8,
120          .float32_atomic_add = pdevice->info.has_lsc,
121          .float32_atomic_min_max = pdevice->info.ver >= 9,
122          .float64 = pdevice->info.ver >= 8,
123          .float64_atomic_min_max = pdevice->info.has_lsc,
124          .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
125          .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
126          .geometry_streams = true,
127          /* When KHR_format_feature_flags2 is enabled, read/write without
128           * format support is reported per format, so just report true.  It's up to the
129           * application to check.
130           */
131          .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
132          .image_write_without_format = true,
133          .int8 = pdevice->info.ver >= 8,
134          .int16 = pdevice->info.ver >= 8,
135          .int64 = pdevice->info.ver >= 8,
136          .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
137          .integer_functions2 = pdevice->info.ver >= 8,
138          .min_lod = true,
139          .multiview = true,
140          .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
141          .post_depth_coverage = pdevice->info.ver >= 9,
142          .runtime_descriptor_array = true,
143          .float_controls = pdevice->info.ver >= 8,
144          .ray_tracing = pdevice->info.has_ray_tracing,
145          .shader_clock = true,
146          .shader_viewport_index_layer = true,
147          .stencil_export = pdevice->info.ver >= 9,
148          .storage_8bit = pdevice->info.ver >= 8,
149          .storage_16bit = pdevice->info.ver >= 8,
150          .subgroup_arithmetic = true,
151          .subgroup_basic = true,
152          .subgroup_ballot = true,
153          .subgroup_dispatch = true,
154          .subgroup_quad = true,
155          .subgroup_uniform_control_flow = true,
156          .subgroup_shuffle = true,
157          .subgroup_vote = true,
158          .tessellation = true,
159          .transform_feedback = pdevice->info.ver >= 8,
160          .variable_pointers = true,
161          .vk_memory_model = true,
162          .vk_memory_model_device_scope = true,
163          .workgroup_memory_explicit_layout = true,
164          .fragment_shading_rate = pdevice->info.ver >= 11,
165       },
166       .ubo_addr_format =
167          anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
168       .ssbo_addr_format =
169           anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
170       .phys_ssbo_addr_format = nir_address_format_64bit_global,
171       .push_const_addr_format = nir_address_format_logical,
172 
173       /* TODO: Consider changing this to an address format where the NULL
174        * pointer equals 0.  That might be a better format to play nice
175        * with certain code / code generators.
176        */
177       .shared_addr_format = nir_address_format_32bit_offset,
178       .debug = {
179          .func = anv_spirv_nir_debug,
180          .private_data = &spirv_debug_data,
181       },
182    };
183 
184 
185    nir_shader *nir =
186       spirv_to_nir(spirv, module->size / 4,
187                    spec_entries, num_spec_entries,
188                    stage, entrypoint_name, &spirv_options, nir_options);
189    if (!nir) {
190       free(spec_entries);
191       return NULL;
192    }
193 
194    assert(nir->info.stage == stage);
195    nir_validate_shader(nir, "after spirv_to_nir");
196    nir_validate_ssa_dominance(nir, "after spirv_to_nir");
197    ralloc_steal(mem_ctx, nir);
198 
199    free(spec_entries);
200 
201    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
202       .point_coord = true,
203    };
204    NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
205 
206    if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
207       fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
208               gl_shader_stage_name(stage));
209       nir_print_shader(nir, stderr);
210    }
211 
212    /* We have to lower away local constant initializers right before we
213     * inline functions.  That way they get properly initialized at the top
214     * of the function and not at the top of its caller.
215     */
216    NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
217    NIR_PASS_V(nir, nir_lower_returns);
218    NIR_PASS_V(nir, nir_inline_functions);
219    NIR_PASS_V(nir, nir_copy_prop);
220    NIR_PASS_V(nir, nir_opt_deref);
221 
222    /* Pick off the single entrypoint that we want */
223    foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
224       if (!func->is_entrypoint)
225          exec_node_remove(&func->node);
226    }
227    assert(exec_list_length(&nir->functions) == 1);
228 
229    /* Now that we've deleted all but the main function, we can go ahead and
230     * lower the rest of the constant initializers.  We do this here so that
231     * nir_remove_dead_variables and split_per_member_structs below see the
232     * corresponding stores.
233     */
234    NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
235 
236    /* Split member structs.  We do this before lower_io_to_temporaries so that
237     * it doesn't lower system values to temporaries by accident.
238     */
239    NIR_PASS_V(nir, nir_split_var_copies);
240    NIR_PASS_V(nir, nir_split_per_member_structs);
241 
242    NIR_PASS_V(nir, nir_remove_dead_variables,
243               nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
244               nir_var_shader_call_data | nir_var_ray_hit_attrib,
245               NULL);
246 
247    NIR_PASS_V(nir, nir_propagate_invariant, false);
248    NIR_PASS_V(nir, nir_lower_io_to_temporaries,
249               nir_shader_get_entrypoint(nir), true, false);
250 
251    NIR_PASS_V(nir, nir_lower_frexp);
252 
253    /* Vulkan uses the separate-shader linking model */
254    nir->info.separate_shader = true;
255 
256    brw_preprocess_nir(compiler, nir, NULL);
257 
258    return nir;
259 }
260 
261 VkResult
262 anv_pipeline_init(struct anv_pipeline *pipeline,
263                   struct anv_device *device,
264                   enum anv_pipeline_type type,
265                   VkPipelineCreateFlags flags,
266                   const VkAllocationCallbacks *pAllocator)
267 {
268    VkResult result;
269 
270    memset(pipeline, 0, sizeof(*pipeline));
271 
272    vk_object_base_init(&device->vk, &pipeline->base,
273                        VK_OBJECT_TYPE_PIPELINE);
274    pipeline->device = device;
275 
276    /* It's the job of the child class to provide actual backing storage for
277     * the batch by setting batch.start, batch.next, and batch.end.
278     */
279    pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
280    pipeline->batch.relocs = &pipeline->batch_relocs;
281    pipeline->batch.status = VK_SUCCESS;
282 
283    result = anv_reloc_list_init(&pipeline->batch_relocs,
284                                 pipeline->batch.alloc);
285    if (result != VK_SUCCESS)
286       return result;
287 
288    pipeline->mem_ctx = ralloc_context(NULL);
289 
290    pipeline->type = type;
291    pipeline->flags = flags;
292 
293    util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
294 
295    return VK_SUCCESS;
296 }
297 
298 void
299 anv_pipeline_finish(struct anv_pipeline *pipeline,
300                     struct anv_device *device,
301                     const VkAllocationCallbacks *pAllocator)
302 {
303    anv_reloc_list_finish(&pipeline->batch_relocs,
304                          pAllocator ? pAllocator : &device->vk.alloc);
305    ralloc_free(pipeline->mem_ctx);
306    vk_object_base_finish(&pipeline->base);
307 }
308 
309 void anv_DestroyPipeline(
310     VkDevice                                    _device,
311     VkPipeline                                  _pipeline,
312     const VkAllocationCallbacks*                pAllocator)
313 {
314    ANV_FROM_HANDLE(anv_device, device, _device);
315    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
316 
317    if (!pipeline)
318       return;
319 
320    switch (pipeline->type) {
321    case ANV_PIPELINE_GRAPHICS: {
322       struct anv_graphics_pipeline *gfx_pipeline =
323          anv_pipeline_to_graphics(pipeline);
324 
325       if (gfx_pipeline->blend_state.map)
326          anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
327       if (gfx_pipeline->cps_state.map)
328          anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
329 
330       for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
331          if (gfx_pipeline->shaders[s])
332             anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
333       }
334       break;
335    }
336 
337    case ANV_PIPELINE_COMPUTE: {
338       struct anv_compute_pipeline *compute_pipeline =
339          anv_pipeline_to_compute(pipeline);
340 
341       if (compute_pipeline->cs)
342          anv_shader_bin_unref(device, compute_pipeline->cs);
343 
344       break;
345    }
346 
347    case ANV_PIPELINE_RAY_TRACING: {
348       struct anv_ray_tracing_pipeline *rt_pipeline =
349          anv_pipeline_to_ray_tracing(pipeline);
350 
351       util_dynarray_foreach(&rt_pipeline->shaders,
352                             struct anv_shader_bin *, shader) {
353          anv_shader_bin_unref(device, *shader);
354       }
355       break;
356    }
357 
358    default:
359       unreachable("invalid pipeline type");
360    }
361 
362    anv_pipeline_finish(pipeline, device, pAllocator);
363    vk_free2(&device->vk.alloc, pAllocator, pipeline);
364 }
365 
366 static const uint32_t vk_to_intel_primitive_type[] = {
367    [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
368    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
369    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
370    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
371    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
372    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
373    [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
374    [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
375    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
376    [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
377 };
378 
379 static void
380 populate_sampler_prog_key(const struct intel_device_info *devinfo,
381                           struct brw_sampler_prog_key_data *key)
382 {
383    /* Almost all multisampled textures are compressed.  The only time when we
384     * don't compress a multisampled texture is for 16x MSAA with a surface
385     * width greater than 8k which is a bit of an edge case.  Since the sampler
386     * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
387     * to tell the compiler to always assume compression.
388     */
389    key->compressed_multisample_layout_mask = ~0;
390 
391    /* SkyLake added support for 16x MSAA.  With this came a new message for
392     * reading from a 16x MSAA surface with compression.  The new message was
393     * needed because now the MCS data is 64 bits instead of 32 or lower as is
394     * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
395     * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
396     * so we can just use it unconditionally.  This may not be quite as
397     * efficient but it saves us from recompiling.
398     */
399    if (devinfo->ver >= 9)
400       key->msaa_16 = ~0;
401 
402    /* XXX: Handle texture swizzle on HSW- */
403    for (int i = 0; i < MAX_SAMPLERS; i++) {
404       /* Assume color sampler, no swizzling. (Works for BDW+) */
405       key->swizzles[i] = SWIZZLE_XYZW;
406    }
407 }
408 
409 static void
410 populate_base_prog_key(const struct intel_device_info *devinfo,
411                        enum brw_subgroup_size_type subgroup_size_type,
412                        bool robust_buffer_acccess,
413                        struct brw_base_prog_key *key)
414 {
415    key->subgroup_size_type = subgroup_size_type;
416    key->robust_buffer_access = robust_buffer_acccess;
417 
418    populate_sampler_prog_key(devinfo, &key->tex);
419 }
420 
421 static void
422 populate_vs_prog_key(const struct intel_device_info *devinfo,
423                      enum brw_subgroup_size_type subgroup_size_type,
424                      bool robust_buffer_acccess,
425                      struct brw_vs_prog_key *key)
426 {
427    memset(key, 0, sizeof(*key));
428 
429    populate_base_prog_key(devinfo, subgroup_size_type,
430                           robust_buffer_acccess, &key->base);
431 
432    /* XXX: Handle vertex input work-arounds */
433 
434    /* XXX: Handle sampler_prog_key */
435 }
436 
437 static void
438 populate_tcs_prog_key(const struct intel_device_info *devinfo,
439                       enum brw_subgroup_size_type subgroup_size_type,
440                       bool robust_buffer_acccess,
441                       unsigned input_vertices,
442                       struct brw_tcs_prog_key *key)
443 {
444    memset(key, 0, sizeof(*key));
445 
446    populate_base_prog_key(devinfo, subgroup_size_type,
447                           robust_buffer_acccess, &key->base);
448 
449    key->input_vertices = input_vertices;
450 }
451 
452 static void
453 populate_tes_prog_key(const struct intel_device_info *devinfo,
454                       enum brw_subgroup_size_type subgroup_size_type,
455                       bool robust_buffer_acccess,
456                       struct brw_tes_prog_key *key)
457 {
458    memset(key, 0, sizeof(*key));
459 
460    populate_base_prog_key(devinfo, subgroup_size_type,
461                           robust_buffer_acccess, &key->base);
462 }
463 
464 static void
465 populate_gs_prog_key(const struct intel_device_info *devinfo,
466                      enum brw_subgroup_size_type subgroup_size_type,
467                      bool robust_buffer_acccess,
468                      struct brw_gs_prog_key *key)
469 {
470    memset(key, 0, sizeof(*key));
471 
472    populate_base_prog_key(devinfo, subgroup_size_type,
473                           robust_buffer_acccess, &key->base);
474 }
475 
476 static bool
477 pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
478                           const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
479 {
480    if (pipeline->sample_shading_enable)
481       return false;
482 
483    /* Not dynamic & not specified for the pipeline. */
484    if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
485       return false;
486 
487    /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
488     * possibility for any element of the pipeline to change the value.
489     */
490    if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
491        fsr_info->fragmentSize.width <= 1 &&
492        fsr_info->fragmentSize.height <= 1 &&
493        fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
494        fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
495       return false;
496 
497    return true;
498 }
499 
500 static bool
501 is_sample_shading(const VkPipelineMultisampleStateCreateInfo *ms_info)
502 {
503    return ms_info->sampleShadingEnable &&
504       (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
505 }
506 
507 static void
508 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
509                      VkPipelineShaderStageCreateFlags flags,
510                      bool robust_buffer_acccess,
511                      const struct anv_subpass *subpass,
512                      const VkPipelineMultisampleStateCreateInfo *ms_info,
513                      const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
514                      struct brw_wm_prog_key *key)
515 {
516    const struct anv_device *device = pipeline->base.device;
517    const struct intel_device_info *devinfo = &device->info;
518 
519    memset(key, 0, sizeof(*key));
520 
521    populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
522 
523    /* We set this to 0 here and set it to the actual value before we call
524     * brw_compile_fs.
525     */
526    key->input_slots_valid = 0;
527 
528    /* Vulkan doesn't specify a default */
529    key->high_quality_derivatives = false;
530 
531    /* XXX Vulkan doesn't appear to specify */
532    key->clamp_fragment_color = false;
533 
534    key->ignore_sample_mask_out = false;
535 
536    assert(subpass->color_count <= MAX_RTS);
537    for (uint32_t i = 0; i < subpass->color_count; i++) {
538       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
539          key->color_outputs_valid |= (1 << i);
540    }
541 
542    key->nr_color_regions = subpass->color_count;
543 
544    /* To reduce possible shader recompilations we would need to know if
545     * there is a SampleMask output variable, in order to decide whether we
546     * should emit code to work around the hardware disabling alpha to
547     * coverage when there is a SampleMask output.
548     */
549    key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
550 
551    /* Vulkan doesn't support fixed-function alpha test */
552    key->alpha_test_replicate_alpha = false;
553 
554    if (ms_info) {
555       key->persample_interp = is_sample_shading(ms_info);
556       key->multisample_fbo = ms_info->rasterizationSamples > 1;
557       key->frag_coord_adds_sample_pos = key->persample_interp;
558    }
559 
560    key->coarse_pixel =
561       device->vk.enabled_extensions.KHR_fragment_shading_rate &&
562       pipeline_has_coarse_pixel(pipeline, fsr_info);
563 }
564 
565 static void
566 populate_cs_prog_key(const struct intel_device_info *devinfo,
567                      enum brw_subgroup_size_type subgroup_size_type,
568                      bool robust_buffer_acccess,
569                      struct brw_cs_prog_key *key)
570 {
571    memset(key, 0, sizeof(*key));
572 
573    populate_base_prog_key(devinfo, subgroup_size_type,
574                           robust_buffer_acccess, &key->base);
575 }
576 
577 static void
578 populate_bs_prog_key(const struct intel_device_info *devinfo,
579                      VkPipelineShaderStageCreateFlags flags,
580                      bool robust_buffer_access,
581                      struct brw_bs_prog_key *key)
582 {
583    memset(key, 0, sizeof(*key));
584 
585    populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
586 }
587 
588 struct anv_pipeline_stage {
589    gl_shader_stage stage;
590 
591    const struct vk_shader_module *module;
592    const char *entrypoint;
593    const VkSpecializationInfo *spec_info;
594 
595    unsigned char shader_sha1[20];
596 
597    union brw_any_prog_key key;
598 
599    struct {
600       gl_shader_stage stage;
601       unsigned char sha1[20];
602    } cache_key;
603 
604    nir_shader *nir;
605 
606    struct anv_pipeline_binding surface_to_descriptor[256];
607    struct anv_pipeline_binding sampler_to_descriptor[256];
608    struct anv_pipeline_bind_map bind_map;
609 
610    union brw_any_prog_data prog_data;
611 
612    uint32_t num_stats;
613    struct brw_compile_stats stats[3];
614    char *disasm[3];
615 
616    VkPipelineCreationFeedbackEXT feedback;
617 
618    const unsigned *code;
619 
620    struct anv_shader_bin *bin;
621 };
622 
623 static void
624 anv_pipeline_hash_shader(const struct vk_shader_module *module,
625                          const char *entrypoint,
626                          gl_shader_stage stage,
627                          const VkSpecializationInfo *spec_info,
628                          unsigned char *sha1_out)
629 {
630    struct mesa_sha1 ctx;
631    _mesa_sha1_init(&ctx);
632 
633    _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
634    _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
635    _mesa_sha1_update(&ctx, &stage, sizeof(stage));
636    if (spec_info) {
637       _mesa_sha1_update(&ctx, spec_info->pMapEntries,
638                         spec_info->mapEntryCount *
639                         sizeof(*spec_info->pMapEntries));
640       _mesa_sha1_update(&ctx, spec_info->pData,
641                         spec_info->dataSize);
642    }
643 
644    _mesa_sha1_final(&ctx, sha1_out);
645 }
646 
647 static void
648 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
649                            struct anv_pipeline_layout *layout,
650                            struct anv_pipeline_stage *stages,
651                            unsigned char *sha1_out)
652 {
653    struct mesa_sha1 ctx;
654    _mesa_sha1_init(&ctx);
655 
656    _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
657                      sizeof(pipeline->subpass->view_mask));
658 
659    if (layout)
660       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
661 
662    const bool rba = pipeline->base.device->robust_buffer_access;
663    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
664 
665    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
666       if (stages[s].entrypoint) {
667          _mesa_sha1_update(&ctx, stages[s].shader_sha1,
668                            sizeof(stages[s].shader_sha1));
669          _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
670       }
671    }
672 
673    _mesa_sha1_final(&ctx, sha1_out);
674 }
675 
676 static void
677 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
678                           struct anv_pipeline_layout *layout,
679                           struct anv_pipeline_stage *stage,
680                           unsigned char *sha1_out)
681 {
682    struct mesa_sha1 ctx;
683    _mesa_sha1_init(&ctx);
684 
685    if (layout)
686       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
687 
688    const bool rba = pipeline->base.device->robust_buffer_access;
689    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
690 
691    _mesa_sha1_update(&ctx, stage->shader_sha1,
692                      sizeof(stage->shader_sha1));
693    _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
694 
695    _mesa_sha1_final(&ctx, sha1_out);
696 }
697 
698 static void
699 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
700                                      struct anv_pipeline_layout *layout,
701                                      struct anv_pipeline_stage *stage,
702                                      unsigned char *sha1_out)
703 {
704    struct mesa_sha1 ctx;
705    _mesa_sha1_init(&ctx);
706 
707    if (layout != NULL)
708       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
709 
710    const bool rba = pipeline->base.device->robust_buffer_access;
711    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
712 
713    _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
714    _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
715 
716    _mesa_sha1_final(&ctx, sha1_out);
717 }
718 
719 static void
720 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
721                                               struct anv_pipeline_layout *layout,
722                                               struct anv_pipeline_stage *intersection,
723                                               struct anv_pipeline_stage *any_hit,
724                                               unsigned char *sha1_out)
725 {
726    struct mesa_sha1 ctx;
727    _mesa_sha1_init(&ctx);
728 
729    if (layout != NULL)
730       _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
731 
732    const bool rba = pipeline->base.device->robust_buffer_access;
733    _mesa_sha1_update(&ctx, &rba, sizeof(rba));
734 
735    _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
736    _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
737    _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
738    _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
739 
740    _mesa_sha1_final(&ctx, sha1_out);
741 }
742 
743 static nir_shader *
744 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
745                            struct anv_pipeline_cache *cache,
746                            void *mem_ctx,
747                            struct anv_pipeline_stage *stage)
748 {
749    const struct brw_compiler *compiler =
750       pipeline->device->physical->compiler;
751    const nir_shader_compiler_options *nir_options =
752       compiler->glsl_compiler_options[stage->stage].NirOptions;
753    nir_shader *nir;
754 
755    nir = anv_device_search_for_nir(pipeline->device, cache,
756                                    nir_options,
757                                    stage->shader_sha1,
758                                    mem_ctx);
759    if (nir) {
760       assert(nir->info.stage == stage->stage);
761       return nir;
762    }
763 
764    nir = anv_shader_compile_to_nir(pipeline->device,
765                                    mem_ctx,
766                                    stage->module,
767                                    stage->entrypoint,
768                                    stage->stage,
769                                    stage->spec_info);
770    if (nir) {
771       anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
772       return nir;
773    }
774 
775    return NULL;
776 }
777 
778 static void
779 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
780 {
781    assert(glsl_type_is_vector_or_scalar(type));
782 
783    uint32_t comp_size = glsl_type_is_boolean(type)
784       ? 4 : glsl_get_bit_size(type) / 8;
785    unsigned length = glsl_get_vector_elements(type);
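       /* Note: a 3-component vector is aligned like a 4-component one, e.g. a
        * vec3 of 32-bit floats gets size = 12 bytes but align = 16 bytes.
        */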
786    *size = comp_size * length;
787    *align = comp_size * (length == 3 ? 4 : length);
788 }
789 
790 static void
791 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
792                        void *mem_ctx,
793                        struct anv_pipeline_stage *stage,
794                        struct anv_pipeline_layout *layout)
795 {
796    const struct anv_physical_device *pdevice = pipeline->device->physical;
797    const struct brw_compiler *compiler = pdevice->compiler;
798 
799    struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
800    nir_shader *nir = stage->nir;
801 
802    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
803       /* Check if sample shading is enabled in the shader and toggle it on
804        * for the pipeline, independently of whether sampleShadingEnable is set.
805        */
806       nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
807       if (nir->info.fs.uses_sample_shading)
808          anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;
809 
810       NIR_PASS_V(nir, nir_lower_wpos_center,
811                  anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
812       NIR_PASS_V(nir, nir_lower_input_attachments,
813                  &(nir_input_attachment_options) {
814                      .use_fragcoord_sysval = true,
815                      .use_layer_id_sysval = true,
816                  });
817    }
818 
819    NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
820 
821    if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
822       NIR_PASS_V(nir, anv_nir_lower_multiview,
823                  anv_pipeline_to_graphics(pipeline));
824    }
825 
826    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
827 
828    NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);
829 
830    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
831               nir_address_format_64bit_global);
832    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
833               nir_address_format_32bit_offset);
834 
835    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
836    anv_nir_apply_pipeline_layout(pdevice,
837                                  pipeline->device->robust_buffer_access,
838                                  layout, nir, &stage->bind_map);
839 
840    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
841               anv_nir_ubo_addr_format(pdevice,
842                  pipeline->device->robust_buffer_access));
843    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
844               anv_nir_ssbo_addr_format(pdevice,
845                  pipeline->device->robust_buffer_access));
846 
847    /* First run copy-prop to get rid of all of the vec() that address
848     * calculations often create and then constant-fold so that, when we
849     * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
850     */
851    NIR_PASS_V(nir, nir_copy_prop);
852    NIR_PASS_V(nir, nir_opt_constant_folding);
853 
854    NIR_PASS_V(nir, anv_nir_lower_ubo_loads);
855 
856    /* We don't support non-uniform UBOs and non-uniform SSBO access is
857     * handled naturally by falling back to A64 messages.
858     */
859    NIR_PASS_V(nir, nir_lower_non_uniform_access,
860               &(nir_lower_non_uniform_access_options) {
861                   .types = nir_lower_non_uniform_texture_access |
862                            nir_lower_non_uniform_image_access,
863                   .callback = NULL,
864               });
865 
866    anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
867                                nir, prog_data, &stage->bind_map, mem_ctx);
868 
869    if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
870       if (!nir->info.shared_memory_explicit_layout) {
871          NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
872                     nir_var_mem_shared, shared_type_info);
873       }
874 
875       NIR_PASS_V(nir, nir_lower_explicit_io,
876                  nir_var_mem_shared, nir_address_format_32bit_offset);
877 
878       if (nir->info.zero_initialize_shared_memory &&
879           nir->info.shared_size > 0) {
880          /* The effective Shared Local Memory size is at least 1024 bytes and
881           * is always rounded to a power of two, so it is OK to align the size
882           * used by the shader to chunk_size -- which does simplify the logic.
883           */
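             /* For example, a shader declaring 1000 bytes of shared memory gets
              * shared_size = ALIGN(1000, 16) = 1008 here, while the actual SLM
              * allocation will be at least the next power of two (1024 bytes).
              */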
884          const unsigned chunk_size = 16;
885          const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
886          assert(shared_size <=
887                 intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
888 
889          NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
890                     shared_size, chunk_size);
891       }
892    }
893 
894    stage->nir = nir;
895 }
896 
897 static void
898 anv_pipeline_link_vs(const struct brw_compiler *compiler,
899                      struct anv_pipeline_stage *vs_stage,
900                      struct anv_pipeline_stage *next_stage)
901 {
902    if (next_stage)
903       brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
904 }
905 
906 static void
907 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
908                         void *mem_ctx,
909                         struct anv_graphics_pipeline *pipeline,
910                         struct anv_pipeline_stage *vs_stage)
911 {
912    /* When using Primitive Replication for multiview, each view gets its own
913     * position slot.
914     */
915    uint32_t pos_slots = pipeline->use_primitive_replication ?
916       anv_subpass_view_count(pipeline->subpass) : 1;
917 
918    brw_compute_vue_map(compiler->devinfo,
919                        &vs_stage->prog_data.vs.base.vue_map,
920                        vs_stage->nir->info.outputs_written,
921                        vs_stage->nir->info.separate_shader,
922                        pos_slots);
923 
924    vs_stage->num_stats = 1;
925 
926    struct brw_compile_vs_params params = {
927       .nir = vs_stage->nir,
928       .key = &vs_stage->key.vs,
929       .prog_data = &vs_stage->prog_data.vs,
930       .stats = vs_stage->stats,
931       .log_data = pipeline->base.device,
932    };
933 
934    vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
935 }
936 
937 static void
938 merge_tess_info(struct shader_info *tes_info,
939                 const struct shader_info *tcs_info)
940 {
941    /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
942     *
943     *    "PointMode. Controls generation of points rather than triangles
944     *     or lines. This functionality defaults to disabled, and is
945     *     enabled if either shader stage includes the execution mode."
946     *
947     * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
948     * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
949     * and OutputVertices, it says:
950     *
951     *    "One mode must be set in at least one of the tessellation
952     *     shader stages."
953     *
954     * So, the fields can be set in either the TCS or TES, but they must
955     * agree if set in both.  Our backend looks at TES, so bitwise-or in
956     * the values from the TCS.
957     */
958    assert(tcs_info->tess.tcs_vertices_out == 0 ||
959           tes_info->tess.tcs_vertices_out == 0 ||
960           tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
961    tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
962 
963    assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
964           tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
965           tcs_info->tess.spacing == tes_info->tess.spacing);
966    tes_info->tess.spacing |= tcs_info->tess.spacing;
967 
968    assert(tcs_info->tess.primitive_mode == 0 ||
969           tes_info->tess.primitive_mode == 0 ||
970           tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
971    tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
972    tes_info->tess.ccw |= tcs_info->tess.ccw;
973    tes_info->tess.point_mode |= tcs_info->tess.point_mode;
974 }
975 
976 static void
977 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
978                       struct anv_pipeline_stage *tcs_stage,
979                       struct anv_pipeline_stage *tes_stage)
980 {
981    assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
982 
983    brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
984 
985    nir_lower_patch_vertices(tes_stage->nir,
986                             tcs_stage->nir->info.tess.tcs_vertices_out,
987                             NULL);
988 
989    /* Copy TCS info into the TES info */
990    merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
991 
992    /* Whacking the key after cache lookup is a bit sketchy, but all of
993     * this comes from the SPIR-V, which is part of the hash used for the
994     * pipeline cache.  So it should be safe.
995     */
996    tcs_stage->key.tcs.tes_primitive_mode =
997       tes_stage->nir->info.tess.primitive_mode;
998    tcs_stage->key.tcs.quads_workaround =
999       compiler->devinfo->ver < 9 &&
1000       tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
1001       tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
1002 }
1003 
1004 static void
1005 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
1006                          void *mem_ctx,
1007                          struct anv_device *device,
1008                          struct anv_pipeline_stage *tcs_stage,
1009                          struct anv_pipeline_stage *prev_stage)
1010 {
1011    tcs_stage->key.tcs.outputs_written =
1012       tcs_stage->nir->info.outputs_written;
1013    tcs_stage->key.tcs.patch_outputs_written =
1014       tcs_stage->nir->info.patch_outputs_written;
1015 
1016    tcs_stage->num_stats = 1;
1017    tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
1018                                      &tcs_stage->key.tcs,
1019                                      &tcs_stage->prog_data.tcs,
1020                                      tcs_stage->nir, -1,
1021                                      tcs_stage->stats, NULL);
1022 }
1023 
1024 static void
1025 anv_pipeline_link_tes(const struct brw_compiler *compiler,
1026                       struct anv_pipeline_stage *tes_stage,
1027                       struct anv_pipeline_stage *next_stage)
1028 {
1029    if (next_stage)
1030       brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
1031 }
1032 
1033 static void
1034 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
1035                          void *mem_ctx,
1036                          struct anv_device *device,
1037                          struct anv_pipeline_stage *tes_stage,
1038                          struct anv_pipeline_stage *tcs_stage)
1039 {
1040    tes_stage->key.tes.inputs_read =
1041       tcs_stage->nir->info.outputs_written;
1042    tes_stage->key.tes.patch_inputs_read =
1043       tcs_stage->nir->info.patch_outputs_written;
1044 
1045    tes_stage->num_stats = 1;
1046    tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
1047                                      &tes_stage->key.tes,
1048                                      &tcs_stage->prog_data.tcs.base.vue_map,
1049                                      &tes_stage->prog_data.tes,
1050                                      tes_stage->nir, -1,
1051                                      tes_stage->stats, NULL);
1052 }
1053 
1054 static void
1055 anv_pipeline_link_gs(const struct brw_compiler *compiler,
1056                      struct anv_pipeline_stage *gs_stage,
1057                      struct anv_pipeline_stage *next_stage)
1058 {
1059    if (next_stage)
1060       brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
1061 }
1062 
1063 static void
1064 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
1065                         void *mem_ctx,
1066                         struct anv_device *device,
1067                         struct anv_pipeline_stage *gs_stage,
1068                         struct anv_pipeline_stage *prev_stage)
1069 {
1070    brw_compute_vue_map(compiler->devinfo,
1071                        &gs_stage->prog_data.gs.base.vue_map,
1072                        gs_stage->nir->info.outputs_written,
1073                        gs_stage->nir->info.separate_shader, 1);
1074 
1075    gs_stage->num_stats = 1;
1076    gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
1077                                    &gs_stage->key.gs,
1078                                    &gs_stage->prog_data.gs,
1079                                    gs_stage->nir, -1,
1080                                    gs_stage->stats, NULL);
1081 }
1082 
1083 static void
1084 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1085                      struct anv_pipeline_stage *stage)
1086 {
1087    unsigned num_rt_bindings;
1088    struct anv_pipeline_binding rt_bindings[MAX_RTS];
1089    if (stage->key.wm.nr_color_regions > 0) {
1090       assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1091       for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1092          if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1093             rt_bindings[rt] = (struct anv_pipeline_binding) {
1094                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1095                .index = rt,
1096             };
1097          } else {
1098             /* Setup a null render target */
1099             rt_bindings[rt] = (struct anv_pipeline_binding) {
1100                .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1101                .index = UINT32_MAX,
1102             };
1103          }
1104       }
1105       num_rt_bindings = stage->key.wm.nr_color_regions;
1106    } else {
1107       /* Setup a null render target */
1108       rt_bindings[0] = (struct anv_pipeline_binding) {
1109          .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1110          .index = UINT32_MAX,
1111       };
1112       num_rt_bindings = 1;
1113    }
1114 
1115    assert(num_rt_bindings <= MAX_RTS);
1116    assert(stage->bind_map.surface_count == 0);
1117    typed_memcpy(stage->bind_map.surface_to_descriptor,
1118                 rt_bindings, num_rt_bindings);
1119    stage->bind_map.surface_count += num_rt_bindings;
1120 
1121    /* Now that we've set up the color attachments, we can go through and
1122     * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
1123     * hopes that dead-code elimination can clean them up in this and any
1124     * earlier shader stages.
1125     */
1126    nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
1127    bool deleted_output = false;
1128    nir_foreach_shader_out_variable_safe(var, stage->nir) {
1129       /* TODO: We don't delete depth/stencil writes.  We probably could if the
1130        * subpass doesn't have a depth/stencil attachment.
1131        */
1132       if (var->data.location < FRAG_RESULT_DATA0)
1133          continue;
1134 
1135       const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1136 
1137       /* If this is the RT at location 0 and we have alpha to coverage
1138        * enabled we still need that write because it will affect the coverage
1139        * mask even if it's never written to a color target.
1140        */
1141       if (rt == 0 && stage->key.wm.alpha_to_coverage)
1142          continue;
1143 
1144       const unsigned array_len =
1145          glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1146       assert(rt + array_len <= MAX_RTS);
1147 
1148       if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
1149                              BITFIELD_RANGE(rt, array_len))) {
1150          deleted_output = true;
1151          var->data.mode = nir_var_function_temp;
1152          exec_node_remove(&var->node);
1153          exec_list_push_tail(&impl->locals, &var->node);
1154       }
1155    }
1156 
1157    if (deleted_output)
1158       nir_fixup_deref_modes(stage->nir);
1159 
1160    /* We stored the number of subpass color attachments in nr_color_regions
1161     * when calculating the key for caching.  Now that we've computed the bind
1162     * map, we can reduce this to the actual max before we go into the back-end
1163     * compiler.
1164     */
1165    stage->key.wm.nr_color_regions =
1166       util_last_bit(stage->key.wm.color_outputs_valid);
1167 }
1168 
1169 static void
1170 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1171                         void *mem_ctx,
1172                         struct anv_device *device,
1173                         struct anv_pipeline_stage *fs_stage,
1174                         struct anv_pipeline_stage *prev_stage)
1175 {
1176    /* TODO: we could set this to 0 based on the information in nir_shader, but
1177     * we need this before we call spirv_to_nir.
1178     */
1179    assert(prev_stage);
1180    fs_stage->key.wm.input_slots_valid =
1181       prev_stage->prog_data.vue.vue_map.slots_valid;
1182 
1183    struct brw_compile_fs_params params = {
1184       .nir = fs_stage->nir,
1185       .key = &fs_stage->key.wm,
1186       .prog_data = &fs_stage->prog_data.wm,
1187 
1188       .allow_spilling = true,
1189       .stats = fs_stage->stats,
1190       .log_data = device,
1191    };
1192 
1193    fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
1194 
1195    fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1196                          (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1197                          (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1198 
1199    if (fs_stage->key.wm.color_outputs_valid == 0 &&
1200        !fs_stage->prog_data.wm.has_side_effects &&
1201        !fs_stage->prog_data.wm.uses_omask &&
1202        !fs_stage->key.wm.alpha_to_coverage &&
1203        !fs_stage->prog_data.wm.uses_kill &&
1204        fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1205        !fs_stage->prog_data.wm.computed_stencil) {
1206       /* This fragment shader has no outputs and no side effects.  Go ahead
1207        * and return the code pointer so we don't accidentally think the
1208        * compile failed, but zero out prog_data, which will set program_size to
1209        * zero and disable the stage.
1210        */
1211       memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1212    }
1213 }
1214 
1215 static void
1216 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1217                             struct anv_pipeline_stage *stage,
1218                             struct brw_compile_stats *stats,
1219                             uint32_t code_offset)
1220 {
1221    char *nir = NULL;
1222    if (stage->nir &&
1223        (pipeline->flags &
1224         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1225       nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1226    }
1227 
1228    char *disasm = NULL;
1229    if (stage->code &&
1230        (pipeline->flags &
1231         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1232       char *stream_data = NULL;
1233       size_t stream_size = 0;
1234       FILE *stream = open_memstream(&stream_data, &stream_size);
1235 
1236       uint32_t push_size = 0;
1237       for (unsigned i = 0; i < 4; i++)
1238          push_size += stage->bind_map.push_ranges[i].length;
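           /* Push range starts and lengths are stored in 32-byte units, which
            * is why the values below are multiplied by 32 when printed as byte
            * sizes.
            */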
1239       if (push_size > 0) {
1240          fprintf(stream, "Push constant ranges:\n");
1241          for (unsigned i = 0; i < 4; i++) {
1242             if (stage->bind_map.push_ranges[i].length == 0)
1243                continue;
1244 
1245             fprintf(stream, "    RANGE%d (%dB): ", i,
1246                     stage->bind_map.push_ranges[i].length * 32);
1247 
1248             switch (stage->bind_map.push_ranges[i].set) {
1249             case ANV_DESCRIPTOR_SET_NULL:
1250                fprintf(stream, "NULL");
1251                break;
1252 
1253             case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1254                fprintf(stream, "Vulkan push constants and API params");
1255                break;
1256 
1257             case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1258                fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1259                        stage->bind_map.push_ranges[i].index,
1260                        stage->bind_map.push_ranges[i].start * 32);
1261                break;
1262 
1263             case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1264                unreachable("gl_NumWorkgroups is never pushed");
1265 
1266             case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1267                fprintf(stream, "Inline shader constant data (start=%dB)",
1268                        stage->bind_map.push_ranges[i].start * 32);
1269                break;
1270 
1271             case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1272                unreachable("Color attachments can't be pushed");
1273 
1274             default:
1275                fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1276                        stage->bind_map.push_ranges[i].set,
1277                        stage->bind_map.push_ranges[i].index,
1278                        stage->bind_map.push_ranges[i].start * 32);
1279                break;
1280             }
1281             fprintf(stream, "\n");
1282          }
1283          fprintf(stream, "\n");
1284       }
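      /* Illustrative example of the dump produced above (hypothetical values,
       * shown only to document the format):
       *
       *    Push constant ranges:
       *        RANGE0 (64B): Vulkan push constants and API params
       *        RANGE1 (32B): Descriptor buffer for set 0 (start=0B)
       */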
1285 
1286       /* Creating this is far cheaper than it looks.  It's perfectly fine to
1287        * do it for every binary.
1288        */
1289       intel_disassemble(&pipeline->device->info,
1290                         stage->code, code_offset, stream);
1291 
1292       fclose(stream);
1293 
1294       /* Copy it to a ralloc'd thing */
1295       disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1296       memcpy(disasm, stream_data, stream_size);
1297       disasm[stream_size] = 0;
1298 
1299       free(stream_data);
1300    }
1301 
1302    const struct anv_pipeline_executable exe = {
1303       .stage = stage->stage,
1304       .stats = *stats,
1305       .nir = nir,
1306       .disasm = disasm,
1307    };
1308    util_dynarray_append(&pipeline->executables,
1309                         struct anv_pipeline_executable, exe);
1310 }
1311 
1312 static void
1313 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1314                              struct anv_pipeline_stage *stage,
1315                              struct anv_shader_bin *bin)
1316 {
1317    if (stage->stage == MESA_SHADER_FRAGMENT) {
1318       /* We pull the prog data and stats out of the anv_shader_bin because
1319        * the anv_pipeline_stage may not be fully populated if we successfully
1320        * looked up the shader in a cache.
1321        */
1322       const struct brw_wm_prog_data *wm_prog_data =
1323          (const struct brw_wm_prog_data *)bin->prog_data;
1324       struct brw_compile_stats *stats = bin->stats;
1325 
1326       if (wm_prog_data->dispatch_8) {
1327          anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1328       }
1329 
1330       if (wm_prog_data->dispatch_16) {
1331          anv_pipeline_add_executable(pipeline, stage, stats++,
1332                                      wm_prog_data->prog_offset_16);
1333       }
1334 
1335       if (wm_prog_data->dispatch_32) {
1336          anv_pipeline_add_executable(pipeline, stage, stats++,
1337                                      wm_prog_data->prog_offset_32);
1338       }
1339    } else {
1340       anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1341    }
1342 }
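/* The executables collected above back the VK_KHR_pipeline_executable_properties
 * queries.  A minimal, illustrative application-side sketch (not part of this
 * driver) for pulling out the captured NIR/disassembly:
 *
 *    VkPipelineInfoKHR pipe_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
 *       .pipeline = pipeline,
 *    };
 *    uint32_t exe_count = 0;
 *    vkGetPipelineExecutablePropertiesKHR(device, &pipe_info, &exe_count, NULL);
 *
 *    VkPipelineExecutableInfoKHR exe_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
 *       .pipeline = pipeline,
 *       .executableIndex = 0,
 *    };
 *    uint32_t ir_count = 0;
 *    vkGetPipelineExecutableInternalRepresentationsKHR(device, &exe_info,
 *                                                      &ir_count, NULL);
 *
 * The NIR and disassembly strings are only captured when the pipeline was
 * created with VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR.
 */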
1343 
1344 static enum brw_subgroup_size_type
1345 anv_subgroup_size_type(gl_shader_stage stage,
1346                        VkPipelineShaderStageCreateFlags flags,
1347                        const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
1348 {
1349    enum brw_subgroup_size_type subgroup_size_type;
1350 
1351    if (rss_info) {
1352       assert(stage == MESA_SHADER_COMPUTE);
1353       /* These enum values are expressly chosen to be equal to the subgroup
1354        * size that they require.
1355        */
1356       assert(rss_info->requiredSubgroupSize == 8 ||
1357              rss_info->requiredSubgroupSize == 16 ||
1358              rss_info->requiredSubgroupSize == 32);
1359       subgroup_size_type = rss_info->requiredSubgroupSize;
1360    } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
1361       subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
1362    } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
1363       assert(stage == MESA_SHADER_COMPUTE);
1364       /* If the client expressly requests full subgroups but specifies neither
1365        * a subgroup size nor varying subgroups, we need to pick one.  So we
1366        * specify the API value of 32.  Performance will likely be terrible in
1367        * this case but there's nothing we can do about that.  The client
1368        * should have chosen a size.
1369        */
1370       subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
1371    } else {
1372       subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
1373    }
1374 
1375    return subgroup_size_type;
1376 }
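/* Illustrative application-side chain for the required-subgroup-size path
 * handled above (hypothetical values, not part of this driver):
 *
 *    VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT rss = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
 *       .requiredSubgroupSize = 16,
 *    };
 *    VkPipelineShaderStageCreateInfo stage_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
 *       .pNext = &rss,
 *       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
 *       .module = module,
 *       .pName = "main",
 *    };
 *
 * With this chain, anv_subgroup_size_type() returns 16 directly, relying on
 * the BRW_SUBGROUP_SIZE_* values matching the sizes they require.
 */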
1377 
1378 static void
1379 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1380 {
1381    /* TODO: Cache this pipeline-wide information. */
1382 
1383    if (anv_pipeline_is_primitive(pipeline)) {
1384       /* Primitive replication depends on information from all the shaders.
1385        * Recover whether it is in use from the fact that the vertex shader
1386        * has more than one position slot when it is enabled.
1387        */
1388       assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1389       int pos_slots = 0;
1390       const struct brw_vue_prog_data *vue_prog_data =
1391          (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1392       const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1393       for (int i = 0; i < vue_map->num_slots; i++) {
1394          if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1395             pos_slots++;
1396       }
1397       pipeline->use_primitive_replication = pos_slots > 1;
1398    }
1399 }
1400 
1401 static VkResult
1402 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1403                               struct anv_pipeline_cache *cache,
1404                               const VkGraphicsPipelineCreateInfo *info)
1405 {
1406    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1407       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1408    };
1409    int64_t pipeline_start = os_time_get_nano();
1410 
1411    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1412    struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1413 
1414    /* Information on which states are considered dynamic. */
1415    const VkPipelineDynamicStateCreateInfo *dyn_info =
1416       info->pDynamicState;
1417    uint32_t dynamic_states = 0;
1418    if (dyn_info) {
1419       for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
1420          dynamic_states |=
1421             anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
1422    }
1423 
1424    VkResult result;
1425    for (uint32_t i = 0; i < info->stageCount; i++) {
1426       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1427       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1428 
1429       int64_t stage_start = os_time_get_nano();
1430 
1431       stages[stage].stage = stage;
1432       stages[stage].module = vk_shader_module_from_handle(sinfo->module);
1433       stages[stage].entrypoint = sinfo->pName;
1434       stages[stage].spec_info = sinfo->pSpecializationInfo;
1435       anv_pipeline_hash_shader(stages[stage].module,
1436                                stages[stage].entrypoint,
1437                                stage,
1438                                stages[stage].spec_info,
1439                                stages[stage].shader_sha1);
1440 
1441       enum brw_subgroup_size_type subgroup_size_type =
1442          anv_subgroup_size_type(stage, sinfo->flags, NULL);
1443 
1444       const struct intel_device_info *devinfo = &pipeline->base.device->info;
1445       switch (stage) {
1446       case MESA_SHADER_VERTEX:
1447          populate_vs_prog_key(devinfo, subgroup_size_type,
1448                               pipeline->base.device->robust_buffer_access,
1449                               &stages[stage].key.vs);
1450          break;
1451       case MESA_SHADER_TESS_CTRL:
1452          populate_tcs_prog_key(devinfo, subgroup_size_type,
1453                                pipeline->base.device->robust_buffer_access,
1454                                info->pTessellationState->patchControlPoints,
1455                                &stages[stage].key.tcs);
1456          break;
1457       case MESA_SHADER_TESS_EVAL:
1458          populate_tes_prog_key(devinfo, subgroup_size_type,
1459                                pipeline->base.device->robust_buffer_access,
1460                                &stages[stage].key.tes);
1461          break;
1462       case MESA_SHADER_GEOMETRY:
1463          populate_gs_prog_key(devinfo, subgroup_size_type,
1464                               pipeline->base.device->robust_buffer_access,
1465                               &stages[stage].key.gs);
1466          break;
1467       case MESA_SHADER_FRAGMENT: {
1468          const bool raster_enabled =
1469             !info->pRasterizationState->rasterizerDiscardEnable ||
1470             dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1471          populate_wm_prog_key(pipeline, subgroup_size_type,
1472                               pipeline->base.device->robust_buffer_access,
1473                               pipeline->subpass,
1474                               raster_enabled ? info->pMultisampleState : NULL,
1475                               vk_find_struct_const(info->pNext,
1476                                                    PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
1477                               &stages[stage].key.wm);
1478          break;
1479       }
1480       default:
1481          unreachable("Invalid graphics shader stage");
1482       }
1483 
1484       stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1485       stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1486    }
1487 
1488    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1489 
1490    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1491 
1492    unsigned char sha1[20];
1493    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1494 
1495    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1496       if (!stages[s].entrypoint)
1497          continue;
1498 
1499       stages[s].cache_key.stage = s;
1500       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1501    }
1502 
1503    const bool skip_cache_lookup =
1504       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1505 
1506    if (!skip_cache_lookup) {
1507       unsigned found = 0;
1508       unsigned cache_hits = 0;
1509       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1510          if (!stages[s].entrypoint)
1511             continue;
1512 
1513          int64_t stage_start = os_time_get_nano();
1514 
1515          bool cache_hit;
1516          struct anv_shader_bin *bin =
1517             anv_device_search_for_kernel(pipeline->base.device, cache,
1518                                          &stages[s].cache_key,
1519                                          sizeof(stages[s].cache_key), &cache_hit);
1520          if (bin) {
1521             found++;
1522             pipeline->shaders[s] = bin;
1523          }
1524 
1525          if (cache_hit) {
1526             cache_hits++;
1527             stages[s].feedback.flags |=
1528                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1529          }
1530          stages[s].feedback.duration += os_time_get_nano() - stage_start;
1531       }
1532 
1533       if (found == __builtin_popcount(pipeline->active_stages)) {
1534          if (cache_hits == found) {
1535             pipeline_feedback.flags |=
1536                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1537          }
1538          /* We found all our shaders in the cache.  We're done. */
1539          for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1540             if (!stages[s].entrypoint)
1541                continue;
1542 
1543             anv_pipeline_add_executables(&pipeline->base, &stages[s],
1544                                          pipeline->shaders[s]);
1545          }
1546          anv_pipeline_init_from_cached_graphics(pipeline);
1547          goto done;
1548       } else if (found > 0) {
1549          /* We found some but not all of our shaders.  This shouldn't happen
1550           * most of the time but it can if we have a partially populated
1551           * pipeline cache.
1552           */
1553          assert(found < __builtin_popcount(pipeline->active_stages));
1554 
1555          vk_perf(VK_LOG_OBJS(&cache->base),
1556                  "Found a partial pipeline in the cache.  This is "
1557                  "most likely caused by an incomplete pipeline cache "
1558                  "import or export");
1559 
1560          /* We're going to have to recompile anyway, so just throw away our
1561           * references to the shaders in the cache.  We'll get them out of the
1562           * cache again as part of the compilation process.
1563           */
1564          for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1565             stages[s].feedback.flags = 0;
1566             if (pipeline->shaders[s]) {
1567                anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1568                pipeline->shaders[s] = NULL;
1569             }
1570          }
1571       }
1572    }
1573 
1574    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
1575       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
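   /* Illustrative application-side pattern relying on the early return above
    * (not part of this driver; queue_async_compile is a hypothetical helper):
    *
    *    VkResult r = vkCreateGraphicsPipelines(device, cache, 1, &info,
    *                                           NULL, &pipeline);
    *    if (r == VK_PIPELINE_COMPILE_REQUIRED_EXT)
    *       queue_async_compile(&info);   // retry later without the flag
    *
    * where the create info sets
    * VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT so that a
    * cache miss never stalls the calling thread.
    */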
1576 
1577    void *pipeline_ctx = ralloc_context(NULL);
1578 
1579    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1580       if (!stages[s].entrypoint)
1581          continue;
1582 
1583       int64_t stage_start = os_time_get_nano();
1584 
1585       assert(stages[s].stage == s);
1586       assert(pipeline->shaders[s] == NULL);
1587 
1588       stages[s].bind_map = (struct anv_pipeline_bind_map) {
1589          .surface_to_descriptor = stages[s].surface_to_descriptor,
1590          .sampler_to_descriptor = stages[s].sampler_to_descriptor
1591       };
1592 
1593       stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1594                                                  pipeline_ctx,
1595                                                  &stages[s]);
1596       if (stages[s].nir == NULL) {
1597          result = vk_error(pipeline, VK_ERROR_UNKNOWN);
1598          goto fail;
1599       }
1600 
1601       /* This is rather ugly.
1602        *
1603        * Any variable annotated as interpolated by sample essentially disables
1604        * coarse pixel shading. Unfortunately the CTS tests exercising this set
1605        * the varying value in the previous stage using a constant. Our NIR
1606        * infrastructure is clever enough to look up variables across stages
1607        * and constant fold, removing the variable. So in order to comply with
1608        * the CTS we have to check the variables here.
1609        */
1610       if (s == MESA_SHADER_FRAGMENT) {
1611          nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
1612             if (var->data.sample) {
1613                stages[s].key.wm.coarse_pixel = false;
1614                break;
1615             }
1616          }
1617       }
1618 
1619       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1620    }
1621 
1622    /* Walk backwards to link */
1623    struct anv_pipeline_stage *next_stage = NULL;
1624    for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
1625       if (!stages[s].entrypoint)
1626          continue;
1627 
1628       switch (s) {
1629       case MESA_SHADER_VERTEX:
1630          anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1631          break;
1632       case MESA_SHADER_TESS_CTRL:
1633          anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1634          break;
1635       case MESA_SHADER_TESS_EVAL:
1636          anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1637          break;
1638       case MESA_SHADER_GEOMETRY:
1639          anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1640          break;
1641       case MESA_SHADER_FRAGMENT:
1642          anv_pipeline_link_fs(compiler, &stages[s]);
1643          break;
1644       default:
1645          unreachable("Invalid graphics shader stage");
1646       }
1647 
1648       next_stage = &stages[s];
1649    }
1650 
1651    if (pipeline->base.device->info.ver >= 12 &&
1652        pipeline->subpass->view_mask != 0) {
1653       /* For some pipelines HW Primitive Replication can be used instead of
1654        * instancing to implement Multiview.  This depends on how viewIndex is
1655        * used in all the active shaders, so this check can't be done on
1656        * individual shaders.
1657        */
1658       nir_shader *shaders[MESA_SHADER_STAGES] = {};
1659       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1660          shaders[s] = stages[s].nir;
1661 
1662       pipeline->use_primitive_replication =
1663          anv_check_for_primitive_replication(shaders, pipeline);
1664    } else {
1665       pipeline->use_primitive_replication = false;
1666    }
1667 
1668    struct anv_pipeline_stage *prev_stage = NULL;
1669    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1670       if (!stages[s].entrypoint)
1671          continue;
1672 
1673       int64_t stage_start = os_time_get_nano();
1674 
1675       void *stage_ctx = ralloc_context(NULL);
1676 
1677       anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1678 
1679       if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
1680          prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1681                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1682          stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1683                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1684          prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1685          stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1686       }
1687 
1688       ralloc_free(stage_ctx);
1689 
1690       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1691 
1692       prev_stage = &stages[s];
1693    }
1694 
1695    prev_stage = NULL;
1696    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1697       if (!stages[s].entrypoint)
1698          continue;
1699 
1700       int64_t stage_start = os_time_get_nano();
1701 
1702       void *stage_ctx = ralloc_context(NULL);
1703 
1704       nir_xfb_info *xfb_info = NULL;
1705       if (s == MESA_SHADER_VERTEX ||
1706           s == MESA_SHADER_TESS_EVAL ||
1707           s == MESA_SHADER_GEOMETRY)
1708          xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1709 
1710       switch (s) {
1711       case MESA_SHADER_VERTEX:
1712          anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1713                                  &stages[s]);
1714          break;
1715       case MESA_SHADER_TESS_CTRL:
1716          anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1717                                   &stages[s], prev_stage);
1718          break;
1719       case MESA_SHADER_TESS_EVAL:
1720          anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1721                                   &stages[s], prev_stage);
1722          break;
1723       case MESA_SHADER_GEOMETRY:
1724          anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1725                                  &stages[s], prev_stage);
1726          break;
1727       case MESA_SHADER_FRAGMENT:
1728          anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1729                                  &stages[s], prev_stage);
1730          break;
1731       default:
1732          unreachable("Invalid graphics shader stage");
1733       }
1734       if (stages[s].code == NULL) {
1735          ralloc_free(stage_ctx);
1736          result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1737          goto fail;
1738       }
1739 
1740       anv_nir_validate_push_layout(&stages[s].prog_data.base,
1741                                    &stages[s].bind_map);
1742 
1743       struct anv_shader_bin *bin =
1744          anv_device_upload_kernel(pipeline->base.device, cache, s,
1745                                   &stages[s].cache_key,
1746                                   sizeof(stages[s].cache_key),
1747                                   stages[s].code,
1748                                   stages[s].prog_data.base.program_size,
1749                                   &stages[s].prog_data.base,
1750                                   brw_prog_data_size(s),
1751                                   stages[s].stats, stages[s].num_stats,
1752                                   xfb_info, &stages[s].bind_map);
1753       if (!bin) {
1754          ralloc_free(stage_ctx);
1755          result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1756          goto fail;
1757       }
1758 
1759       anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1760 
1761       pipeline->shaders[s] = bin;
1762       ralloc_free(stage_ctx);
1763 
1764       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1765 
1766       prev_stage = &stages[s];
1767    }
1768 
1769    ralloc_free(pipeline_ctx);
1770 
1771 done:
1772 
1773    if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1774        pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1775       /* This can happen if we decided to implicitly disable the fragment
1776        * shader.  See anv_pipeline_compile_fs().
1777        */
1778       anv_shader_bin_unref(pipeline->base.device,
1779                            pipeline->shaders[MESA_SHADER_FRAGMENT]);
1780       pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1781       pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1782    }
1783 
1784    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1785 
1786    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1787       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1788    if (create_feedback) {
1789       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1790 
1791       assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1792       for (uint32_t i = 0; i < info->stageCount; i++) {
1793          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1794          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1795       }
1796    }
1797 
1798    return VK_SUCCESS;
1799 
1800 fail:
1801    ralloc_free(pipeline_ctx);
1802 
1803    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1804       if (pipeline->shaders[s])
1805          anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1806    }
1807 
1808    return result;
1809 }
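/* A minimal, illustrative sketch of how an application consumes the
 * creation-feedback structures filled in above (not part of this driver;
 * feedback/stage_feedbacks are hypothetical names, two stages assumed):
 *
 *    VkPipelineCreationFeedbackEXT feedback = {0};
 *    VkPipelineCreationFeedbackEXT stage_feedbacks[2] = {{0}};
 *    VkPipelineCreationFeedbackCreateInfoEXT feedback_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT,
 *       .pPipelineCreationFeedback = &feedback,
 *       .pipelineStageCreationFeedbackCount = 2,
 *       .pPipelineStageCreationFeedbacks = stage_feedbacks,
 *    };
 *
 * chained into VkGraphicsPipelineCreateInfo::pNext.  After creation,
 * feedback.duration holds the nanoseconds measured with os_time_get_nano()
 * above, and the APPLICATION_PIPELINE_CACHE_HIT bit is set when every shader
 * came out of the supplied cache.
 */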
1810 
1811 VkResult
1812 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1813                         struct anv_pipeline_cache *cache,
1814                         const VkComputePipelineCreateInfo *info,
1815                         const struct vk_shader_module *module,
1816                         const char *entrypoint,
1817                         const VkSpecializationInfo *spec_info)
1818 {
1819    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1820       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1821    };
1822    int64_t pipeline_start = os_time_get_nano();
1823 
1824    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1825 
1826    struct anv_pipeline_stage stage = {
1827       .stage = MESA_SHADER_COMPUTE,
1828       .module = module,
1829       .entrypoint = entrypoint,
1830       .spec_info = spec_info,
1831       .cache_key = {
1832          .stage = MESA_SHADER_COMPUTE,
1833       },
1834       .feedback = {
1835          .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1836       },
1837    };
1838    anv_pipeline_hash_shader(stage.module,
1839                             stage.entrypoint,
1840                             MESA_SHADER_COMPUTE,
1841                             stage.spec_info,
1842                             stage.shader_sha1);
1843 
1844    struct anv_shader_bin *bin = NULL;
1845 
1846    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1847       vk_find_struct_const(info->stage.pNext,
1848                            PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1849 
1850    const enum brw_subgroup_size_type subgroup_size_type =
1851       anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);
1852 
1853    populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
1854                         pipeline->base.device->robust_buffer_access,
1855                         &stage.key.cs);
1856 
1857    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1858 
1859    const bool skip_cache_lookup =
1860       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1861 
1862    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1863 
1864    bool cache_hit = false;
1865    if (!skip_cache_lookup) {
1866       bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1867                                          &stage.cache_key,
1868                                          sizeof(stage.cache_key),
1869                                          &cache_hit);
1870    }
1871 
1872    if (bin == NULL &&
1873        (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
1874       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1875 
1876    void *mem_ctx = ralloc_context(NULL);
1877    if (bin == NULL) {
1878       int64_t stage_start = os_time_get_nano();
1879 
1880       stage.bind_map = (struct anv_pipeline_bind_map) {
1881          .surface_to_descriptor = stage.surface_to_descriptor,
1882          .sampler_to_descriptor = stage.sampler_to_descriptor
1883       };
1884 
1885       /* Set up a binding for the gl_NumWorkGroups */
1886       stage.bind_map.surface_count = 1;
1887       stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1888          .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1889       };
1890 
1891       stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1892       if (stage.nir == NULL) {
1893          ralloc_free(mem_ctx);
1894          return vk_error(pipeline, VK_ERROR_UNKNOWN);
1895       }
1896 
1897       NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1898 
1899       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1900 
1901       NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1902 
1903       stage.num_stats = 1;
1904 
1905       struct brw_compile_cs_params params = {
1906          .nir = stage.nir,
1907          .key = &stage.key.cs,
1908          .prog_data = &stage.prog_data.cs,
1909          .stats = stage.stats,
1910          .log_data = pipeline->base.device,
1911       };
1912 
1913       stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1914       if (stage.code == NULL) {
1915          ralloc_free(mem_ctx);
1916          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1917       }
1918 
1919       anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1920 
1921       if (!stage.prog_data.cs.uses_num_work_groups) {
1922          assert(stage.bind_map.surface_to_descriptor[0].set ==
1923                 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1924          stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1925       }
1926 
1927       const unsigned code_size = stage.prog_data.base.program_size;
1928       bin = anv_device_upload_kernel(pipeline->base.device, cache,
1929                                      MESA_SHADER_COMPUTE,
1930                                      &stage.cache_key, sizeof(stage.cache_key),
1931                                      stage.code, code_size,
1932                                      &stage.prog_data.base,
1933                                      sizeof(stage.prog_data.cs),
1934                                      stage.stats, stage.num_stats,
1935                                      NULL, &stage.bind_map);
1936       if (!bin) {
1937          ralloc_free(mem_ctx);
1938          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1939       }
1940 
1941       stage.feedback.duration = os_time_get_nano() - stage_start;
1942    }
1943 
1944    anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1945 
1946    ralloc_free(mem_ctx);
1947 
1948    if (cache_hit) {
1949       stage.feedback.flags |=
1950          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1951       pipeline_feedback.flags |=
1952          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1953    }
1954    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1955 
1956    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1957       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1958    if (create_feedback) {
1959       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1960 
1961       assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1962       create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1963    }
1964 
1965    pipeline->cs = bin;
1966 
1967    return VK_SUCCESS;
1968 }
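/* Both the graphics and compute paths above look shaders up in the
 * anv_pipeline_cache corresponding to the VkPipelineCache the application
 * passed to vkCreate*Pipelines, when one is provided.  A minimal,
 * illustrative application-side sketch (not part of this driver; blob and
 * blob_size are hypothetical):
 *
 *    VkPipelineCacheCreateInfo cache_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
 *       .initialDataSize = blob_size,       // may be 0 on first run
 *       .pInitialData = blob,
 *    };
 *    VkPipelineCache cache;
 *    vkCreatePipelineCache(device, &cache_info, NULL, &cache);
 *    ...
 *    size_t size = 0;
 *    vkGetPipelineCacheData(device, cache, &size, NULL);  // then fetch data
 *
 * A warm cache takes the cache-hit path above and sets the
 * APPLICATION_PIPELINE_CACHE_HIT feedback bits.
 */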
1969 
1970 /**
1971  * Copy pipeline state not marked as dynamic.
1972  * Dynamic state is pipeline state which hasn't been provided at pipeline
1973  * creation time, but is dynamically provided afterwards using various
1974  * vkCmdSet* functions.
1975  *
1976  * The set of state considered "non_dynamic" is determined by the pieces of
1977  * state that have their corresponding VkDynamicState enums omitted from
1978  * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1979  *
1980  * @param[out] pipeline    Destination non_dynamic state.
1981  * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
1982  */
1983 static void
1984 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
1985                        const VkGraphicsPipelineCreateInfo *pCreateInfo)
1986 {
1987    anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
1988    struct anv_subpass *subpass = pipeline->subpass;
1989 
1990    pipeline->dynamic_state = default_dynamic_state;
1991 
1992    states &= ~pipeline->dynamic_states;
1993 
1994    struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
1995 
1996    bool raster_discard =
1997       pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
1998       !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
1999 
2000    /* Section 9.2 of the Vulkan 1.0.15 spec says:
2001     *
2002     *    pViewportState is [...] NULL if the pipeline
2003     *    has rasterization disabled.
2004     */
2005    if (!raster_discard) {
2006       assert(pCreateInfo->pViewportState);
2007 
2008       dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
2009       if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
2010          typed_memcpy(dynamic->viewport.viewports,
2011                      pCreateInfo->pViewportState->pViewports,
2012                      pCreateInfo->pViewportState->viewportCount);
2013       }
2014 
2015       dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
2016       if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
2017          typed_memcpy(dynamic->scissor.scissors,
2018                      pCreateInfo->pViewportState->pScissors,
2019                      pCreateInfo->pViewportState->scissorCount);
2020       }
2021    }
2022 
2023    if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
2024       assert(pCreateInfo->pRasterizationState);
2025       dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
2026    }
2027 
2028    if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
2029       assert(pCreateInfo->pRasterizationState);
2030       dynamic->depth_bias.bias =
2031          pCreateInfo->pRasterizationState->depthBiasConstantFactor;
2032       dynamic->depth_bias.clamp =
2033          pCreateInfo->pRasterizationState->depthBiasClamp;
2034       dynamic->depth_bias.slope =
2035          pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
2036    }
2037 
2038    if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
2039       assert(pCreateInfo->pRasterizationState);
2040       dynamic->cull_mode =
2041          pCreateInfo->pRasterizationState->cullMode;
2042    }
2043 
2044    if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
2045       assert(pCreateInfo->pRasterizationState);
2046       dynamic->front_face =
2047          pCreateInfo->pRasterizationState->frontFace;
2048    }
2049 
2050    if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
2051          (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2052       assert(pCreateInfo->pInputAssemblyState);
2053       dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
2054    }
2055 
2056    if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
2057       assert(pCreateInfo->pRasterizationState);
2058       dynamic->raster_discard =
2059          pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2060    }
2061 
2062    if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
2063       assert(pCreateInfo->pRasterizationState);
2064       dynamic->depth_bias_enable =
2065          pCreateInfo->pRasterizationState->depthBiasEnable;
2066    }
2067 
2068    if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
2069          (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2070       assert(pCreateInfo->pInputAssemblyState);
2071       dynamic->primitive_restart_enable =
2072          pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2073    }
2074 
2075    /* Section 9.2 of the Vulkan 1.0.15 spec says:
2076     *
2077     *    pColorBlendState is [...] NULL if the pipeline has rasterization
2078     *    disabled or if the subpass of the render pass the pipeline is
2079     *    created against does not use any color attachments.
2080     */
2081    bool uses_color_att = false;
2082    for (unsigned i = 0; i < subpass->color_count; ++i) {
2083       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
2084          uses_color_att = true;
2085          break;
2086       }
2087    }
2088 
2089    if (uses_color_att && !raster_discard) {
2090       assert(pCreateInfo->pColorBlendState);
2091 
2092       if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
2093          typed_memcpy(dynamic->blend_constants,
2094                      pCreateInfo->pColorBlendState->blendConstants, 4);
2095    }
2096 
2097    /* If there is no depthstencil attachment, then don't read
2098     * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
2099     * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
2100     * no need to override the depthstencil defaults in
2101     * anv_pipeline::dynamic_state when there is no depthstencil attachment.
2102     *
2103     * Section 9.2 of the Vulkan 1.0.15 spec says:
2104     *
2105     *    pDepthStencilState is [...] NULL if the pipeline has rasterization
2106     *    disabled or if the subpass of the render pass the pipeline is created
2107     *    against does not use a depth/stencil attachment.
2108     */
2109    if (!raster_discard && subpass->depth_stencil_attachment) {
2110       assert(pCreateInfo->pDepthStencilState);
2111 
2112       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
2113          dynamic->depth_bounds.min =
2114             pCreateInfo->pDepthStencilState->minDepthBounds;
2115          dynamic->depth_bounds.max =
2116             pCreateInfo->pDepthStencilState->maxDepthBounds;
2117       }
2118 
2119       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
2120          dynamic->stencil_compare_mask.front =
2121             pCreateInfo->pDepthStencilState->front.compareMask;
2122          dynamic->stencil_compare_mask.back =
2123             pCreateInfo->pDepthStencilState->back.compareMask;
2124       }
2125 
2126       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
2127          dynamic->stencil_write_mask.front =
2128             pCreateInfo->pDepthStencilState->front.writeMask;
2129          dynamic->stencil_write_mask.back =
2130             pCreateInfo->pDepthStencilState->back.writeMask;
2131       }
2132 
2133       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
2134          dynamic->stencil_reference.front =
2135             pCreateInfo->pDepthStencilState->front.reference;
2136          dynamic->stencil_reference.back =
2137             pCreateInfo->pDepthStencilState->back.reference;
2138       }
2139 
2140       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
2141          dynamic->depth_test_enable =
2142             pCreateInfo->pDepthStencilState->depthTestEnable;
2143       }
2144 
2145       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
2146          dynamic->depth_write_enable =
2147             pCreateInfo->pDepthStencilState->depthWriteEnable;
2148       }
2149 
2150       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
2151          dynamic->depth_compare_op =
2152             pCreateInfo->pDepthStencilState->depthCompareOp;
2153       }
2154 
2155       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
2156          dynamic->depth_bounds_test_enable =
2157             pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
2158       }
2159 
2160       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
2161          dynamic->stencil_test_enable =
2162             pCreateInfo->pDepthStencilState->stencilTestEnable;
2163       }
2164 
2165       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
2166          const VkPipelineDepthStencilStateCreateInfo *info =
2167             pCreateInfo->pDepthStencilState;
2168          memcpy(&dynamic->stencil_op.front, &info->front,
2169                 sizeof(dynamic->stencil_op.front));
2170          memcpy(&dynamic->stencil_op.back, &info->back,
2171                 sizeof(dynamic->stencil_op.back));
2172       }
2173    }
2174 
2175    const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
2176       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2177                            PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2178    if (!raster_discard && line_state && line_state->stippledLineEnable) {
2179       if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
2180          dynamic->line_stipple.factor = line_state->lineStippleFactor;
2181          dynamic->line_stipple.pattern = line_state->lineStipplePattern;
2182       }
2183    }
2184 
2185    const VkPipelineMultisampleStateCreateInfo *ms_info =
2186       pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
2187       pCreateInfo->pMultisampleState;
2188    if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
2189       const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
2190          vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
2191 
2192       if (sl_info) {
2193          dynamic->sample_locations.samples =
2194             sl_info->sampleLocationsInfo.sampleLocationsCount;
2195          const VkSampleLocationEXT *positions =
2196             sl_info->sampleLocationsInfo.pSampleLocations;
2197          for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2198             dynamic->sample_locations.locations[i].x = positions[i].x;
2199             dynamic->sample_locations.locations[i].y = positions[i].y;
2200          }
2201       }
2202    }
2203    /* Ensure we always have valid values for sample_locations. */
2204    if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
2205        dynamic->sample_locations.samples == 0) {
2206       dynamic->sample_locations.samples =
2207          ms_info ? ms_info->rasterizationSamples : 1;
2208       const struct intel_sample_position *positions =
2209          intel_get_sample_positions(dynamic->sample_locations.samples);
2210       for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2211          dynamic->sample_locations.locations[i].x = positions[i].x;
2212          dynamic->sample_locations.locations[i].y = positions[i].y;
2213       }
2214    }
2215 
2216    if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
2217       if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2218           uses_color_att) {
2219          assert(pCreateInfo->pColorBlendState);
2220          const VkPipelineColorWriteCreateInfoEXT *color_write_info =
2221             vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
2222                                  PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
2223 
2224          if (color_write_info) {
2225             dynamic->color_writes = 0;
2226             for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
2227                dynamic->color_writes |=
2228                   color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
2229             }
2230          }
2231       }
2232    }
2233 
2234    const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
2235       vk_find_struct_const(pCreateInfo->pNext,
2236                            PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
2237    if (fsr_state) {
2238       if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
2239          dynamic->fragment_shading_rate = fsr_state->fragmentSize;
2240    }
2241 
2242    pipeline->dynamic_state_mask = states;
2243 
2244    /* Mark states that can either be dynamic or fully baked into the pipeline.
2245     */
2246    pipeline->static_state_mask = states &
2247       (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
2248        ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
2249        ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
2250        ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
2251        ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
2252        ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
2253 }
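/* For reference, an application opts state out of the copy above by listing
 * it in VkPipelineDynamicStateCreateInfo.  Illustrative sketch (not part of
 * this driver):
 *
 *    const VkDynamicState dyn_states[] = {
 *       VK_DYNAMIC_STATE_VIEWPORT,
 *       VK_DYNAMIC_STATE_SCISSOR,
 *    };
 *    VkPipelineDynamicStateCreateInfo dyn_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
 *       .dynamicStateCount = 2,
 *       .pDynamicStates = dyn_states,
 *    };
 *
 * With that chain, the viewport/scissor branches above are skipped and the
 * values come from vkCmdSetViewport()/vkCmdSetScissor() at record time.
 */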
2254 
2255 static void
2256 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
2257 {
2258 #ifdef DEBUG
2259    struct anv_render_pass *renderpass = NULL;
2260    struct anv_subpass *subpass = NULL;
2261 
2262    /* Assert that all required members of VkGraphicsPipelineCreateInfo are
2263     * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
2264     */
2265    assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2266 
2267    renderpass = anv_render_pass_from_handle(info->renderPass);
2268    assert(renderpass);
2269 
2270    assert(info->subpass < renderpass->subpass_count);
2271    subpass = &renderpass->subpasses[info->subpass];
2272 
2273    assert(info->stageCount >= 1);
2274    assert(info->pRasterizationState);
2275    if (!info->pRasterizationState->rasterizerDiscardEnable) {
2276       assert(info->pViewportState);
2277       assert(info->pMultisampleState);
2278 
2279       if (subpass && subpass->depth_stencil_attachment)
2280          assert(info->pDepthStencilState);
2281 
2282       if (subpass && subpass->color_count > 0) {
2283          bool all_color_unused = true;
2284          for (int i = 0; i < subpass->color_count; i++) {
2285             if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
2286                all_color_unused = false;
2287          }
2288          /* pColorBlendState is ignored if the pipeline has rasterization
2289           * disabled or if the subpass of the render pass the pipeline is
2290           * created against does not use any color attachments.
2291           */
2292          assert(info->pColorBlendState || all_color_unused);
2293       }
2294    }
2295 
2296    for (uint32_t i = 0; i < info->stageCount; ++i) {
2297       switch (info->pStages[i].stage) {
2298       case VK_SHADER_STAGE_VERTEX_BIT:
2299          assert(info->pVertexInputState);
2300          assert(info->pInputAssemblyState);
2301          break;
2302       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2303       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2304          assert(info->pTessellationState);
2305          break;
2306       default:
2307          break;
2308       }
2309    }
2310 #endif
2311 }
2312 
2313 /**
2314  * Calculate the desired L3 partitioning based on the current state of the
2315  * pipeline.  For now this simply returns the conservative defaults calculated
2316  * by get_default_l3_weights(), but we could probably do better by gathering
2317  * more statistics from the pipeline state (e.g. guess of expected URB usage
2318  * and bound surfaces), or by using feedback from performance counters.
2319  */
2320 void
2321 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2322 {
2323    const struct intel_device_info *devinfo = &pipeline->device->info;
2324 
2325    const struct intel_l3_weights w =
2326       intel_get_default_l3_weights(devinfo, true, needs_slm);
2327 
2328    pipeline->l3_config = intel_get_l3_config(devinfo, w);
2329 }
2330 
2331 static VkLineRasterizationModeEXT
2332 vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
2333                            const VkPipelineMultisampleStateCreateInfo *ms_info)
2334 {
2335    VkLineRasterizationModeEXT line_mode =
2336       line_info ? line_info->lineRasterizationMode :
2337                   VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
2338 
2339    if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2340       if (ms_info && ms_info->rasterizationSamples > 1) {
2341          return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2342       } else {
2343          return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2344       }
2345    }
2346 
2347    return line_mode;
2348 }
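/* Illustrative application-side chain selecting a non-default line mode
 * (hypothetical values, not part of this driver):
 *
 *    VkPipelineRasterizationLineStateCreateInfoEXT line_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT,
 *       .lineRasterizationMode = VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT,
 *       .stippledLineEnable = VK_TRUE,
 *       .lineStippleFactor = 2,
 *       .lineStipplePattern = 0x5555,
 *    };
 *
 * chained into VkPipelineRasterizationStateCreateInfo::pNext.  Without it,
 * the default mode resolves to rectangular lines when multisampling and
 * Bresenham lines otherwise, as implemented above.
 */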
2349 
2350 VkResult
2351 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2352                            struct anv_device *device,
2353                            struct anv_pipeline_cache *cache,
2354                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
2355                            const VkAllocationCallbacks *alloc)
2356 {
2357    VkResult result;
2358 
2359    anv_pipeline_validate_create_info(pCreateInfo);
2360 
2361    result = anv_pipeline_init(&pipeline->base, device,
2362                               ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2363                               alloc);
2364    if (result != VK_SUCCESS)
2365       return result;
2366 
2367    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2368                          pipeline->batch_data, sizeof(pipeline->batch_data));
2369 
2370    ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
2371    assert(pCreateInfo->subpass < render_pass->subpass_count);
2372    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2373 
2374    assert(pCreateInfo->pRasterizationState);
2375 
2376    if (pCreateInfo->pDynamicState) {
2377       /* Remove all of the states that are marked as dynamic */
2378       uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
2379       for (uint32_t s = 0; s < count; s++) {
2380          pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
2381             pCreateInfo->pDynamicState->pDynamicStates[s]);
2382       }
2383    }
2384 
2385    pipeline->active_stages = 0;
2386    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2387       pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2388 
2389    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2390       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2391 
2392    copy_non_dynamic_state(pipeline, pCreateInfo);
2393 
2394    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2395 
2396    /* Previously we enabled depth clipping when !depthClampEnable.
2397     * DepthClipStateCreateInfo now makes depth clipping explicit so if the
2398     * clipping info is available, use its enable value to determine clipping,
2399     * otherwise fall back to the previous !depthClampEnable logic.
2400     */
2401    const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2402       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2403                            PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2404    pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2405 
2406    /* If rasterization is not enabled, ms_info must be ignored. */
2407    const bool raster_enabled =
2408       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
2409       (pipeline->dynamic_states &
2410        ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2411 
2412    const VkPipelineMultisampleStateCreateInfo *ms_info =
2413       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2414 
2415    pipeline->sample_shading_enable = ms_info && is_sample_shading(ms_info);
2416 
2417    result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2418    if (result != VK_SUCCESS) {
2419       anv_pipeline_finish(&pipeline->base, device, alloc);
2420       return result;
2421    }
2422 
2423    anv_pipeline_setup_l3_config(&pipeline->base, false);
2424 
2425    if (anv_pipeline_is_primitive(pipeline)) {
2426       const VkPipelineVertexInputStateCreateInfo *vi_info =
2427          pCreateInfo->pVertexInputState;
2428 
2429       const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2430 
2431       for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2432          const VkVertexInputAttributeDescription *desc =
2433             &vi_info->pVertexAttributeDescriptions[i];
2434 
2435          if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2436             pipeline->vb_used |= 1 << desc->binding;
2437       }
2438 
2439       for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2440          const VkVertexInputBindingDescription *desc =
2441             &vi_info->pVertexBindingDescriptions[i];
2442 
2443          pipeline->vb[desc->binding].stride = desc->stride;
2444 
2445          /* Step rate is programmed per vertex element (attribute), not
2446           * binding. Set up a map of which bindings step per instance, for
2447           * reference by vertex element setup. */
2448          switch (desc->inputRate) {
2449          default:
2450          case VK_VERTEX_INPUT_RATE_VERTEX:
2451             pipeline->vb[desc->binding].instanced = false;
2452             break;
2453          case VK_VERTEX_INPUT_RATE_INSTANCE:
2454             pipeline->vb[desc->binding].instanced = true;
2455             break;
2456          }
2457 
2458          pipeline->vb[desc->binding].instance_divisor = 1;
2459       }
2460 
2461       const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2462          vk_find_struct_const(vi_info->pNext,
2463                               PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2464       if (vi_div_state) {
2465          for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2466             const VkVertexInputBindingDivisorDescriptionEXT *desc =
2467                &vi_div_state->pVertexBindingDivisors[i];
2468 
2469             pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2470          }
2471       }
2472 
2473       /* Our implementation of VK_KHR_multiview uses instancing to draw the
2474        * different views.  If the client asks for instancing, we need to multiply
2475        * the instance divisor by the number of views to ensure that we repeat the
2476        * client's per-instance data once for each view.
2477        */
2478       if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2479          const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2480          for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2481             if (pipeline->vb[vb].instanced)
2482                pipeline->vb[vb].instance_divisor *= view_count;
2483          }
2484       }
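      /* Worked example (hypothetical values): with view_mask 0x3 (2 views)
       * and a client divisor of 3, the divisor becomes 3 * 2 = 6.  Each
       * client instance expands to 2 hardware instances, so the attribute
       * data still advances once per 3 client instances, repeated for both
       * views.
       */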
2485 
2486       const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2487          pCreateInfo->pInputAssemblyState;
2488       const VkPipelineTessellationStateCreateInfo *tess_info =
2489          pCreateInfo->pTessellationState;
2490 
2491       if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2492          pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2493       else
2494          pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
2495    }
2496 
2497    const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
2498       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2499                            PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2500 
2501    /* Store the line mode, polygon mode and rasterization samples; these are
2502     * used for dynamic primitive topology.
2503     */
2504    pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
2505    pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
2506    pipeline->rasterization_samples =
2507       ms_info ? ms_info->rasterizationSamples : 1;
2508 
2509    return VK_SUCCESS;
2510 }
2511 
2512 static VkResult
2513 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2514                          struct anv_pipeline_cache *cache,
2515                          nir_shader *nir,
2516                          struct anv_pipeline_stage *stage,
2517                          struct anv_shader_bin **shader_out,
2518                          void *mem_ctx)
2519 {
2520    const struct brw_compiler *compiler =
2521       pipeline->base.device->physical->compiler;
2522    const struct intel_device_info *devinfo = compiler->devinfo;
2523 
2524    nir_shader **resume_shaders = NULL;
2525    uint32_t num_resume_shaders = 0;
2526    if (nir->info.stage != MESA_SHADER_COMPUTE) {
2527       NIR_PASS_V(nir, nir_lower_shader_calls,
2528                  nir_address_format_64bit_global,
2529                  BRW_BTD_STACK_ALIGN,
2530                  &resume_shaders, &num_resume_shaders, mem_ctx);
2531       NIR_PASS_V(nir, brw_nir_lower_shader_calls);
2532       NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2533    }
2534 
2535    for (unsigned i = 0; i < num_resume_shaders; i++) {
2536       NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
2537       NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2538    }
2539 
2540    stage->code =
2541       brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
2542                      &stage->key.bs, &stage->prog_data.bs, nir,
2543                      num_resume_shaders, resume_shaders, stage->stats, NULL);
2544    if (stage->code == NULL)
2545       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2546 
2547    /* Ray-tracing shaders don't have a "real" bind map */
2548    struct anv_pipeline_bind_map empty_bind_map = {};
2549 
2550    const unsigned code_size = stage->prog_data.base.program_size;
2551    struct anv_shader_bin *bin =
2552       anv_device_upload_kernel(pipeline->base.device,
2553                                cache,
2554                                stage->stage,
2555                                &stage->cache_key, sizeof(stage->cache_key),
2556                                stage->code, code_size,
2557                                &stage->prog_data.base,
2558                                sizeof(stage->prog_data.bs),
2559                                stage->stats, 1,
2560                                NULL, &empty_bind_map);
2561    if (bin == NULL)
2562       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2563 
2564    /* TODO: Figure out executables for resume shaders */
2565    anv_pipeline_add_executables(&pipeline->base, stage, bin);
2566    util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2567 
2568    *shader_out = bin;
2569 
2570    return VK_SUCCESS;
2571 }
2572 
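/* Returns true when the application requested
 * VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR, meaning the stack
 * size will be supplied later through
 * vkCmdSetRayTracingPipelineStackSizeKHR rather than computed here.
 */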
2573 static bool
2574 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2575 {
2576    if (info->pDynamicState == NULL)
2577       return false;
2578 
2579    for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2580       if (info->pDynamicState->pDynamicStates[i] ==
2581           VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2582          return true;
2583    }
2584 
2585    return false;
2586 }
2587 
2588 static void
2589 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2590                                         const VkRayTracingPipelineCreateInfoKHR *info,
2591                                         uint32_t *stack_max)
2592 {
2593    if (is_rt_stack_size_dynamic(info)) {
2594       pipeline->stack_size = 0; /* 0 means dynamic */
2595    } else {
2596       /* From the Vulkan spec:
2597        *
2598        *    "If the stack size is not set explicitly, the stack size for a
2599        *    pipeline is:
2600        *
2601        *       rayGenStackMax +
2602        *       min(1, maxPipelineRayRecursionDepth) ×
2603        *       max(closestHitStackMax, missStackMax,
2604        *           intersectionStackMax + anyHitStackMax) +
2605        *       max(0, maxPipelineRayRecursionDepth-1) ×
2606        *       max(closestHitStackMax, missStackMax) +
2607        *       2 × callableStackMax"
2608        */
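      /* Worked example with made-up per-stage maxima: recursion depth 2,
       * raygen 1024, closest-hit 512, miss 256, intersection 128, any-hit 64,
       * callable 0 gives 1024 + 1 * 512 + 1 * 512 + 2 * 0 = 2048 bytes.
       */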
2609       pipeline->stack_size =
2610          stack_max[MESA_SHADER_RAYGEN] +
2611          MIN2(1, info->maxPipelineRayRecursionDepth) *
2612          MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2613               stack_max[MESA_SHADER_MISS],
2614               stack_max[MESA_SHADER_INTERSECTION],
2615               stack_max[MESA_SHADER_ANY_HIT]) +
2616          MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2617          MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2618               stack_max[MESA_SHADER_MISS]) +
2619          2 * stack_max[MESA_SHADER_CALLABLE];
2620 
2621       /* This is an extremely unlikely case but we need to set it to some
2622        * non-zero value so that we don't accidentally think it's dynamic.
2623        * Our minimum stack size is 2KB anyway so we could set it to any small
2624        * value we like.
2625        */
2626       if (pipeline->stack_size == 0)
2627          pipeline->stack_size = 1;
2628    }
2629 }
2630 
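/* Build the per-stage compilation state from the create info.  Each stage
 * gets its own cache key; for procedural hit groups the intersection
 * shader's key is re-hashed together with its any-hit shader because the
 * two are compiled into a single combined binary.
 */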
2631 static struct anv_pipeline_stage *
2632 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2633                                      const VkRayTracingPipelineCreateInfoKHR *info,
2634                                      void *pipeline_ctx)
2635 {
2636    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2637 
2638    /* Create enough stage entries for all shader modules plus potential
2639     * combinations in the groups.
2640     */
2641    struct anv_pipeline_stage *stages =
2642       rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2643 
2644    for (uint32_t i = 0; i < info->stageCount; i++) {
2645       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2646       if (sinfo->module == VK_NULL_HANDLE)
2647          continue;
2648 
2649       int64_t stage_start = os_time_get_nano();
2650 
2651       stages[i] = (struct anv_pipeline_stage) {
2652          .stage = vk_to_mesa_shader_stage(sinfo->stage),
2653          .module = vk_shader_module_from_handle(sinfo->module),
2654          .entrypoint = sinfo->pName,
2655          .spec_info = sinfo->pSpecializationInfo,
2656          .cache_key = {
2657             .stage = vk_to_mesa_shader_stage(sinfo->stage),
2658          },
2659          .feedback = {
2660             .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2661          },
2662       };
2663 
2664       populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
2665                            pipeline->base.device->robust_buffer_access,
2666                            &stages[i].key.bs);
2667 
2668       anv_pipeline_hash_shader(stages[i].module,
2669                                stages[i].entrypoint,
2670                                stages[i].stage,
2671                                stages[i].spec_info,
2672                                stages[i].shader_sha1);
2673 
2674       if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2675          anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2676                                               stages[i].cache_key.sha1);
2677       }
2678 
2679       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2680    }
2681 
2682    for (uint32_t i = 0; i < info->groupCount; i++) {
2683       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2684 
2685       if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2686          continue;
2687 
2688       int64_t stage_start = os_time_get_nano();
2689 
2690       uint32_t intersection_idx = ginfo->intersectionShader;
2691       assert(intersection_idx < info->stageCount);
2692 
2693       uint32_t any_hit_idx = ginfo->anyHitShader;
2694       if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2695          assert(any_hit_idx < info->stageCount);
2696          anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2697                                                        layout,
2698                                                        &stages[intersection_idx],
2699                                                        &stages[any_hit_idx],
2700                                                        stages[intersection_idx].cache_key.sha1);
2701       } else {
2702          anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2703                                               &stages[intersection_idx],
2704                                               stages[intersection_idx].cache_key.sha1);
2705       }
2706 
2707       stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2708    }
2709 
2710    return stages;
2711 }
2712 
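/* Look up every shader in the pipeline cache and record the per-stage stack
 * requirements of the hits.  Returns true only if every stage was found, in
 * which case the compile path can be skipped entirely.
 */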
2713 static bool
2714 anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
2715                                  struct anv_pipeline_cache *cache,
2716                                  const VkRayTracingPipelineCreateInfoKHR *info,
2717                                  struct anv_pipeline_stage *stages,
2718                                  uint32_t *stack_max)
2719 {
2720    uint32_t shaders = 0, cache_hits = 0;
2721    for (uint32_t i = 0; i < info->stageCount; i++) {
2722       if (stages[i].entrypoint == NULL)
2723          continue;
2724 
2725       shaders++;
2726 
2727       int64_t stage_start = os_time_get_nano();
2728 
2729       bool cache_hit;
2730       stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
2731                                                    &stages[i].cache_key,
2732                                                    sizeof(stages[i].cache_key),
2733                                                    &cache_hit);
2734       if (cache_hit) {
2735          cache_hits++;
2736          stages[i].feedback.flags |=
2737             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2738       }
2739 
2740       if (stages[i].bin != NULL) {
2741          anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
2742          util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
2743 
2744          uint32_t stack_size =
2745             brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2746          stack_max[stages[i].stage] =
2747             MAX2(stack_max[stages[i].stage], stack_size);
2748       }
2749 
2750       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2751    }
2752 
2753    return cache_hits == shaders;
2754 }
2755 
2756 static VkResult
2757 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
2758                                  struct anv_pipeline_cache *cache,
2759                                  const VkRayTracingPipelineCreateInfoKHR *info)
2760 {
2761    const struct intel_device_info *devinfo = &pipeline->base.device->info;
2762    VkResult result;
2763 
2764    VkPipelineCreationFeedbackEXT pipeline_feedback = {
2765       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2766    };
2767    int64_t pipeline_start = os_time_get_nano();
2768 
2769    void *pipeline_ctx = ralloc_context(NULL);
2770 
2771    struct anv_pipeline_stage *stages =
2772       anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
2773 
2774    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2775 
2776    const bool skip_cache_lookup =
2777       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2778 
2779    uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
2780 
2781    if (!skip_cache_lookup &&
2782        anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
2783       pipeline_feedback.flags |=
2784          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2785       goto done;
2786    }
2787 
2788    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
2789       ralloc_free(pipeline_ctx);
2790       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2791    }
2792 
2793    for (uint32_t i = 0; i < info->stageCount; i++) {
2794       if (stages[i].entrypoint == NULL)
2795          continue;
2796 
2797       int64_t stage_start = os_time_get_nano();
2798 
2799       stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2800                                                  pipeline_ctx, &stages[i]);
2801       if (stages[i].nir == NULL) {
2802          ralloc_free(pipeline_ctx);
2803          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2804       }
2805 
2806       anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
2807 
2808       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2809    }
2810 
2811    for (uint32_t i = 0; i < info->stageCount; i++) {
2812       if (stages[i].entrypoint == NULL)
2813          continue;
2814 
2815       /* Shader found in cache already. */
2816       if (stages[i].bin != NULL)
2817          continue;
2818 
2819       /* We handle intersection shaders as part of the group */
2820       if (stages[i].stage == MESA_SHADER_INTERSECTION)
2821          continue;
2822 
2823       int64_t stage_start = os_time_get_nano();
2824 
2825       void *stage_ctx = ralloc_context(pipeline_ctx);
2826 
2827       nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
2828       switch (stages[i].stage) {
2829       case MESA_SHADER_RAYGEN:
2830          brw_nir_lower_raygen(nir);
2831          break;
2832 
2833       case MESA_SHADER_ANY_HIT:
2834          brw_nir_lower_any_hit(nir, devinfo);
2835          break;
2836 
2837       case MESA_SHADER_CLOSEST_HIT:
2838          brw_nir_lower_closest_hit(nir);
2839          break;
2840 
2841       case MESA_SHADER_MISS:
2842          brw_nir_lower_miss(nir);
2843          break;
2844 
2845       case MESA_SHADER_INTERSECTION:
2846          unreachable("These are handled later");
2847 
2848       case MESA_SHADER_CALLABLE:
2849          brw_nir_lower_callable(nir);
2850          break;
2851 
2852       default:
2853          unreachable("Invalid ray-tracing shader stage");
2854       }
2855 
2856       result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
2857                                         &stages[i].bin, stage_ctx);
2858       if (result != VK_SUCCESS) {
2859          ralloc_free(pipeline_ctx);
2860          return result;
2861       }
2862 
2863       uint32_t stack_size =
2864          brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2865       stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
2866 
2867       ralloc_free(stage_ctx);
2868 
2869       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2870    }
2871 
2872    for (uint32_t i = 0; i < info->groupCount; i++) {
2873       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2874       struct anv_rt_shader_group *group = &pipeline->groups[i];
2875       group->type = ginfo->type;
2876       switch (ginfo->type) {
2877       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2878          assert(ginfo->generalShader < info->stageCount);
2879          group->general = stages[ginfo->generalShader].bin;
2880          break;
2881 
2882       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2883          if (ginfo->anyHitShader < info->stageCount)
2884             group->any_hit = stages[ginfo->anyHitShader].bin;
2885 
2886          if (ginfo->closestHitShader < info->stageCount)
2887             group->closest_hit = stages[ginfo->closestHitShader].bin;
2888          break;
2889 
2890       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
2891          if (ginfo->closestHitShader < info->stageCount)
2892             group->closest_hit = stages[ginfo->closestHitShader].bin;
2893 
2894          uint32_t intersection_idx = info->pGroups[i].intersectionShader;
2895          assert(intersection_idx < info->stageCount);
2896 
2897          /* Only compile this stage if not already found in the cache. */
2898          if (stages[intersection_idx].bin == NULL) {
2899             /* The any-hit and intersection shaders have to be combined */
2900             uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
2901             const nir_shader *any_hit = NULL;
2902             if (any_hit_idx < info->stageCount)
2903                any_hit = stages[any_hit_idx].nir;
2904 
2905             void *group_ctx = ralloc_context(pipeline_ctx);
2906             nir_shader *intersection =
2907                nir_shader_clone(group_ctx, stages[intersection_idx].nir);
2908 
2909             brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
2910                                                         devinfo);
2911 
2912             result = compile_upload_rt_shader(pipeline, cache,
2913                                               intersection,
2914                                               &stages[intersection_idx],
2915                                               &group->intersection,
2916                                               group_ctx);
2917             ralloc_free(group_ctx);
2918             if (result != VK_SUCCESS)
2919                return result;
2920          } else {
2921             group->intersection = stages[intersection_idx].bin;
2922          }
2923 
2924          uint32_t stack_size =
2925             brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
2926          stack_max[MESA_SHADER_INTERSECTION] =
2927             MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
2928 
2929          break;
2930       }
2931 
2932       default:
2933          unreachable("Invalid ray tracing shader group type");
2934       }
2935    }
2936 
2937  done:
2938    ralloc_free(pipeline_ctx);
2939 
2940    anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
2941 
2942    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2943 
2944    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2945       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2946    if (create_feedback) {
2947       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2948 
2949       assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
2950       for (uint32_t i = 0; i < info->stageCount; i++) {
2951          /* stages[] is indexed by the create-info stage index, not by stage enum */
2952          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[i].feedback;
2953       }
2954    }
2955 
2956    return VK_SUCCESS;
2957 }
2958 
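/* Device-level ray-tracing support shaders: a raygen trampoline compute
 * shader and a trivial-return bindless shader used by ray-tracing dispatch.
 * Both are looked up in (and uploaded to) the default pipeline cache so they
 * are only built once per device.
 */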
2959 VkResult
2960 anv_device_init_rt_shaders(struct anv_device *device)
2961 {
2962    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2963       return VK_SUCCESS;
2964 
2965    bool cache_hit;
2966 
2967    struct brw_rt_trampoline {
2968       char name[16];
2969       struct brw_cs_prog_key key;
2970    } trampoline_key = {
2971       .name = "rt-trampoline",
2972       .key = {
2973          /* TODO: Other subgroup sizes? */
2974          .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
2975       },
2976    };
2977    device->rt_trampoline =
2978       anv_device_search_for_kernel(device, &device->default_pipeline_cache,
2979                                    &trampoline_key, sizeof(trampoline_key),
2980                                    &cache_hit);
2981    if (device->rt_trampoline == NULL) {
2982 
2983       void *tmp_ctx = ralloc_context(NULL);
2984       nir_shader *trampoline_nir =
2985          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
2986 
2987       struct anv_pipeline_bind_map bind_map = {
2988          .surface_count = 0,
2989          .sampler_count = 0,
2990       };
2991       uint32_t dummy_params[4] = { 0, };
2992       struct brw_cs_prog_data trampoline_prog_data = {
2993          .base.nr_params = 4,
2994          .base.param = dummy_params,
2995          .uses_inline_data = true,
2996          .uses_btd_stack_ids = true,
2997       };
2998       struct brw_compile_cs_params params = {
2999          .nir = trampoline_nir,
3000          .key = &trampoline_key.key,
3001          .prog_data = &trampoline_prog_data,
3002          .log_data = device,
3003       };
3004       const unsigned *tramp_data =
3005          brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
3006 
3007       device->rt_trampoline =
3008          anv_device_upload_kernel(device, &device->default_pipeline_cache,
3009                                   MESA_SHADER_COMPUTE,
3010                                   &trampoline_key, sizeof(trampoline_key),
3011                                   tramp_data,
3012                                   trampoline_prog_data.base.program_size,
3013                                   &trampoline_prog_data.base,
3014                                   sizeof(trampoline_prog_data),
3015                                   NULL, 0, NULL, &bind_map);
3016 
3017       ralloc_free(tmp_ctx);
3018 
3019       if (device->rt_trampoline == NULL)
3020          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3021    }
3022 
3023    struct brw_rt_trivial_return {
3024       char name[16];
3025       struct brw_bs_prog_key key;
3026    } return_key = {
3027       .name = "rt-trivial-ret",
3028    };
3029    device->rt_trivial_return =
3030       anv_device_search_for_kernel(device, &device->default_pipeline_cache,
3031                                    &return_key, sizeof(return_key),
3032                                    &cache_hit);
3033    if (device->rt_trivial_return == NULL) {
3034       void *tmp_ctx = ralloc_context(NULL);
3035       nir_shader *trivial_return_nir =
3036          brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
3037 
3038       NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);
3039 
3040       struct anv_pipeline_bind_map bind_map = {
3041          .surface_count = 0,
3042          .sampler_count = 0,
3043       };
3044       struct brw_bs_prog_data return_prog_data = { 0, };
3045       const unsigned *return_data =
3046          brw_compile_bs(device->physical->compiler, device, tmp_ctx,
3047                         &return_key.key, &return_prog_data, trivial_return_nir,
3048                         0, 0, NULL, NULL);
3049 
3050       device->rt_trivial_return =
3051          anv_device_upload_kernel(device, &device->default_pipeline_cache,
3052                                   MESA_SHADER_CALLABLE,
3053                                   &return_key, sizeof(return_key),
3054                                   return_data, return_prog_data.base.program_size,
3055                                   &return_prog_data.base, sizeof(return_prog_data),
3056                                   NULL, 0, NULL, &bind_map);
3057 
3058       ralloc_free(tmp_ctx);
3059 
3060       if (device->rt_trivial_return == NULL) {
3061          anv_shader_bin_unref(device, device->rt_trampoline);
3062          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3063       }
3064    }
3065 
3066    return VK_SUCCESS;
3067 }
3068 
3069 void
3070 anv_device_finish_rt_shaders(struct anv_device *device)
3071 {
3072    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3073       return;
3074 
3075    anv_shader_bin_unref(device, device->rt_trampoline);
3076 }
3077 
3078 VkResult
3079 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3080                               struct anv_device *device,
3081                               struct anv_pipeline_cache *cache,
3082                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3083                               const VkAllocationCallbacks *alloc)
3084 {
3085    VkResult result;
3086 
3087    util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
3088 
3089    result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
3090    if (result != VK_SUCCESS)
3091       goto fail;
3092 
3093    anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
3094 
3095    return VK_SUCCESS;
3096 
3097 fail:
3098    util_dynarray_foreach(&pipeline->shaders,
3099                          struct anv_shader_bin *, shader) {
3100       anv_shader_bin_unref(device, *shader);
3101    }
3102    return result;
3103 }
3104 
3105 #define WRITE_STR(field, ...) ({                               \
3106    memset(field, 0, sizeof(field));                            \
3107    UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
3108    assert(i > 0 && i < sizeof(field));                         \
3109 })
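/* Usage sketch: WRITE_STR(props->name, "SIMD%d %s", 8, "fragment") zeroes the
 * fixed-size field, formats into it and asserts if the text would have been
 * truncated.
 */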
3110 
3111 VkResult anv_GetPipelineExecutablePropertiesKHR(
3112     VkDevice                                    device,
3113     const VkPipelineInfoKHR*                    pPipelineInfo,
3114     uint32_t*                                   pExecutableCount,
3115     VkPipelineExecutablePropertiesKHR*          pProperties)
3116 {
3117    ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
3118    VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
3119 
3120    util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
3121       vk_outarray_append(&out, props) {
3122          gl_shader_stage stage = exe->stage;
3123          props->stages = mesa_to_vk_shader_stage(stage);
3124 
3125          unsigned simd_width = exe->stats.dispatch_width;
3126          if (stage == MESA_SHADER_FRAGMENT) {
3127             WRITE_STR(props->name, "%s%d %s",
3128                       simd_width ? "SIMD" : "vec",
3129                       simd_width ? simd_width : 4,
3130                       _mesa_shader_stage_to_string(stage));
3131          } else {
3132             WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
3133          }
3134          WRITE_STR(props->description, "%s%d %s shader",
3135                    simd_width ? "SIMD" : "vec",
3136                    simd_width ? simd_width : 4,
3137                    _mesa_shader_stage_to_string(stage));
3138 
3139          /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
3140           * wants a subgroup size of 1.
3141           */
3142          props->subgroupSize = MAX2(simd_width, 1);
3143       }
3144    }
3145 
3146    return vk_outarray_status(&out);
3147 }
3148 
3149 static const struct anv_pipeline_executable *
3150 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
3151 {
3152    assert(index < util_dynarray_num_elements(&pipeline->executables,
3153                                              struct anv_pipeline_executable));
3154    return util_dynarray_element(
3155       &pipeline->executables, struct anv_pipeline_executable, index);
3156 }
3157 
3158 VkResult anv_GetPipelineExecutableStatisticsKHR(
3159     VkDevice                                    device,
3160     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
3161     uint32_t*                                   pStatisticCount,
3162     VkPipelineExecutableStatisticKHR*           pStatistics)
3163 {
3164    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3165    VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
3166 
3167    const struct anv_pipeline_executable *exe =
3168       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3169 
3170    const struct brw_stage_prog_data *prog_data;
3171    switch (pipeline->type) {
3172    case ANV_PIPELINE_GRAPHICS: {
3173       prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
3174       break;
3175    }
3176    case ANV_PIPELINE_COMPUTE: {
3177       prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
3178       break;
3179    }
3180    default:
3181       unreachable("invalid pipeline type");
3182    }
3183 
3184    vk_outarray_append(&out, stat) {
3185       WRITE_STR(stat->name, "Instruction Count");
3186       WRITE_STR(stat->description,
3187                 "Number of GEN instructions in the final generated "
3188                 "shader executable.");
3189       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3190       stat->value.u64 = exe->stats.instructions;
3191    }
3192 
3193    vk_outarray_append(&out, stat) {
3194       WRITE_STR(stat->name, "SEND Count");
3195       WRITE_STR(stat->description,
3196                 "Number of instructions in the final generated shader "
3197                 "executable which access external units such as the "
3198                 "constant cache or the sampler.");
3199       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3200       stat->value.u64 = exe->stats.sends;
3201    }
3202 
3203    vk_outarray_append(&out, stat) {
3204       WRITE_STR(stat->name, "Loop Count");
3205       WRITE_STR(stat->description,
3206                 "Number of loops (not unrolled) in the final generated "
3207                 "shader executable.");
3208       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3209       stat->value.u64 = exe->stats.loops;
3210    }
3211 
3212    vk_outarray_append(&out, stat) {
3213       WRITE_STR(stat->name, "Cycle Count");
3214       WRITE_STR(stat->description,
3215                 "Estimate of the number of EU cycles required to execute "
3216                 "the final generated executable.  This is an estimate only "
3217                 "and may vary greatly from actual run-time performance.");
3218       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3219       stat->value.u64 = exe->stats.cycles;
3220    }
3221 
3222    vk_outarray_append(&out, stat) {
3223       WRITE_STR(stat->name, "Spill Count");
3224       WRITE_STR(stat->description,
3225                 "Number of scratch spill operations.  This gives a rough "
3226                 "estimate of the cost incurred due to spilling temporary "
3227                 "values to memory.  If this is non-zero, you may want to "
3228                 "adjust your shader to reduce register pressure.");
3229       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3230       stat->value.u64 = exe->stats.spills;
3231    }
3232 
3233    vk_outarray_append(&out, stat) {
3234       WRITE_STR(stat->name, "Fill Count");
3235       WRITE_STR(stat->description,
3236                 "Number of scratch fill operations.  This gives a rough "
3237                 "estimate of the cost incurred due to spilling temporary "
3238                 "values to memory.  If this is non-zero, you may want to "
3239                 "adjust your shader to reduce register pressure.");
3240       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3241       stat->value.u64 = exe->stats.fills;
3242    }
3243 
3244    vk_outarray_append(&out, stat) {
3245       WRITE_STR(stat->name, "Scratch Memory Size");
3246       WRITE_STR(stat->description,
3247                 "Number of bytes of scratch memory required by the "
3248                 "generated shader executable.  If this is non-zero, you "
3249                 "may want to adjust your shader to reduce register "
3250                 "pressure.");
3251       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3252       stat->value.u64 = prog_data->total_scratch;
3253    }
3254 
3255    if (gl_shader_stage_uses_workgroup(exe->stage)) {
3256       vk_outarray_append(&out, stat) {
3257          WRITE_STR(stat->name, "Workgroup Memory Size");
3258          WRITE_STR(stat->description,
3259                    "Number of bytes of workgroup shared memory used by this "
3260                    "shader including any padding.");
3261          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3262          stat->value.u64 = prog_data->total_shared;
3263       }
3264    }
3265 
3266    return vk_outarray_status(&out);
3267 }
3268 
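/* Implements the usual Vulkan two-call idiom for the IR text: with a NULL
 * pData only the required size is returned; otherwise the string is copied
 * and false indicates the caller's buffer was too small.
 */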
3269 static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR * ir,const char * data)3270 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3271               const char *data)
3272 {
3273    ir->isText = VK_TRUE;
3274 
3275    size_t data_len = strlen(data) + 1;
3276 
3277    if (ir->pData == NULL) {
3278       ir->dataSize = data_len;
3279       return true;
3280    }
3281 
3282    strncpy(ir->pData, data, ir->dataSize);
3283    if (ir->dataSize < data_len)
3284       return false;
3285 
3286    ir->dataSize = data_len;
3287    return true;
3288 }
3289 
3290 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
3291     VkDevice                                    device,
3292     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
3293     uint32_t*                                   pInternalRepresentationCount,
3294     VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
3295 {
3296    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3297    VK_OUTARRAY_MAKE(out, pInternalRepresentations,
3298                     pInternalRepresentationCount);
3299    bool incomplete_text = false;
3300 
3301    const struct anv_pipeline_executable *exe =
3302       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3303 
3304    if (exe->nir) {
3305       vk_outarray_append(&out, ir) {
3306          WRITE_STR(ir->name, "Final NIR");
3307          WRITE_STR(ir->description,
3308                    "Final NIR before going into the back-end compiler");
3309 
3310          if (!write_ir_text(ir, exe->nir))
3311             incomplete_text = true;
3312       }
3313    }
3314 
3315    if (exe->disasm) {
3316       vk_outarray_append(&out, ir) {
3317          WRITE_STR(ir->name, "GEN Assembly");
3318          WRITE_STR(ir->description,
3319                    "Final GEN assembly for the generated shader binary");
3320 
3321          if (!write_ir_text(ir, exe->disasm))
3322             incomplete_text = true;
3323       }
3324    }
3325 
3326    return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
3327 }
3328 
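/* Copies groupCount shader group handles, starting at firstGroup, into the
 * caller's buffer; per the spec, dataSize must be at least
 * groupCount * shaderGroupHandleSize.
 */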
3329 VkResult
3330 anv_GetRayTracingShaderGroupHandlesKHR(
3331     VkDevice                                    _device,
3332     VkPipeline                                  _pipeline,
3333     uint32_t                                    firstGroup,
3334     uint32_t                                    groupCount,
3335     size_t                                      dataSize,
3336     void*                                       pData)
3337 {
3338    ANV_FROM_HANDLE(anv_device, device, _device);
3339    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3340 
3341    if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
3342       return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3343 
3344    struct anv_ray_tracing_pipeline *rt_pipeline =
3345       anv_pipeline_to_ray_tracing(pipeline);
3346 
3347    for (uint32_t i = 0; i < groupCount; i++) {
3348       struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
3349       memcpy(pData, group->handle, sizeof(group->handle));
3350       pData += sizeof(group->handle);
3351    }
3352 
3353    return VK_SUCCESS;
3354 }
3355 
3356 VkResult
3357 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
3358     VkDevice                                    _device,
3359     VkPipeline                                  pipeline,
3360     uint32_t                                    firstGroup,
3361     uint32_t                                    groupCount,
3362     size_t                                      dataSize,
3363     void*                                       pData)
3364 {
3365    ANV_FROM_HANDLE(anv_device, device, _device);
3366    unreachable("Unimplemented");
3367    return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3368 }
3369 
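/* Returns the stack size in bytes required by the selected shader of a
 * group; applications can combine these values with
 * vkCmdSetRayTracingPipelineStackSizeKHR to size the pipeline stack when it
 * is dynamic.
 */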
3370 VkDeviceSize
3371 anv_GetRayTracingShaderGroupStackSizeKHR(
3372     VkDevice                                    device,
3373     VkPipeline                                  _pipeline,
3374     uint32_t                                    group,
3375     VkShaderGroupShaderKHR                      groupShader)
3376 {
3377    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3378    assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
3379 
3380    struct anv_ray_tracing_pipeline *rt_pipeline =
3381       anv_pipeline_to_ray_tracing(pipeline);
3382 
3383    assert(group < rt_pipeline->group_count);
3384 
3385    struct anv_shader_bin *bin;
3386    switch (groupShader) {
3387    case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
3388       bin = rt_pipeline->groups[group].general;
3389       break;
3390 
3391    case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
3392       bin = rt_pipeline->groups[group].closest_hit;
3393       break;
3394 
3395    case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
3396       bin = rt_pipeline->groups[group].any_hit;
3397       break;
3398 
3399    case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
3400       bin = rt_pipeline->groups[group].intersection;
3401       break;
3402 
3403    default:
3404       unreachable("Invalid VkShaderGroupShader enum");
3405    }
3406 
3407    if (bin == NULL)
3408       return 0;
3409 
3410    return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
3411 }
3412