1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "radv_pipeline.h"
12 #include "meta/radv_meta.h"
13 #include "nir/nir.h"
14 #include "nir/nir_builder.h"
15 #include "nir/nir_serialize.h"
16 #include "nir/radv_nir.h"
17 #include "spirv/nir_spirv.h"
18 #include "util/disk_cache.h"
19 #include "util/os_time.h"
20 #include "util/u_atomic.h"
21 #include "radv_cs.h"
22 #include "radv_debug.h"
23 #include "radv_pipeline_rt.h"
24 #include "radv_rmv.h"
25 #include "radv_shader.h"
26 #include "radv_shader_args.h"
27 #include "vk_pipeline.h"
28 #include "vk_render_pass.h"
29 #include "vk_util.h"
30 
31 #include "util/u_debug.h"
32 #include "ac_binary.h"
33 #include "ac_nir.h"
34 #include "ac_shader_util.h"
35 #include "aco_interface.h"
36 #include "sid.h"
37 #include "vk_format.h"
38 #include "vk_nir_convert_ycbcr.h"
39 #include "vk_ycbcr_conversion.h"
40 #if AMD_LLVM_AVAILABLE
41 #include "ac_llvm_util.h"
42 #endif
43 
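/* Returns whether the shader was compiled to use the indirect descriptor sets user SGPR
 * (AC_UD_INDIRECT_DESCRIPTOR_SETS).
 */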
44 bool
45 radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader)
46 {
47    const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_INDIRECT_DESCRIPTOR_SETS);
48    return loc->sgpr_idx != -1;
49 }
50 
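/* Whether shader internal representations (NIR, backend IR, disassembly) should be kept for
 * this pipeline: requested via CAPTURE_INTERNAL_REPRESENTATIONS, RADV_DEBUG=shaders, or when
 * the device keeps shader info.
 */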
51 bool
52 radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags)
53 {
54    const struct radv_physical_device *pdev = radv_device_physical(device);
55    const struct radv_instance *instance = radv_physical_device_instance(pdev);
56 
57    return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
58           (instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || device->keep_shader_info;
59 }
60 
61 bool
62 radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags)
63 {
64    const struct radv_physical_device *pdev = radv_device_physical(device);
65    const struct radv_instance *instance = radv_physical_device_instance(pdev);
66 
67    /* Capture shader statistics when RGP is enabled to correlate shader hashes with Fossilize. */
68    return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) ||
69           (instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info ||
70           (instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
71 }
72 
73 bool
74 radv_pipeline_skip_shaders_cache(const struct radv_device *device, const struct radv_pipeline *pipeline)
75 {
76    const struct radv_physical_device *pdev = radv_device_physical(device);
77    const struct radv_instance *instance = radv_physical_device_instance(pdev);
78 
79    /* Skip the shaders cache when any of the below is true:
80     * - shaders are dumped for debugging (RADV_DEBUG=shaders)
81     * - shader IRs are captured (NIR, backend IR and ASM)
82     * - binaries are captured (the driver shouldn't store data to an internal cache)
83     */
84    return (instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) ||
85           (pipeline->create_flags &
86            (VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR | VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR));
87 }
88 
89 void
90 radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type)
91 {
92    vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
93 
94    pipeline->type = type;
95 }
96 
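/* Common pipeline destruction: drop the cache object reference, call the type-specific destroy
 * function and free the pipeline object.
 */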
97 void
98 radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
99                       const VkAllocationCallbacks *allocator)
100 {
101    if (pipeline->cache_object)
102       vk_pipeline_cache_object_unref(&device->vk, pipeline->cache_object);
103 
104    switch (pipeline->type) {
105    case RADV_PIPELINE_GRAPHICS:
106       radv_destroy_graphics_pipeline(device, radv_pipeline_to_graphics(pipeline));
107       break;
108    case RADV_PIPELINE_GRAPHICS_LIB:
109       radv_destroy_graphics_lib_pipeline(device, radv_pipeline_to_graphics_lib(pipeline));
110       break;
111    case RADV_PIPELINE_COMPUTE:
112       radv_destroy_compute_pipeline(device, radv_pipeline_to_compute(pipeline));
113       break;
114    case RADV_PIPELINE_RAY_TRACING:
115       radv_destroy_ray_tracing_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
116       break;
117    default:
118       unreachable("invalid pipeline type");
119    }
120 
121    radv_rmv_log_resource_destroy(device, (uint64_t)radv_pipeline_to_handle(pipeline));
122    vk_object_base_finish(&pipeline->base);
123    vk_free2(&device->vk.alloc, allocator, pipeline);
124 }
125 
126 VKAPI_ATTR void VKAPI_CALL
127 radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
128 {
129    VK_FROM_HANDLE(radv_device, device, _device);
130    VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
131 
132    if (!_pipeline)
133       return;
134 
135    radv_pipeline_destroy(device, pipeline, pAllocator);
136 }
137 
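/* Build the per-stage shader key from the pipeline create flags and the stage create info,
 * including robustness state, drirc shader version overrides and the required subgroup size.
 */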
138 struct radv_shader_stage_key
139 radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage,
140                              VkPipelineCreateFlags2 flags, const void *pNext)
141 {
142    const struct radv_physical_device *pdev = radv_device_physical(device);
143    const struct radv_instance *instance = radv_physical_device_instance(pdev);
144    gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
145    struct vk_pipeline_robustness_state rs;
146    struct radv_shader_stage_key key = {0};
147 
148    key.keep_statistic_info = radv_pipeline_capture_shader_stats(device, flags);
149 
150    if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT)
151       key.optimisations_disabled = 1;
152 
153    if (flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT)
154       key.view_index_from_device_index = 1;
155 
156    if (flags & VK_PIPELINE_CREATE_2_INDIRECT_BINDABLE_BIT_EXT)
157       key.indirect_bindable = 1;
158 
159    if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {
160       key.version = instance->drirc.override_graphics_shader_version;
161    } else if (stage->stage & RADV_RT_STAGE_BITS) {
162       key.version = instance->drirc.override_ray_tracing_shader_version;
163    } else {
164       assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT);
165       key.version = instance->drirc.override_compute_shader_version;
166    }
167 
168    vk_pipeline_robustness_state_fill(&device->vk, &rs, pNext, stage->pNext);
169 
170    radv_set_stage_key_robustness(&rs, s, &key);
171 
172    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *const subgroup_size =
173       vk_find_struct_const(stage->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
174 
175    if (subgroup_size) {
176       if (subgroup_size->requiredSubgroupSize == 32)
177          key.subgroup_required_size = RADV_REQUIRED_WAVE32;
178       else if (subgroup_size->requiredSubgroupSize == 64)
179          key.subgroup_required_size = RADV_REQUIRED_WAVE64;
180       else
181          unreachable("Unsupported required subgroup size.");
182    }
183 
184    if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) {
185       key.subgroup_require_full = 1;
186    }
187 
188    return key;
189 }
190 
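/* Initialize a radv_shader_stage from a VkPipelineShaderStageCreateInfo: record the SPIR-V (or
 * internal NIR) source, entrypoint, specialization info and shader layout, and compute the
 * stage SHA-1.
 */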
191 void
192 radv_pipeline_stage_init(VkPipelineCreateFlags2 pipeline_flags, const VkPipelineShaderStageCreateInfo *sinfo,
193                          const struct radv_pipeline_layout *pipeline_layout,
194                          const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage)
195 {
196    const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
197    const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
198       vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
199 
200    if (sinfo->module == VK_NULL_HANDLE && !minfo && !iinfo)
201       return;
202 
203    memset(out_stage, 0, sizeof(*out_stage));
204 
205    out_stage->stage = vk_to_mesa_shader_stage(sinfo->stage);
206    out_stage->next_stage = MESA_SHADER_NONE;
207    out_stage->entrypoint = sinfo->pName;
208    out_stage->spec_info = sinfo->pSpecializationInfo;
209    out_stage->feedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
210    out_stage->key = *stage_key;
211 
212    if (sinfo->module != VK_NULL_HANDLE) {
213       struct vk_shader_module *module = vk_shader_module_from_handle(sinfo->module);
214 
215       out_stage->spirv.data = module->data;
216       out_stage->spirv.size = module->size;
217       out_stage->spirv.object = &module->base;
218 
219       if (module->nir)
220          out_stage->internal_nir = module->nir;
221    } else if (minfo) {
222       out_stage->spirv.data = (const char *)minfo->pCode;
223       out_stage->spirv.size = minfo->codeSize;
224    }
225 
226    radv_shader_layout_init(pipeline_layout, out_stage->stage, &out_stage->layout);
227 
228    vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, out_stage->shader_sha1);
229 }
230 
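/* Copy the descriptor set layouts, push constant size and dynamic descriptor usage from the
 * pipeline layout into the per-shader layout.
 */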
231 void
232 radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, gl_shader_stage stage,
233                         struct radv_shader_layout *layout)
234 {
235    layout->num_sets = pipeline_layout->num_sets;
236    for (unsigned i = 0; i < pipeline_layout->num_sets; i++) {
237       layout->set[i].layout = pipeline_layout->set[i].layout;
238       layout->set[i].dynamic_offset_start = pipeline_layout->set[i].dynamic_offset_start;
239    }
240 
241    layout->push_constant_size = pipeline_layout->push_constant_size;
242    layout->use_dynamic_descriptors = pipeline_layout->dynamic_offset_count &&
243                                      (pipeline_layout->dynamic_shader_stages & mesa_to_vk_shader_stage(stage));
244 }
245 
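/* Callback for nir_vk_lower_ycbcr_tex: return the immutable YCbCr sampler state for the given
 * set/binding/array index, or NULL if the binding has no YCbCr samplers.
 */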
246 static const struct vk_ycbcr_conversion_state *
247 ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
248 {
249    const struct radv_shader_layout *layout = data;
250 
251    const struct radv_descriptor_set_layout *set_layout = layout->set[set].layout;
252    const struct vk_ycbcr_conversion_state *ycbcr_samplers = radv_immutable_ycbcr_samplers(set_layout, binding);
253 
254    if (!ycbcr_samplers)
255       return NULL;
256 
257    return ycbcr_samplers + array_index;
258 }
259 
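/* Vectorization width callback: report a width of 2 for 16-bit ALU instructions that ACO can
 * emit as packed math on GFX9+, and 1 (scalar) otherwise.
 */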
260 static uint8_t
261 opt_vectorize_callback(const nir_instr *instr, const void *_)
262 {
263    if (instr->type != nir_instr_type_alu)
264       return 0;
265 
266    const struct radv_device *device = _;
267    const struct radv_physical_device *pdev = radv_device_physical(device);
268    enum amd_gfx_level chip = pdev->info.gfx_level;
269    if (chip < GFX9)
270       return 1;
271 
272    const nir_alu_instr *alu = nir_instr_as_alu(instr);
273    const unsigned bit_size = alu->def.bit_size;
274    if (bit_size != 16)
275       return 1;
276 
277    return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
278 }
279 
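/* Callback for nir_lower_non_uniform_access: return which components of the source need to be
 * handled (0x1 for single-component sources, 0x2 when the binding can be chased, 0x3 otherwise).
 */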
280 static nir_component_mask_t
281 non_uniform_access_callback(const nir_src *src, void *_)
282 {
283    if (src->ssa->num_components == 1)
284       return 0x1;
285    return nir_chase_binding(*src).success ? 0x2 : 0x3;
286 }
287 
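/* Run the lowering/optimization passes that turn API-level NIR into NIR ready for backend
 * compilation: memory access vectorization and bit-size lowering, non-uniform access lowering,
 * pipeline layout lowering, per-stage I/O lowering (VS inputs, NGG, legacy VS/GS, PS exports),
 * ABI/argument lowering and late cleanups.
 */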
288 void
289 radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
290                      struct radv_shader_stage *stage)
291 {
292    const struct radv_physical_device *pdev = radv_device_physical(device);
293    const struct radv_instance *instance = radv_physical_device_instance(pdev);
294    enum amd_gfx_level gfx_level = pdev->info.gfx_level;
295    bool progress;
296 
297    /* Wave and workgroup size should already be filled. */
298    assert(stage->info.wave_size && stage->info.workgroup_size);
299 
300    if (stage->stage == MESA_SHADER_FRAGMENT) {
301       if (!stage->key.optimisations_disabled) {
302          NIR_PASS(_, stage->nir, nir_opt_cse);
303       }
304       NIR_PASS(_, stage->nir, radv_nir_lower_fs_intrinsics, stage, gfx_state);
305    }
306 
307    /* LLVM could support more of these in theory. */
308    bool use_llvm = radv_use_llvm_for_stage(pdev, stage->stage);
309    bool has_inverse_ballot = true;
310 #if AMD_LLVM_AVAILABLE
311    has_inverse_ballot = !use_llvm || LLVM_VERSION_MAJOR >= 17;
312 #endif
313    radv_nir_opt_tid_function_options tid_options = {
314       .use_masked_swizzle_amd = true,
315       .use_dpp16_shift_amd = !use_llvm && gfx_level >= GFX8,
316       .use_clustered_rotate = !use_llvm,
317       .hw_subgroup_size = stage->info.wave_size,
318       .hw_ballot_bit_size = has_inverse_ballot ? stage->info.wave_size : 0,
319       .hw_ballot_num_comp = has_inverse_ballot ? 1 : 0,
320    };
321    NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
322 
323    nir_divergence_analysis(stage->nir);
324    NIR_PASS(_, stage->nir, ac_nir_flag_smem_for_loads, gfx_level, use_llvm, false);
325 
326    NIR_PASS(_, stage->nir, nir_lower_memory_model);
327 
328    nir_load_store_vectorize_options vectorize_opts = {
329       .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global |
330                nir_var_shader_temp,
331       .callback = ac_nir_mem_vectorize_callback,
332       .cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
333       .robust_modes = 0,
334       /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
335        * the final offset is not.
336        */
337       .has_shared2_amd = gfx_level >= GFX7,
338    };
339 
340    if (stage->key.uniform_robustness2)
341       vectorize_opts.robust_modes |= nir_var_mem_ubo;
342 
343    if (stage->key.storage_robustness2)
344       vectorize_opts.robust_modes |= nir_var_mem_ssbo;
345 
346    bool constant_fold_for_push_const = false;
347    if (!stage->key.optimisations_disabled) {
348       progress = false;
349       NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
350       if (progress) {
351          NIR_PASS(_, stage->nir, nir_copy_prop);
352          NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !instance->drirc.disable_shrink_image_store);
353 
354          constant_fold_for_push_const = true;
355 
356          /* Gather info again, to update whether 8/16-bit are used. */
357          nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
358       }
359    }
360 
361    enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
362       nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access |
363       nir_lower_non_uniform_image_access;
364 
365    /* In practice, most shaders do not have non-uniform-qualified
366     * accesses (see
367     * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069),
368     * so a cheaper check that is likely to fail is run first.
369     */
370    if (nir_has_non_uniform_access(stage->nir, lower_non_uniform_access_types)) {
371       if (!stage->key.optimisations_disabled) {
372          NIR_PASS(_, stage->nir, nir_opt_non_uniform_access);
373       }
374 
375       if (!radv_use_llvm_for_stage(pdev, stage->stage)) {
376          nir_lower_non_uniform_access_options options = {
377             .types = lower_non_uniform_access_types,
378             .callback = &non_uniform_access_callback,
379             .callback_data = NULL,
380          };
381          NIR_PASS(_, stage->nir, nir_lower_non_uniform_access, &options);
382       }
383    }
384 
385    progress = false;
386    NIR_PASS(progress, stage->nir, ac_nir_lower_mem_access_bit_sizes, gfx_level, use_llvm);
387    if (progress)
388       constant_fold_for_push_const = true;
389 
390    progress = false;
391    NIR_PASS(progress, stage->nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, &stage->layout);
392    /* Gather info again because nir_vk_lower_ycbcr_tex may have emitted resinfo instructions. */
393    if (progress)
394       nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
395 
396    bool fix_derivs_in_divergent_cf =
397       stage->stage == MESA_SHADER_FRAGMENT && !radv_use_llvm_for_stage(pdev, stage->stage);
398    if (fix_derivs_in_divergent_cf)
399       nir_divergence_analysis(stage->nir);
400 
401    NIR_PASS(_, stage->nir, ac_nir_lower_tex,
402             &(ac_nir_lower_tex_options){
403                .gfx_level = gfx_level,
404                .lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord,
405                .fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf,
406                .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
407             });
408 
409    if (stage->nir->info.uses_resource_info_query)
410       NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);
411 
412    /* Ensure split load_push_constant intrinsics still have constant offsets, for radv_nir_apply_pipeline_layout. */
413    if (constant_fold_for_push_const && stage->args.ac.inline_push_const_mask)
414       NIR_PASS(_, stage->nir, nir_opt_constant_folding);
415 
416    /* TODO: vectorize loads after this to vectorize loading adjacent descriptors */
417    NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, stage);
418 
419    if (!stage->key.optimisations_disabled) {
420       NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true);
421    }
422 
423    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
424 
425    nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
426 
427    if (!stage->key.optimisations_disabled) {
428       NIR_PASS(_, stage->nir, nir_opt_licm);
429       if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
430          sink_opts |= nir_move_load_input;
431 
432       NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
433       NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies);
434    }
435 
436    /* Lower VS inputs. We need to do this after nir_opt_sink, because
437     * load_input can be reordered, but buffer loads can't.
438     */
439    if (stage->stage == MESA_SHADER_VERTEX) {
440       NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
441    }
442 
443    /* Lower I/O intrinsics to memory instructions. */
444    bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
445    bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
446    bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
447    if (lowered_ngg) {
448       radv_lower_ngg(device, stage, gfx_state);
449    } else if (is_last_vgt_stage) {
450       if (stage->stage != MESA_SHADER_GEOMETRY) {
451          NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, gfx_level,
452                     stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
453                     stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
454                     stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
455 
456       } else {
457          ac_nir_gs_output_info gs_out_info = {
458             .streams = stage->info.gs.output_streams,
459             .sysval_mask = stage->info.gs.output_usage_mask,
460             .varying_mask = stage->info.gs.output_usage_mask,
461          };
462          NIR_PASS_V(stage->nir, ac_nir_lower_legacy_gs, false, false, &gs_out_info);
463       }
464    } else if (stage->stage == MESA_SHADER_FRAGMENT) {
465       ac_nir_lower_ps_early_options early_options = {
466          .alpha_func = COMPARE_FUNC_ALWAYS,
467          .spi_shader_col_format_hint = ~0,
468       };
469 
470       ac_nir_lower_ps_late_options late_options = {
471          .gfx_level = gfx_level,
472          .family = pdev->info.family,
473          .use_aco = !radv_use_llvm_for_stage(pdev, stage->stage),
474          .bc_optimize_for_persp = G_0286CC_PERSP_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
475                                   G_0286CC_PERSP_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
476          .bc_optimize_for_linear = G_0286CC_LINEAR_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
477                                    G_0286CC_LINEAR_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
478          .uses_discard = true,
479          .no_color_export = stage->info.ps.has_epilog,
480          .no_depth_export = stage->info.ps.exports_mrtz_via_epilog,
481 
482       };
483 
484       if (!late_options.no_color_export) {
485          late_options.dual_src_blend_swizzle = gfx_state->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11;
486          late_options.color_is_int8 = gfx_state->ps.epilog.color_is_int8;
487          late_options.color_is_int10 = gfx_state->ps.epilog.color_is_int10;
488          late_options.enable_mrt_output_nan_fixup =
489             gfx_state->ps.epilog.enable_mrt_output_nan_fixup && !stage->nir->info.internal;
490          /* Need to filter out unwritten color slots. */
491          early_options.spi_shader_col_format_hint = late_options.spi_shader_col_format =
492             gfx_state->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written;
493          late_options.alpha_to_one = gfx_state->ps.epilog.alpha_to_one;
494       }
495 
496       if (!late_options.no_depth_export) {
497       /* Compared to gfx_state.ps.alpha_to_coverage_via_mrtz,
498        * radv_shader_info.ps.writes_mrt0_alpha also requires that a depth/stencil/sample_mask
499        * export exists. ac_nir_lower_ps() requires this field to reflect whether alpha via
500        * mrtz is really present.
501        */
502          early_options.keep_alpha_for_mrtz = late_options.alpha_to_coverage_via_mrtz = stage->info.ps.writes_mrt0_alpha;
503       }
504 
505       NIR_PASS_V(stage->nir, ac_nir_lower_ps_early, &early_options);
506       NIR_PASS_V(stage->nir, ac_nir_lower_ps_late, &late_options);
507    }
508 
509    if (radv_shader_should_clear_lds(device, stage->nir)) {
510       const unsigned chunk_size = 16; /* max single store size */
511       const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
512       NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
513    }
514 
515    /* This must be after lowering resources to descriptor loads and before lowering intrinsics
516     * to args and lowering int64.
517     */
518    if (!radv_use_llvm_for_stage(pdev, stage->stage))
519       ac_nir_optimize_uniform_atomics(stage->nir);
520 
521    NIR_PASS(_, stage->nir, nir_lower_int64);
522 
523    NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
524 
525    NIR_PASS(_, stage->nir, nir_lower_idiv,
526             &(nir_lower_idiv_options){
527                .allow_fp16 = gfx_level >= GFX9,
528             });
529 
530    NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
531    NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level,
532               pdev->info.has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog,
533               radv_select_hw_stage(&stage->info, gfx_level), stage->info.wave_size, stage->info.workgroup_size,
534               &stage->args.ac);
535    NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
536    radv_optimize_nir_algebraic(
537       stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK,
538       gfx_level >= GFX8);
539 
540    NIR_PASS(_, stage->nir, nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
541 
542    if (stage->nir->info.bit_sizes_int & (8 | 16)) {
543       if (gfx_level >= GFX8)
544          nir_divergence_analysis(stage->nir);
545 
546       if (nir_lower_bit_size(stage->nir, ac_nir_lower_bit_size_callback, &gfx_level)) {
547          NIR_PASS(_, stage->nir, nir_opt_constant_folding);
548       }
549    }
550    if (gfx_level >= GFX9) {
551       bool separate_g16 = gfx_level >= GFX10;
552       struct nir_opt_tex_srcs_options opt_srcs_options[] = {
553          {
554             .sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
555             .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) |
556                          (1 << nir_tex_src_min_lod) | (1 << nir_tex_src_ms_index) |
557                          (separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)),
558          },
559          {
560             .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
561             .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
562          },
563       };
564       struct nir_opt_16bit_tex_image_options opt_16bit_options = {
565          .rounding_mode = nir_rounding_mode_undef,
566          .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
567          .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
568          .integer_dest_saturates = true,
569          .opt_image_store_data = true,
570          .opt_image_srcs = true,
571          .opt_srcs_options_count = separate_g16 ? 2 : 1,
572          .opt_srcs_options = opt_srcs_options,
573       };
574       bool run_copy_prop = false;
575       NIR_PASS(run_copy_prop, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options);
576 
577       /* Optimizing 16-bit texture/image destinations leaves scalar moves that stop
578        * nir_opt_vectorize from vectorizing their ALU uses.
579        */
580       if (run_copy_prop) {
581          NIR_PASS(_, stage->nir, nir_copy_prop);
582          NIR_PASS(_, stage->nir, nir_opt_dce);
583       }
584 
585       if (!stage->key.optimisations_disabled &&
586           ((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16)) {
587          NIR_PASS(_, stage->nir, nir_opt_vectorize, opt_vectorize_callback, device);
588       }
589    }
590 
591    /* cleanup passes */
592    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
593 
594    /* This pass changes the global float control mode to RTZ, so it can't be used
595     * with LLVM (which only supports RTNE) or with RT, where the mode needs to match
596     * across separately compiled stages.
597     */
598    if (!radv_use_llvm_for_stage(pdev, stage->stage) && !gl_shader_stage_is_rt(stage->stage))
599       NIR_PASS(_, stage->nir, ac_nir_opt_pack_half, gfx_level);
600 
601    NIR_PASS(_, stage->nir, nir_lower_load_const_to_scalar);
602    NIR_PASS(_, stage->nir, nir_copy_prop);
603    NIR_PASS(_, stage->nir, nir_opt_dce);
604 
605    if (!stage->key.optimisations_disabled) {
606       sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo | nir_move_alu;
607       NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
608 
609       nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
610                                    nir_move_comparisons | nir_move_copies | nir_move_alu;
611       NIR_PASS(_, stage->nir, nir_opt_move, move_opts);
612 
613       /* Run nir_opt_move again to make sure that comparisons are as close as possible to their
614        * first use, to prevent SCC spilling.
615        */
616       NIR_PASS(_, stage->nir, nir_opt_move, nir_move_comparisons);
617    }
618 }
619 
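/* Whether LDS should be zero-initialized for this shader: compute-like stages (CS, mesh, task)
 * that use shared memory, when the clear_lds drirc option is enabled.
 */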
620 bool
621 radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
622 {
623    const struct radv_physical_device *pdev = radv_device_physical(device);
624    const struct radv_instance *instance = radv_physical_device_instance(pdev);
625 
626    return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH ||
627            shader->info.stage == MESA_SHADER_TASK) &&
628           shader->info.shared_size > 0 && instance->drirc.clear_lds;
629 }
630 
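/* Number of executables exposed through VK_KHR_pipeline_executable_properties: one per compiled
 * shader (including ray tracing stage shaders), plus the GS copy shader when present.
 */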
631 static uint32_t
632 radv_get_executable_count(struct radv_pipeline *pipeline)
633 {
634    uint32_t ret = 0;
635 
636    if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
637       struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
638       for (uint32_t i = 0; i < rt_pipeline->stage_count; i++)
639          ret += rt_pipeline->stages[i].shader ? 1 : 0;
640    }
641 
642    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
643       if (!pipeline->shaders[i])
644          continue;
645 
646       ret += 1u;
647       if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
648          ret += 1u;
649       }
650    }
651 
652    return ret;
653 }
654 
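/* Map an executable index back to its radv_shader and stage, walking ray tracing stage shaders
 * first, then the pipeline shaders (with the GS copy shader counted right after GS).
 */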
655 static struct radv_shader *
656 radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
657 {
658    if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
659       struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
660       for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
661          struct radv_ray_tracing_stage *rt_stage = &rt_pipeline->stages[i];
662          if (!rt_stage->shader)
663             continue;
664 
665          if (!index) {
666             *stage = rt_stage->stage;
667             return rt_stage->shader;
668          }
669 
670          index--;
671       }
672    }
673 
674    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
675       if (!pipeline->shaders[i])
676          continue;
677       if (!index) {
678          *stage = i;
679          return pipeline->shaders[i];
680       }
681 
682       --index;
683 
684       if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
685          if (!index) {
686             *stage = i;
687             return pipeline->gs_copy_shader;
688          }
689          --index;
690       }
691    }
692 
693    *stage = -1;
694    return NULL;
695 }
696 
697 /* Basically strlcpy (which does not exist on linux) specialized for
698  * descriptions. */
699 static void
700 desc_copy(char *desc, const char *src)
701 {
702    int len = strlen(src);
703    assert(len < VK_MAX_DESCRIPTION_SIZE);
704    memcpy(desc, src, len);
705    memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
706 }
707 
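/* vkGetPipelineExecutablePropertiesKHR: report one executable per shader with a human-readable
 * name/description, the (possibly merged) stages it covers and its subgroup size.
 */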
708 VKAPI_ATTR VkResult VKAPI_CALL
709 radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
710                                         uint32_t *pExecutableCount, VkPipelineExecutablePropertiesKHR *pProperties)
711 {
712    VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
713    const uint32_t total_count = radv_get_executable_count(pipeline);
714 
715    if (!pProperties) {
716       *pExecutableCount = total_count;
717       return VK_SUCCESS;
718    }
719 
720    const uint32_t count = MIN2(total_count, *pExecutableCount);
721    for (uint32_t executable_idx = 0; executable_idx < count; executable_idx++) {
722       gl_shader_stage stage;
723       struct radv_shader *shader = radv_get_shader_from_executable_index(pipeline, executable_idx, &stage);
724 
725       pProperties[executable_idx].stages = mesa_to_vk_shader_stage(stage);
726 
727       const char *name = _mesa_shader_stage_to_string(stage);
728       const char *description = NULL;
729       switch (stage) {
730       case MESA_SHADER_VERTEX:
731          description = "Vulkan Vertex Shader";
732          break;
733       case MESA_SHADER_TESS_CTRL:
734          if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
735             pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
736             name = "vertex + tessellation control";
737             description = "Combined Vulkan Vertex and Tessellation Control Shaders";
738          } else {
739             description = "Vulkan Tessellation Control Shader";
740          }
741          break;
742       case MESA_SHADER_TESS_EVAL:
743          description = "Vulkan Tessellation Evaluation Shader";
744          break;
745       case MESA_SHADER_GEOMETRY:
746          if (shader->info.type == RADV_SHADER_TYPE_GS_COPY) {
747             name = "geometry copy";
748             description = "Extra shader stage that loads the GS output ringbuffer into the rasterizer";
749             break;
750          }
751 
752          if (pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
753             pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
754             name = "tessellation evaluation + geometry";
755             description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
756          } else if (!pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_VERTEX]) {
757             pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
758             name = "vertex + geometry";
759             description = "Combined Vulkan Vertex and Geometry Shaders";
760          } else {
761             description = "Vulkan Geometry Shader";
762          }
763          break;
764       case MESA_SHADER_FRAGMENT:
765          description = "Vulkan Fragment Shader";
766          break;
767       case MESA_SHADER_COMPUTE:
768          description = "Vulkan Compute Shader";
769          break;
770       case MESA_SHADER_MESH:
771          description = "Vulkan Mesh Shader";
772          break;
773       case MESA_SHADER_TASK:
774          description = "Vulkan Task Shader";
775          break;
776       case MESA_SHADER_RAYGEN:
777          description = "Vulkan Ray Generation Shader";
778          break;
779       case MESA_SHADER_ANY_HIT:
780          description = "Vulkan Any-Hit Shader";
781          break;
782       case MESA_SHADER_CLOSEST_HIT:
783          description = "Vulkan Closest-Hit Shader";
784          break;
785       case MESA_SHADER_MISS:
786          description = "Vulkan Miss Shader";
787          break;
788       case MESA_SHADER_INTERSECTION:
789          description = "Shader responsible for traversing the acceleration structure";
790          break;
791       case MESA_SHADER_CALLABLE:
792          description = "Vulkan Callable Shader";
793          break;
794       default:
795          unreachable("Unsupported shader stage");
796       }
797 
798       pProperties[executable_idx].subgroupSize = shader->info.wave_size;
799       desc_copy(pProperties[executable_idx].name, name);
800       desc_copy(pProperties[executable_idx].description, description);
801    }
802 
803    VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
804    *pExecutableCount = count;
805    return result;
806 }
807 
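/* vkGetPipelineExecutableStatisticsKHR: report register usage, spilling, code size, LDS/scratch
 * usage, occupancy and combined I/O slot counts for one shader, followed by the backend
 * statistics when they were captured.
 */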
808 VKAPI_ATTR VkResult VKAPI_CALL
809 radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
810                                         uint32_t *pStatisticCount, VkPipelineExecutableStatisticKHR *pStatistics)
811 {
812    VK_FROM_HANDLE(radv_device, device, _device);
813    VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
814    gl_shader_stage stage;
815    struct radv_shader *shader =
816       radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
817 
818    const struct radv_physical_device *pdev = radv_device_physical(device);
819    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
820 
821    unsigned lds_increment =
822       gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity;
823 
824    VkPipelineExecutableStatisticKHR *s = pStatistics;
825    VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
826    VkResult result = VK_SUCCESS;
827 
828    if (s < end) {
829       desc_copy(s->name, "Driver pipeline hash");
830       desc_copy(s->description, "Driver pipeline hash used by RGP");
831       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
832       s->value.u64 = pipeline->pipeline_hash;
833    }
834    ++s;
835 
836    if (s < end) {
837       desc_copy(s->name, "SGPRs");
838       desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
839       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
840       s->value.u64 = shader->config.num_sgprs;
841    }
842    ++s;
843 
844    if (s < end) {
845       desc_copy(s->name, "VGPRs");
846       desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
847       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
848       s->value.u64 = shader->config.num_vgprs;
849    }
850    ++s;
851 
852    if (s < end) {
853       desc_copy(s->name, "Spilled SGPRs");
854       desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
855       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
856       s->value.u64 = shader->config.spilled_sgprs;
857    }
858    ++s;
859 
860    if (s < end) {
861       desc_copy(s->name, "Spilled VGPRs");
862       desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
863       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
864       s->value.u64 = shader->config.spilled_vgprs;
865    }
866    ++s;
867 
868    if (s < end) {
869       desc_copy(s->name, "Code size");
870       desc_copy(s->description, "Code size in bytes");
871       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
872       s->value.u64 = shader->exec_size;
873    }
874    ++s;
875 
876    if (s < end) {
877       desc_copy(s->name, "LDS size");
878       desc_copy(s->description, "LDS size in bytes per workgroup");
879       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
880       s->value.u64 = shader->config.lds_size * lds_increment;
881    }
882    ++s;
883 
884    if (s < end) {
885       desc_copy(s->name, "Scratch size");
886       desc_copy(s->description, "Private memory in bytes per subgroup");
887       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
888       s->value.u64 = shader->config.scratch_bytes_per_wave;
889    }
890    ++s;
891 
892    if (s < end) {
893       desc_copy(s->name, "Subgroups per SIMD");
894       desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
895       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
896       s->value.u64 = shader->max_waves;
897    }
898    ++s;
899 
900    if (s < end) {
901       desc_copy(s->name, "Combined inputs");
902       desc_copy(s->description, "Number of input slots reserved for the shader (including merged stages)");
903       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
904       s->value.u64 = 0;
905 
906       switch (stage) {
907       case MESA_SHADER_VERTEX:
908          if (gfx_level <= GFX8 || (!shader->info.vs.as_es && !shader->info.vs.as_ls)) {
909             /* VS inputs when VS is a separate stage */
910             s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
911          }
912          break;
913 
914       case MESA_SHADER_TESS_CTRL:
915          if (gfx_level >= GFX9) {
916             /* VS inputs when pipeline has tess */
917             s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
918          }
919 
920          /* VS -> TCS inputs */
921          s->value.u64 += shader->info.tcs.num_linked_inputs;
922          break;
923 
924       case MESA_SHADER_TESS_EVAL:
925          if (gfx_level <= GFX8 || !shader->info.tes.as_es) {
926             /* TCS -> TES inputs when TES is a separate stage */
927             s->value.u64 += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
928          }
929          break;
930 
931       case MESA_SHADER_GEOMETRY:
932          /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
933          if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
934             break;
935 
936          if (gfx_level >= GFX9) {
937             if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
938                /* VS inputs when pipeline has GS but no tess */
939                s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
940             } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
941                /* TCS -> TES inputs when pipeline has GS */
942                s->value.u64 += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
943             }
944          }
945 
946          /* VS -> GS or TES -> GS inputs */
947          s->value.u64 += shader->info.gs.num_linked_inputs;
948          break;
949 
950       case MESA_SHADER_FRAGMENT:
951          s->value.u64 += shader->info.ps.num_inputs;
952          break;
953 
954       default:
955          /* Other stages don't have IO or we are not interested in them. */
956          break;
957       }
958    }
959    ++s;
960 
961    if (s < end) {
962       desc_copy(s->name, "Combined outputs");
963       desc_copy(s->description, "Number of output slots reserved for the shader (including merged stages)");
964       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
965       s->value.u64 = 0;
966 
967       switch (stage) {
968       case MESA_SHADER_VERTEX:
969          if (!shader->info.vs.as_ls && !shader->info.vs.as_es) {
970             /* VS -> FS outputs. */
971             s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
972                             shader->info.outinfo.prim_param_exports;
973          } else if (gfx_level <= GFX8) {
974             /* VS -> TCS, VS -> GS outputs on GFX6-8 */
975             s->value.u64 += shader->info.vs.num_linked_outputs;
976          }
977          break;
978 
979       case MESA_SHADER_TESS_CTRL:
980          if (gfx_level >= GFX9) {
981             /* VS -> TCS outputs on GFX9+ */
982             s->value.u64 += shader->info.vs.num_linked_outputs;
983          }
984 
985          /* TCS -> TES outputs */
986          s->value.u64 += shader->info.tcs.num_linked_outputs + shader->info.tcs.num_linked_patch_outputs;
987          break;
988 
989       case MESA_SHADER_TESS_EVAL:
990          if (!shader->info.tes.as_es) {
991             /* TES -> FS outputs */
992             s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
993                             shader->info.outinfo.prim_param_exports;
994          } else if (gfx_level <= GFX8) {
995             /* TES -> GS outputs on GFX6-8 */
996             s->value.u64 += shader->info.tes.num_linked_outputs;
997          }
998          break;
999 
1000       case MESA_SHADER_GEOMETRY:
1001          /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
1002          if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
1003             break;
1004 
1005          if (gfx_level >= GFX9) {
1006             if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
1007                /* VS -> GS outputs on GFX9+ */
1008                s->value.u64 += shader->info.vs.num_linked_outputs;
1009             } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
1010                /* TES -> GS outputs on GFX9+ */
1011                s->value.u64 += shader->info.tes.num_linked_outputs;
1012             }
1013          }
1014 
1015          if (shader->info.is_ngg) {
1016             /* GS -> FS outputs (GFX10+ NGG) */
1017             s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
1018                             shader->info.outinfo.prim_param_exports;
1019          } else {
1020             /* GS -> FS outputs (GFX6-10.3 legacy) */
1021             s->value.u64 += shader->info.gs.gsvs_vertex_size / 16;
1022          }
1023          break;
1024 
1025       case MESA_SHADER_MESH:
1026          /* MS -> FS outputs */
1027          s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
1028                          shader->info.outinfo.prim_param_exports;
1029          break;
1030 
1031       case MESA_SHADER_FRAGMENT:
1032          s->value.u64 += DIV_ROUND_UP(util_bitcount(shader->info.ps.colors_written), 4) + !!shader->info.ps.writes_z +
1033                          !!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
1034                          !!shader->info.ps.writes_mrt0_alpha;
1035          break;
1036 
1037       default:
1038          /* Other stages don't have IO or we are not interested in them. */
1039          break;
1040       }
1041    }
1042    ++s;
1043 
1044    if (shader->statistics) {
1045       for (unsigned i = 0; i < aco_num_statistics; i++) {
1046          const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
1047          if (s < end) {
1048             desc_copy(s->name, info->name);
1049             desc_copy(s->description, info->desc);
1050             s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
1051             s->value.u64 = shader->statistics[i];
1052          }
1053          ++s;
1054       }
1055    }
1056 
1057    if (!pStatistics)
1058       *pStatisticCount = s - pStatistics;
1059    else if (s > end) {
1060       *pStatisticCount = end - pStatistics;
1061       result = VK_INCOMPLETE;
1062    } else {
1063       *pStatisticCount = s - pStatistics;
1064    }
1065 
1066    return result;
1067 }
1068 
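/* Copy a NUL-terminated representation string into the user-provided buffer, truncating and
 * returning VK_INCOMPLETE when the buffer is too small.
 */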
1069 static VkResult
1070 radv_copy_representation(void *data, size_t *data_size, const char *src)
1071 {
1072    size_t total_size = strlen(src) + 1;
1073 
1074    if (!data) {
1075       *data_size = total_size;
1076       return VK_SUCCESS;
1077    }
1078 
1079    size_t size = MIN2(total_size, *data_size);
1080 
1081    memcpy(data, src, size);
1082    if (size)
1083       *((char *)data + size - 1) = 0;
1084    return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
1085 }
1086 
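/* vkGetPipelineExecutableInternalRepresentationsKHR: expose the optimized NIR, the backend IR
 * (ACO or LLVM) and the final disassembly for one shader, when available.
 */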
1087 VKAPI_ATTR VkResult VKAPI_CALL
1088 radv_GetPipelineExecutableInternalRepresentationsKHR(
1089    VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount,
1090    VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
1091 {
1092    VK_FROM_HANDLE(radv_device, device, _device);
1093    VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
1094    const struct radv_physical_device *pdev = radv_device_physical(device);
1095    gl_shader_stage stage;
1096    struct radv_shader *shader =
1097       radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
1098 
1099    VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
1100    VkPipelineExecutableInternalRepresentationKHR *end =
1101       p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
1102    VkResult result = VK_SUCCESS;
1103    /* optimized NIR */
1104    if (p < end) {
1105       p->isText = true;
1106       desc_copy(p->name, "NIR Shader(s)");
1107       desc_copy(p->description, "The optimized NIR shader(s)");
1108       if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
1109          result = VK_INCOMPLETE;
1110    }
1111    ++p;
1112 
1113    /* backend IR */
1114    if (p < end) {
1115       p->isText = true;
1116       if (radv_use_llvm_for_stage(pdev, stage)) {
1117          desc_copy(p->name, "LLVM IR");
1118          desc_copy(p->description, "The LLVM IR after some optimizations");
1119       } else {
1120          desc_copy(p->name, "ACO IR");
1121          desc_copy(p->description, "The ACO IR after some optimizations");
1122       }
1123       if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
1124          result = VK_INCOMPLETE;
1125    }
1126    ++p;
1127 
1128    /* Disassembler */
1129    if (p < end && shader->disasm_string) {
1130       p->isText = true;
1131       desc_copy(p->name, "Assembly");
1132       desc_copy(p->description, "Final Assembly");
1133       if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
1134          result = VK_INCOMPLETE;
1135    }
1136    ++p;
1137 
1138    if (!pInternalRepresentations)
1139       *pInternalRepresentationCount = p - pInternalRepresentations;
1140    else if (p > end) {
1141       result = VK_INCOMPLETE;
1142       *pInternalRepresentationCount = end - pInternalRepresentations;
1143    } else {
1144       *pInternalRepresentationCount = p - pInternalRepresentations;
1145    }
1146 
1147    return result;
1148 }
1149 
1150 static void
1151 vk_shader_module_finish(void *_module)
1152 {
1153    struct vk_shader_module *module = _module;
1154    vk_object_base_finish(&module->base);
1155 }
1156 
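/* Deep-copy an array of VkPipelineShaderStageCreateInfo into mem_ctx, including shader modules,
 * inline SPIR-V and specialization info, so the stages remain valid after the original create
 * info goes away.
 */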
1157 VkPipelineShaderStageCreateInfo *
1158 radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
1159                                    const VkPipelineShaderStageCreateInfo *pStages, void *mem_ctx)
1160 {
1161    VkPipelineShaderStageCreateInfo *new_stages;
1162 
1163    size_t size = sizeof(VkPipelineShaderStageCreateInfo) * stageCount;
1164    new_stages = ralloc_size(mem_ctx, size);
1165    if (!new_stages)
1166       return NULL;
1167 
1168    if (size)
1169       memcpy(new_stages, pStages, size);
1170 
1171    for (uint32_t i = 0; i < stageCount; i++) {
1172       VK_FROM_HANDLE(vk_shader_module, module, new_stages[i].module);
1173 
1174       const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO);
1175 
1176       if (module) {
1177          struct vk_shader_module *new_module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size);
1178          if (!new_module)
1179             return NULL;
1180 
1181          ralloc_set_destructor(new_module, vk_shader_module_finish);
1182          vk_object_base_init(&device->vk, &new_module->base, VK_OBJECT_TYPE_SHADER_MODULE);
1183 
1184          new_module->nir = NULL;
1185          memcpy(new_module->hash, module->hash, sizeof(module->hash));
1186          new_module->size = module->size;
1187          memcpy(new_module->data, module->data, module->size);
1188 
1189          module = new_module;
1190       } else if (minfo) {
1191          module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + minfo->codeSize);
1192          if (!module)
1193             return NULL;
1194 
1195          vk_shader_module_init(&device->vk, module, minfo);
1196       }
1197 
1198       if (module) {
1199          const VkSpecializationInfo *spec = new_stages[i].pSpecializationInfo;
1200          if (spec) {
1201             VkSpecializationInfo *new_spec = ralloc(mem_ctx, VkSpecializationInfo);
1202             if (!new_spec)
1203                return NULL;
1204 
1205             new_spec->mapEntryCount = spec->mapEntryCount;
1206             uint32_t map_entries_size = sizeof(VkSpecializationMapEntry) * spec->mapEntryCount;
1207             new_spec->pMapEntries = ralloc_size(mem_ctx, map_entries_size);
1208             if (!new_spec->pMapEntries)
1209                return NULL;
1210             memcpy((void *)new_spec->pMapEntries, spec->pMapEntries, map_entries_size);
1211 
1212             new_spec->dataSize = spec->dataSize;
1213             new_spec->pData = ralloc_size(mem_ctx, spec->dataSize);
1214             if (!new_spec->pData)
1215                return NULL;
1216             memcpy((void *)new_spec->pData, spec->pData, spec->dataSize);
1217 
1218             new_stages[i].pSpecializationInfo = new_spec;
1219          }
1220 
1221          new_stages[i].module = vk_shader_module_to_handle(module);
1222          new_stages[i].pName = ralloc_strdup(mem_ctx, new_stages[i].pName);
1223          if (!new_stages[i].pName)
1224             return NULL;
1225          new_stages[i].pNext = NULL;
1226       }
1227    }
1228 
1229    return new_stages;
1230 }
1231 
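/* Seed a pipeline hash with the device cache key and, when present, the pipeline layout hash. */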
1232 void
1233 radv_pipeline_hash(const struct radv_device *device, const struct radv_pipeline_layout *pipeline_layout,
1234                    struct mesa_sha1 *ctx)
1235 {
1236    _mesa_sha1_update(ctx, device->cache_hash, sizeof(device->cache_hash));
1237    if (pipeline_layout)
1238       _mesa_sha1_update(ctx, pipeline_layout->hash, sizeof(pipeline_layout->hash));
1239 }
1240 
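/* Mix a shader stage into a pipeline hash: the stage SHA-1 from vk_pipeline_hash_shader_stage
 * plus the RADV stage key.
 */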
1241 void
1242 radv_pipeline_hash_shader_stage(VkPipelineCreateFlags2 pipeline_flags, const VkPipelineShaderStageCreateInfo *sinfo,
1243                                 const struct radv_shader_stage_key *stage_key, struct mesa_sha1 *ctx)
1244 {
1245    unsigned char shader_sha1[SHA1_DIGEST_LENGTH];
1246 
1247    vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, shader_sha1);
1248 
1249    _mesa_sha1_update(ctx, shader_sha1, sizeof(shader_sha1));
1250    _mesa_sha1_update(ctx, stage_key, sizeof(*stage_key));
1251 }
1252