1 /*
2  * Copyright © 2021 Google
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/nir.h"
25 #include "nir/nir_builder.h"
26 
27 #include "nir/radv_nir.h"
28 #include "radv_debug.h"
29 #include "radv_private.h"
30 #include "radv_shader.h"
31 #include "vk_pipeline.h"
32 
33 struct rt_handle_hash_entry {
34    uint32_t key;
35    char hash[20];
36 };
37 
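/* Hash the SHA-1s of the given stages into a 32-bit shader group handle.
 * Bit 31 is always set (the low half of the handle space is left for resume
 * shaders etc.) and bit 30 marks handles that live in the capture/replay
 * namespace. Collisions are resolved by linear probing in device->rt_handles,
 * so identical stage hashes always map to the same handle.
 */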
38 static uint32_t
39 handle_from_stages(struct radv_device *device, const struct radv_ray_tracing_stage *stages, unsigned stage_count,
40                    bool replay_namespace)
41 {
42    struct mesa_sha1 ctx;
43    _mesa_sha1_init(&ctx);
44 
45    for (uint32_t i = 0; i < stage_count; i++)
46       _mesa_sha1_update(&ctx, stages[i].sha1, SHA1_DIGEST_LENGTH);
47 
48    unsigned char hash[20];
49    _mesa_sha1_final(&ctx, hash);
50 
51    uint32_t ret;
52    memcpy(&ret, hash, sizeof(ret));
53 
54    /* Leave the low half for resume shaders etc. */
55    ret |= 1u << 31;
56 
57    /* Ensure we have dedicated space for replayable shaders */
58    ret &= ~(1u << 30);
59    ret |= replay_namespace << 30;
60 
61    simple_mtx_lock(&device->rt_handles_mtx);
62 
63    struct hash_entry *he = NULL;
64    for (;;) {
65       he = _mesa_hash_table_search(device->rt_handles, &ret);
66       if (!he)
67          break;
68 
69       if (memcmp(he->data, hash, sizeof(hash)) == 0)
70          break;
71 
72       ++ret;
73    }
74 
75    if (!he) {
76       struct rt_handle_hash_entry *e = ralloc(device->rt_handles, struct rt_handle_hash_entry);
77       e->key = ret;
78       memcpy(e->hash, hash, sizeof(e->hash));
79       _mesa_hash_table_insert(device->rt_handles, &e->key, &e->hash);
80    }
81 
82    simple_mtx_unlock(&device->rt_handles_mtx);
83 
84    return ret;
85 }
86 
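/* Build the per-stage shader keys for the pipeline's own stages and merge in
 * the robustness state of imported pipeline libraries, which only affects the
 * traversal (intersection) stage key.
 */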
87 static void
88 radv_generate_rt_shaders_key(const struct radv_device *device, const struct radv_ray_tracing_pipeline *pipeline,
89                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
90                              struct radv_shader_stage_key *stage_keys)
91 {
92    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
93       const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
94       gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
95 
96       stage_keys[s] = radv_pipeline_get_shader_key(device, stage, pipeline->base.base.create_flags, pCreateInfo->pNext);
97    }
98 
99    if (pCreateInfo->pLibraryInfo) {
100       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
101          RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, pCreateInfo->pLibraryInfo->pLibraries[i]);
102          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline_lib);
103          /* apply shader robustness from merged shaders */
104          if (library_pipeline->traversal_storage_robustness2)
105             stage_keys[MESA_SHADER_INTERSECTION].storage_robustness2 = true;
106 
107          if (library_pipeline->traversal_uniform_robustness2)
108             stage_keys[MESA_SHADER_INTERSECTION].uniform_robustness2 = true;
109       }
110    }
111 }
112 
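/* Assign handle indices (general/closest-hit/any-hit/intersection) to every
 * shader group described in the create info. For capture/replay, the computed
 * non-recursive index has to match the captured handle; otherwise
 * VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS is returned.
 */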
113 static VkResult
114 radv_create_group_handles(struct radv_device *device, const struct radv_ray_tracing_pipeline *pipeline,
115                           const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
116                           const struct radv_ray_tracing_stage *stages, struct radv_ray_tracing_group *groups)
117 {
118    bool capture_replay =
119       pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
120    for (unsigned i = 0; i < pCreateInfo->groupCount; ++i) {
121       const VkRayTracingShaderGroupCreateInfoKHR *group_info = &pCreateInfo->pGroups[i];
122       switch (group_info->type) {
123       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
124          if (group_info->generalShader != VK_SHADER_UNUSED_KHR)
125             groups[i].handle.general_index =
126                handle_from_stages(device, &stages[group_info->generalShader], 1, capture_replay);
127 
128          break;
129       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR:
130          if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
131             groups[i].handle.closest_hit_index =
132                handle_from_stages(device, &stages[group_info->closestHitShader], 1, capture_replay);
133 
134          if (group_info->intersectionShader != VK_SHADER_UNUSED_KHR) {
135             struct radv_ray_tracing_stage temp_stages[2];
136             unsigned cnt = 0;
137 
138             temp_stages[cnt++] = stages[group_info->intersectionShader];
139 
140             if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
141                temp_stages[cnt++] = stages[group_info->anyHitShader];
142 
143             groups[i].handle.intersection_index = handle_from_stages(device, temp_stages, cnt, capture_replay);
144          }
145          break;
146       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
147          if (group_info->closestHitShader != VK_SHADER_UNUSED_KHR)
148             groups[i].handle.closest_hit_index =
149                handle_from_stages(device, &stages[group_info->closestHitShader], 1, capture_replay);
150 
151          if (group_info->anyHitShader != VK_SHADER_UNUSED_KHR)
152             groups[i].handle.any_hit_index =
153                handle_from_stages(device, &stages[group_info->anyHitShader], 1, capture_replay);
154 
155          break;
156       case VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR:
157          unreachable("VK_SHADER_GROUP_SHADER_MAX_ENUM_KHR");
158       }
159 
160       if (group_info->pShaderGroupCaptureReplayHandle) {
161          const struct radv_rt_capture_replay_handle *handle = group_info->pShaderGroupCaptureReplayHandle;
162          if (memcmp(&handle->non_recursive_idx, &groups[i].handle.any_hit_index, sizeof(uint32_t)) != 0) {
163             return VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
164          }
165       }
166    }
167 
168    return VK_SUCCESS;
169 }
170 
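/* Fill the radv_ray_tracing_group array: record the recursive/any-hit/
 * intersection shader indices for each group, remember capture/replay shader
 * allocations (replaying library shader arenas where needed), and append the
 * groups of imported libraries with their stage indices rebased behind this
 * pipeline's own stages.
 */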
171 static VkResult
172 radv_rt_fill_group_info(struct radv_device *device, const struct radv_ray_tracing_pipeline *pipeline,
173                         const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
174                         const struct radv_ray_tracing_stage *stages,
175                         struct radv_serialized_shader_arena_block *capture_replay_blocks,
176                         struct radv_ray_tracing_group *groups)
177 {
178    VkResult result = radv_create_group_handles(device, pipeline, pCreateInfo, stages, groups);
179 
180    uint32_t idx;
181    for (idx = 0; idx < pCreateInfo->groupCount; idx++) {
182       groups[idx].type = pCreateInfo->pGroups[idx].type;
183       if (groups[idx].type == VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR)
184          groups[idx].recursive_shader = pCreateInfo->pGroups[idx].generalShader;
185       else
186          groups[idx].recursive_shader = pCreateInfo->pGroups[idx].closestHitShader;
187       groups[idx].any_hit_shader = pCreateInfo->pGroups[idx].anyHitShader;
188       groups[idx].intersection_shader = pCreateInfo->pGroups[idx].intersectionShader;
189 
190       if (pCreateInfo->pGroups[idx].pShaderGroupCaptureReplayHandle) {
191          const struct radv_rt_capture_replay_handle *handle =
192             (const struct radv_rt_capture_replay_handle *)pCreateInfo->pGroups[idx].pShaderGroupCaptureReplayHandle;
193 
194          if (groups[idx].recursive_shader < pCreateInfo->stageCount) {
195             capture_replay_blocks[groups[idx].recursive_shader] = handle->recursive_shader_alloc;
196          } else if (groups[idx].recursive_shader != VK_SHADER_UNUSED_KHR) {
197             struct radv_shader *library_shader = stages[groups[idx].recursive_shader].shader;
198             simple_mtx_lock(&library_shader->replay_mtx);
199             if (!library_shader->has_replay_alloc) {
200                union radv_shader_arena_block *new_block =
201                   radv_replay_shader_arena_block(device, &handle->recursive_shader_alloc, library_shader);
202                if (!new_block) {
203                   result = VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS;
204                   goto reloc_out;
205                }
206 
207                radv_shader_wait_for_upload(device, library_shader->upload_seq);
208                radv_free_shader_memory(device, library_shader->alloc);
209 
210                library_shader->alloc = new_block;
211                library_shader->has_replay_alloc = true;
212 
213                library_shader->bo = library_shader->alloc->arena->bo;
214                library_shader->va = radv_buffer_get_va(library_shader->bo) + library_shader->alloc->offset;
215 
216                if (!radv_shader_reupload(device, library_shader)) {
217                   result = VK_ERROR_UNKNOWN;
218                   goto reloc_out;
219                }
220             }
221 
222          reloc_out:
223             simple_mtx_unlock(&library_shader->replay_mtx);
224             if (result != VK_SUCCESS)
225                return result;
226          }
227       }
228    }
229 
230    /* copy and adjust library groups (incl. handles) */
231    if (pCreateInfo->pLibraryInfo) {
232       unsigned stage_count = pCreateInfo->stageCount;
233       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
234          RADV_FROM_HANDLE(radv_pipeline, pipeline_lib, pCreateInfo->pLibraryInfo->pLibraries[i]);
235          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline_lib);
236 
237          for (unsigned j = 0; j < library_pipeline->group_count; ++j) {
238             struct radv_ray_tracing_group *dst = &groups[idx + j];
239             *dst = library_pipeline->groups[j];
240             if (dst->recursive_shader != VK_SHADER_UNUSED_KHR)
241                dst->recursive_shader += stage_count;
242             if (dst->any_hit_shader != VK_SHADER_UNUSED_KHR)
243                dst->any_hit_shader += stage_count;
244             if (dst->intersection_shader != VK_SHADER_UNUSED_KHR)
245                dst->intersection_shader += stage_count;
246             /* Don't set the shader VA since the handles are part of the pipeline hash */
247             dst->handle.recursive_shader_ptr = 0;
248          }
249          idx += library_pipeline->group_count;
250          stage_count += library_pipeline->stage_count;
251       }
252    }
253 
254    return result;
255 }
256 
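/* Record the shader stage for each entry in pStages and append the stages of
 * all imported pipeline libraries, taking references on their cached NIR and
 * compiled shaders.
 */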
257 static void
258 radv_rt_fill_stage_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_stage *stages)
259 {
260    uint32_t idx;
261    for (idx = 0; idx < pCreateInfo->stageCount; idx++)
262       stages[idx].stage = vk_to_mesa_shader_stage(pCreateInfo->pStages[idx].stage);
263 
264    if (pCreateInfo->pLibraryInfo) {
265       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
266          RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
267          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
268          for (unsigned j = 0; j < library_pipeline->stage_count; ++j) {
269             if (library_pipeline->stages[j].nir)
270                stages[idx].nir = vk_pipeline_cache_object_ref(library_pipeline->stages[j].nir);
271             if (library_pipeline->stages[j].shader)
272                stages[idx].shader = radv_shader_ref(library_pipeline->stages[j].shader);
273 
274             stages[idx].stage = library_pipeline->stages[j].stage;
275             stages[idx].stack_size = library_pipeline->stages[j].stack_size;
276             memcpy(stages[idx].sha1, library_pipeline->stages[j].sha1, SHA1_DIGEST_LENGTH);
277             idx++;
278          }
279       }
280    }
281 }
282 
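/* Compute a SHA-1 per stage from its create info, layout and shader key.
 * These hashes feed the group handle computation and the pipeline hashing
 * below.
 */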
283 static void
284 radv_init_rt_stage_hashes(struct radv_device *device, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
285                           struct radv_ray_tracing_stage *stages, const struct radv_shader_stage_key *stage_keys)
286 {
287    RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
288 
289    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
290       gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[idx].stage);
291       struct radv_shader_stage stage;
292 
293       radv_pipeline_stage_init(&pCreateInfo->pStages[idx], pipeline_layout, &stage_keys[s], &stage);
294 
295       radv_hash_shaders(device, stages[idx].sha1, &stage, 1, NULL, NULL);
296    }
297 }
298 
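/* Return a copy of the create info whose stageCount/groupCount also cover the
 * stages and groups of all imported pipeline libraries.
 */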
299 static VkRayTracingPipelineCreateInfoKHR
300 radv_create_merged_rt_create_info(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
301 {
302    VkRayTracingPipelineCreateInfoKHR local_create_info = *pCreateInfo;
303    uint32_t total_stages = pCreateInfo->stageCount;
304    uint32_t total_groups = pCreateInfo->groupCount;
305 
306    if (pCreateInfo->pLibraryInfo) {
307       for (unsigned i = 0; i < pCreateInfo->pLibraryInfo->libraryCount; ++i) {
308          RADV_FROM_HANDLE(radv_pipeline, pipeline, pCreateInfo->pLibraryInfo->pLibraries[i]);
309          struct radv_ray_tracing_pipeline *library_pipeline = radv_pipeline_to_ray_tracing(pipeline);
310 
311          total_stages += library_pipeline->stage_count;
312          total_groups += library_pipeline->group_count;
313       }
314    }
315    local_create_info.stageCount = total_stages;
316    local_create_info.groupCount = total_groups;
317 
318    return local_create_info;
319 }
320 
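/* Intrinsics that read launch/ray state (and the first hit-attribute dwords)
 * are hoisted to the top of the shader by move_rt_instructions() so that later
 * rt_trace_ray calls cannot overwrite the values they observe.
 */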
321 static bool
322 should_move_rt_instruction(nir_intrinsic_instr *instr)
323 {
324    switch (instr->intrinsic) {
325    case nir_intrinsic_load_hit_attrib_amd:
326       return nir_intrinsic_base(instr) < RADV_MAX_HIT_ATTRIB_DWORDS;
327    case nir_intrinsic_load_rt_arg_scratch_offset_amd:
328    case nir_intrinsic_load_ray_flags:
329    case nir_intrinsic_load_ray_object_origin:
330    case nir_intrinsic_load_ray_world_origin:
331    case nir_intrinsic_load_ray_t_min:
332    case nir_intrinsic_load_ray_object_direction:
333    case nir_intrinsic_load_ray_world_direction:
334    case nir_intrinsic_load_ray_t_max:
335       return true;
336    default:
337       return false;
338    }
339 }
340 
341 static void
342 move_rt_instructions(nir_shader *shader)
343 {
344    nir_cursor target = nir_before_impl(nir_shader_get_entrypoint(shader));
345 
346    nir_foreach_block (block, nir_shader_get_entrypoint(shader)) {
347       nir_foreach_instr_safe (instr, block) {
348          if (instr->type != nir_instr_type_intrinsic)
349             continue;
350 
351          nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
352 
353          if (!should_move_rt_instruction(intrinsic))
354             continue;
355 
356          nir_instr_move(target, instr);
357       }
358    }
359 
360    nir_metadata_preserve(nir_shader_get_entrypoint(shader), nir_metadata_all & (~nir_metadata_instr_index));
361 }
362 
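/* Lower a single ray tracing stage from NIR to an AMD shader binary: lower the
 * RT I/O and ABI, split non-monolithic recursive stages into resume shaders
 * with nir_lower_shader_calls(), compile all parts together and create the
 * radv_shader. Replayable shaders bypass the shader cache so their arena
 * allocation can be replayed.
 */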
363 static VkResult
364 radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
365                    const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_pipeline *pipeline,
366                    bool monolithic, struct radv_shader_stage *stage, uint32_t *stack_size,
367                    struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
368 {
369    struct radv_shader_binary *binary;
370    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
371    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
372 
373    radv_nir_lower_rt_io(stage->nir, monolithic, 0);
374 
375    /* Gather shader info. */
376    nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
377    radv_nir_shader_info_init(stage->stage, MESA_SHADER_NONE, &stage->info);
378    radv_nir_shader_info_pass(device, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, false,
379                              &stage->info);
380 
381    /* Declare shader arguments. */
382    radv_declare_shader_args(device, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args);
383 
384    stage->info.user_sgprs_locs = stage->args.user_sgprs_locs;
385    stage->info.inline_push_constant_mask = stage->args.ac.inline_push_const_mask;
386 
387    /* Move the ray tracing system values that are set by rt_trace_ray to the top
388     * of the shader, so they cannot be overwritten by subsequent rt_trace_ray calls.
389     */
390    NIR_PASS_V(stage->nir, move_rt_instructions);
391 
392    uint32_t num_resume_shaders = 0;
393    nir_shader **resume_shaders = NULL;
394 
395    if (stage->stage != MESA_SHADER_INTERSECTION && !monolithic) {
396       nir_builder b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(stage->nir)));
397       nir_rt_return_amd(&b);
398 
399       const nir_lower_shader_calls_options opts = {
400          .address_format = nir_address_format_32bit_offset,
401          .stack_alignment = 16,
402          .localized_loads = true,
403          .vectorizer_callback = radv_mem_vectorize_callback,
404          .vectorizer_data = &device->physical_device->rad_info.gfx_level,
405       };
406       nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir);
407    }
408 
409    unsigned num_shaders = num_resume_shaders + 1;
410    nir_shader **shaders = ralloc_array(stage->nir, nir_shader *, num_shaders);
411    if (!shaders)
412       return VK_ERROR_OUT_OF_HOST_MEMORY;
413 
414    shaders[0] = stage->nir;
415    for (uint32_t i = 0; i < num_resume_shaders; i++)
416       shaders[i + 1] = resume_shaders[i];
417 
418    /* Postprocess shader parts. */
419    for (uint32_t i = 0; i < num_shaders; i++) {
420       struct radv_shader_stage temp_stage = *stage;
421       temp_stage.nir = shaders[i];
422       radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
423                             pipeline, monolithic);
424 
425       /* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */
426       nir_shader_gather_info(temp_stage.nir, nir_shader_get_entrypoint(temp_stage.nir));
427 
428       radv_optimize_nir(temp_stage.nir, stage->key.optimisations_disabled);
429       radv_postprocess_nir(device, NULL, &temp_stage);
430 
431       if (radv_can_dump_shader(device, temp_stage.nir, false))
432          nir_print_shader(temp_stage.nir, stderr);
433    }
434 
435    bool dump_shader = radv_can_dump_shader(device, shaders[0], false);
436    bool replayable =
437       pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR;
438 
439    /* Compile NIR shader to AMD assembly. */
440    binary =
441       radv_shader_nir_to_asm(device, stage, shaders, num_shaders, NULL, keep_executable_info, keep_statistic_info);
442    struct radv_shader *shader;
443    if (replay_block || replayable) {
444       VkResult result = radv_shader_create_uncached(device, binary, replayable, replay_block, &shader);
445       if (result != VK_SUCCESS) {
446          free(binary);
447          return result;
448       }
449    } else
450       shader = radv_shader_create(device, cache, binary, keep_executable_info || dump_shader);
451 
452    if (shader) {
453       radv_shader_generate_debug_info(device, dump_shader, keep_executable_info, binary, shader, shaders, num_shaders,
454                                       &stage->info);
455 
456       if (shader && keep_executable_info && stage->spirv.size) {
457          shader->spirv = malloc(stage->spirv.size);
458          memcpy(shader->spirv, stage->spirv.data, stage->spirv.size);
459          shader->spirv_size = stage->spirv.size;
460       }
461    }
462 
463    free(binary);
464 
465    *out_shader = shader;
466    return shader ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY;
467 }
468 
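/* A stage can be inlined into a monolithic pipeline if it never calls traceRay
 * itself; raygen, any-hit and intersection stages are always treated as
 * inlinable, callable stages never are.
 */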
469 static bool
470 radv_rt_can_inline_shader(nir_shader *nir)
471 {
472    if (nir->info.stage == MESA_SHADER_RAYGEN || nir->info.stage == MESA_SHADER_ANY_HIT ||
473        nir->info.stage == MESA_SHADER_INTERSECTION)
474       return true;
475 
476    if (nir->info.stage == MESA_SHADER_CALLABLE)
477       return false;
478 
479    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
480    nir_foreach_block (block, impl) {
481       nir_foreach_instr (instr, block) {
482          if (instr->type != nir_instr_type_intrinsic)
483             continue;
484 
485          if (nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_trace_ray)
486             return false;
487       }
488    }
489 
490    return true;
491 }
492 
493 static inline bool
494 radv_ray_tracing_stage_is_always_inlined(struct radv_ray_tracing_stage *stage)
495 {
496    return stage->stage == MESA_SHADER_ANY_HIT || stage->stage == MESA_SHADER_INTERSECTION;
497 }
498 
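/* Compile the pipeline's non-imported stages: translate SPIR-V to NIR, keep
 * NIR around for stages that may still be inlined later, compile standalone
 * shaders for the recursive stages that need them, and finally build the
 * traversal shader for non-library pipelines that require one.
 */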
499 static VkResult
500 radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *cache,
501                         const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
502                         const VkPipelineCreationFeedbackCreateInfo *creation_feedback,
503                         const struct radv_shader_stage_key *stage_keys, struct radv_ray_tracing_pipeline *pipeline,
504                         struct radv_serialized_shader_arena_block *capture_replay_handles)
505 {
506    RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
507 
508    if (pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_KHR)
509       return VK_PIPELINE_COMPILE_REQUIRED;
510    VkResult result = VK_SUCCESS;
511 
512    struct radv_ray_tracing_stage *rt_stages = pipeline->stages;
513 
514    struct radv_shader_stage *stages = calloc(pCreateInfo->stageCount, sizeof(struct radv_shader_stage));
515    if (!stages)
516       return VK_ERROR_OUT_OF_HOST_MEMORY;
517 
518    bool library = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR;
519 
520    bool monolithic = !library;
521    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
522       if (rt_stages[i].shader || rt_stages[i].nir)
523          continue;
524 
525       int64_t stage_start = os_time_get_nano();
526 
527       struct radv_shader_stage *stage = &stages[i];
528       gl_shader_stage s = vk_to_mesa_shader_stage(pCreateInfo->pStages[i].stage);
529       radv_pipeline_stage_init(&pCreateInfo->pStages[i], pipeline_layout, &stage_keys[s], stage);
530 
531       /* precompile the shader */
532       stage->nir = radv_shader_spirv_to_nir(device, stage, NULL, false);
533 
534       NIR_PASS(_, stage->nir, radv_nir_lower_hit_attrib_derefs);
535 
536       rt_stages[i].can_inline = radv_rt_can_inline_shader(stage->nir);
537 
538       stage->feedback.duration = os_time_get_nano() - stage_start;
539    }
540 
541    bool has_callable = false;
542    /* TODO: Recompile recursive raygen shaders instead. */
543    bool raygen_imported = false;
544    for (uint32_t i = 0; i < pipeline->stage_count; i++) {
545       has_callable |= rt_stages[i].stage == MESA_SHADER_CALLABLE;
546       monolithic &= rt_stages[i].can_inline;
547 
548       if (i >= pCreateInfo->stageCount)
549          raygen_imported |= rt_stages[i].stage == MESA_SHADER_RAYGEN;
550    }
551 
552    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
553       if (rt_stages[idx].shader || rt_stages[idx].nir)
554          continue;
555 
556       int64_t stage_start = os_time_get_nano();
557 
558       struct radv_shader_stage *stage = &stages[idx];
559 
560       /* Cases in which we need to keep around the NIR:
561        *    - pipeline library: The final pipeline might be monolithic in which case it will need every NIR shader.
562        *                        If there is a callable shader, we can be sure that the final pipeline won't be
563        *                        monolithic.
564        *    - non-recursive:    Non-recursive shaders are inlined into the traversal shader.
565        *    - monolithic:       Callable shaders (chit/miss) are inlined into the raygen shader.
566        */
567       bool always_inlined = radv_ray_tracing_stage_is_always_inlined(&rt_stages[idx]);
568       bool nir_needed =
569          (library && !has_callable) || always_inlined || (monolithic && rt_stages[idx].stage != MESA_SHADER_RAYGEN);
570       nir_needed &= !rt_stages[idx].nir;
571       if (nir_needed) {
572          rt_stages[idx].stack_size = stage->nir->scratch_size;
573          rt_stages[idx].nir = radv_pipeline_cache_nir_to_handle(device, cache, stage->nir, rt_stages[idx].sha1,
574                                                                 !stage->key.optimisations_disabled);
575       }
576 
577       stage->feedback.duration += os_time_get_nano() - stage_start;
578    }
579 
580    for (uint32_t idx = 0; idx < pCreateInfo->stageCount; idx++) {
581       int64_t stage_start = os_time_get_nano();
582       struct radv_shader_stage *stage = &stages[idx];
583 
584       /* Cases in which we need to compile the shader (raygen/callable/chit/miss):
585        *    TODO: - monolithic: Extend the loop to cover imported stages and force compilation of imported raygen
586        *                        shaders since pipeline library shaders use separate compilation.
587        *    - separate:   Compile any recursive stage if it wasn't compiled yet.
588        */
589       bool shader_needed = !radv_ray_tracing_stage_is_always_inlined(&rt_stages[idx]) && !rt_stages[idx].shader;
590       if (rt_stages[idx].stage == MESA_SHADER_CLOSEST_HIT || rt_stages[idx].stage == MESA_SHADER_MISS)
591          shader_needed &= !monolithic || raygen_imported;
592 
593       if (shader_needed) {
594          uint32_t stack_size = 0;
595          struct radv_serialized_shader_arena_block *replay_block =
596             capture_replay_handles[idx].arena_va ? &capture_replay_handles[idx] : NULL;
597 
598          bool monolithic_raygen = monolithic && stage->stage == MESA_SHADER_RAYGEN;
599 
600          result = radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, monolithic_raygen, stage, &stack_size,
601                                      replay_block, &rt_stages[idx].shader);
602          if (result != VK_SUCCESS)
603             goto cleanup;
604 
605          assert(rt_stages[idx].stack_size <= stack_size);
606          rt_stages[idx].stack_size = stack_size;
607       }
608 
609       if (creation_feedback && creation_feedback->pipelineStageCreationFeedbackCount) {
610          assert(idx < creation_feedback->pipelineStageCreationFeedbackCount);
611          stage->feedback.duration += os_time_get_nano() - stage_start;
612          creation_feedback->pPipelineStageCreationFeedbacks[idx] = stage->feedback;
613       }
614    }
615 
616    /* Monolithic raygen shaders do not need a traversal shader. Skip compiling one if there are only monolithic raygen
617     * shaders.
618     */
619    bool traversal_needed = !library && (!monolithic || raygen_imported);
620    if (!traversal_needed)
621       return VK_SUCCESS;
622 
623    /* create traversal shader */
624    struct vk_shader_module traversal_module = {
625       .base.type = VK_OBJECT_TYPE_SHADER_MODULE,
626       .nir = radv_build_traversal_shader(device, pipeline, pCreateInfo),
627    };
628    const VkPipelineShaderStageCreateInfo pStage = {
629       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
630       .stage = VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
631       .module = vk_shader_module_to_handle(&traversal_module),
632       .pName = "main",
633    };
634    struct radv_shader_stage traversal_stage = {
635       .stage = MESA_SHADER_INTERSECTION,
636       .nir = traversal_module.nir,
637       .key = stage_keys[MESA_SHADER_INTERSECTION],
638    };
639    vk_pipeline_hash_shader_stage(&pStage, NULL, traversal_stage.shader_sha1);
640    radv_shader_layout_init(pipeline_layout, MESA_SHADER_INTERSECTION, &traversal_stage.layout);
641    result = radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, false, &traversal_stage, NULL, NULL,
642                                &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
643    ralloc_free(traversal_module.nir);
644 
645 cleanup:
646    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
647       ralloc_free(stages[i].nir);
648    free(stages);
649    return result;
650 }
651 
652 static bool
653 radv_rt_pipeline_has_dynamic_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo)
654 {
655    if (!pCreateInfo->pDynamicState)
656       return false;
657 
658    for (unsigned i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; ++i) {
659       if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
660          return true;
661    }
662 
663    return false;
664 }
665 
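/* Compute the default pipeline stack size from the per-stage stack sizes,
 * roughly following the size recommendation of the Vulkan ray tracing spec.
 * Pipelines with a dynamic stack size get -1 here and rely on the
 * application-provided value instead.
 */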
666 static void
667 compute_rt_stack_size(const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_pipeline *pipeline)
668 {
669    if (radv_rt_pipeline_has_dynamic_stack_size(pCreateInfo)) {
670       pipeline->stack_size = -1u;
671       return;
672    }
673 
674    unsigned raygen_size = 0;
675    unsigned callable_size = 0;
676    unsigned chit_miss_size = 0;
677    unsigned intersection_size = 0;
678    unsigned any_hit_size = 0;
679 
680    for (unsigned i = 0; i < pipeline->stage_count; ++i) {
681       uint32_t size = pipeline->stages[i].stack_size;
682       switch (pipeline->stages[i].stage) {
683       case MESA_SHADER_RAYGEN:
684          raygen_size = MAX2(raygen_size, size);
685          break;
686       case MESA_SHADER_CLOSEST_HIT:
687       case MESA_SHADER_MISS:
688          chit_miss_size = MAX2(chit_miss_size, size);
689          break;
690       case MESA_SHADER_CALLABLE:
691          callable_size = MAX2(callable_size, size);
692          break;
693       case MESA_SHADER_INTERSECTION:
694          intersection_size = MAX2(intersection_size, size);
695          break;
696       case MESA_SHADER_ANY_HIT:
697          any_hit_size = MAX2(any_hit_size, size);
698          break;
699       default:
700          unreachable("Invalid stage type in RT shader");
701       }
702    }
703    pipeline->stack_size =
704       raygen_size +
705       MIN2(pCreateInfo->maxPipelineRayRecursionDepth, 1) * MAX2(chit_miss_size, intersection_size + any_hit_size) +
706       MAX2(0, (int)(pCreateInfo->maxPipelineRayRecursionDepth) - 1) * chit_miss_size + 2 * callable_size;
707 }
708 
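/* Combine two shader configs by taking the maximum of every resource
 * requirement, producing a config that is valid for all shaders reachable from
 * the RT prolog.
 */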
709 static void
710 combine_config(struct ac_shader_config *config, struct ac_shader_config *other)
711 {
712    config->num_sgprs = MAX2(config->num_sgprs, other->num_sgprs);
713    config->num_vgprs = MAX2(config->num_vgprs, other->num_vgprs);
714    config->num_shared_vgprs = MAX2(config->num_shared_vgprs, other->num_shared_vgprs);
715    config->spilled_sgprs = MAX2(config->spilled_sgprs, other->spilled_sgprs);
716    config->spilled_vgprs = MAX2(config->spilled_vgprs, other->spilled_vgprs);
717    config->lds_size = MAX2(config->lds_size, other->lds_size);
718    config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave, other->scratch_bytes_per_wave);
719 
720    assert(config->float_mode == other->float_mode);
721 }
722 
723 static void
724 postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_level, unsigned wave_size)
725 {
726    config->rsrc1 =
727       (config->rsrc1 & C_00B848_VGPRS) | S_00B848_VGPRS((config->num_vgprs - 1) / (wave_size == 32 ? 8 : 4));
728    if (gfx_level < GFX10)
729       config->rsrc1 = (config->rsrc1 & C_00B848_SGPRS) | S_00B848_SGPRS((config->num_sgprs - 1) / 8);
730 
731    config->rsrc2 = (config->rsrc2 & C_00B84C_LDS_SIZE) | S_00B84C_LDS_SIZE(config->lds_size);
732    config->rsrc3 = (config->rsrc3 & C_00B8A0_SHARED_VGPR_CNT) | S_00B8A0_SHARED_VGPR_CNT(config->num_shared_vgprs / 8);
733 }
734 
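/* Create the RT prolog and give it a combined register/LDS/scratch config that
 * covers all compiled stages plus the traversal shader, then re-pack the rsrc
 * dwords for the target GPU generation.
 */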
735 static void
736 compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
737 {
738    pipeline->prolog = radv_create_rt_prolog(device);
739 
740    /* create combined config */
741    struct ac_shader_config *config = &pipeline->prolog->config;
742    for (unsigned i = 0; i < pipeline->stage_count; i++)
743       if (pipeline->stages[i].shader)
744          combine_config(config, &pipeline->stages[i].shader->config);
745 
746    if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
747       combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
748 
749    postprocess_rt_config(config, device->physical_device->rad_info.gfx_level, device->physical_device->rt_wave_size);
750 
751    pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
752 }
753 
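/* Create a ray tracing pipeline: merge imported libraries into the stage and
 * group arrays, compute hashes and group handles, look the shaders up in the
 * cache or compile them, and (for non-library pipelines) compute the stack
 * size and RT prolog. Shader VAs are only patched into the group handles at
 * the very end, so the handles used for hashing stay VA-independent.
 */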
754 static VkResult
755 radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
756                         const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
757 {
758    RADV_FROM_HANDLE(radv_device, device, _device);
759    VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
760    RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
761    VkResult result;
762    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
763       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
764    if (creation_feedback)
765       creation_feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
766 
767    int64_t pipeline_start = os_time_get_nano();
768 
769    VkRayTracingPipelineCreateInfoKHR local_create_info = radv_create_merged_rt_create_info(pCreateInfo);
770 
771    VK_MULTIALLOC(ma);
772    VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_pipeline, pipeline, 1);
773    VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_stage, stages, local_create_info.stageCount);
774    VK_MULTIALLOC_DECL(&ma, struct radv_ray_tracing_group, groups, local_create_info.groupCount);
775    VK_MULTIALLOC_DECL(&ma, struct radv_serialized_shader_arena_block, capture_replay_blocks, pCreateInfo->stageCount);
776    if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
777       return VK_ERROR_OUT_OF_HOST_MEMORY;
778 
779    radv_pipeline_init(device, &pipeline->base.base, RADV_PIPELINE_RAY_TRACING);
780    pipeline->base.base.create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
781    pipeline->stage_count = local_create_info.stageCount;
782    pipeline->non_imported_stage_count = pCreateInfo->stageCount;
783    pipeline->group_count = local_create_info.groupCount;
784    pipeline->stages = stages;
785    pipeline->groups = groups;
786 
787    radv_rt_fill_stage_info(pCreateInfo, stages);
788 
789    struct radv_shader_stage_key stage_keys[MESA_VULKAN_SHADER_STAGES] = {0};
790 
791    radv_generate_rt_shaders_key(device, pipeline, pCreateInfo, stage_keys);
792 
793    /* cache robustness state for making merged shaders */
794    if (stage_keys[MESA_SHADER_INTERSECTION].storage_robustness2)
795       pipeline->traversal_storage_robustness2 = true;
796 
797    if (stage_keys[MESA_SHADER_INTERSECTION].uniform_robustness2)
798       pipeline->traversal_uniform_robustness2 = true;
799 
800    radv_init_rt_stage_hashes(device, pCreateInfo, stages, stage_keys);
801    result = radv_rt_fill_group_info(device, pipeline, pCreateInfo, stages, capture_replay_blocks, pipeline->groups);
802    if (result != VK_SUCCESS)
803       goto fail;
804 
805    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
806    bool emit_ray_history = !!device->rra_trace.ray_history_buffer;
807 
808    radv_hash_rt_shaders(device, pipeline->sha1, stages, pCreateInfo, pipeline->groups);
809    pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline->sha1;
810 
811    bool cache_hit = false;
812    if (!keep_executable_info && !emit_ray_history)
813       cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo);
814 
815    if (!cache_hit) {
816       result = radv_rt_compile_shaders(device, cache, pCreateInfo, creation_feedback, stage_keys, pipeline,
817                                        capture_replay_blocks);
818 
819       if (result != VK_SUCCESS)
820          goto fail;
821    }
822 
823    if (!(pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
824       compute_rt_stack_size(pCreateInfo, pipeline);
825       compile_rt_prolog(device, pipeline);
826 
827       radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout, pipeline->prolog);
828    }
829 
830    radv_rmv_log_rt_pipeline_create(device, pipeline);
831 
832    if (!cache_hit && !emit_ray_history)
833       radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount, pipeline->sha1);
834 
835    /* write shader VAs into group handles */
836    for (unsigned i = 0; i < pipeline->group_count; i++) {
837       if (pipeline->groups[i].recursive_shader != VK_SHADER_UNUSED_KHR) {
838          struct radv_shader *shader = pipeline->stages[pipeline->groups[i].recursive_shader].shader;
839          if (shader)
840             pipeline->groups[i].handle.recursive_shader_ptr = shader->va | radv_get_rt_priority(shader->info.stage);
841       }
842    }
843 
844 fail:
845    if (creation_feedback)
846       creation_feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - pipeline_start;
847 
848    if (result == VK_SUCCESS)
849       *pPipeline = radv_pipeline_to_handle(&pipeline->base.base);
850    else
851       radv_pipeline_destroy(device, &pipeline->base.base, pAllocator);
852    return result;
853 }
854 
855 void
856 radv_destroy_ray_tracing_pipeline(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
857 {
858    for (unsigned i = 0; i < pipeline->stage_count; i++) {
859       if (pipeline->stages[i].nir)
860          vk_pipeline_cache_object_unref(&device->vk, pipeline->stages[i].nir);
861       if (pipeline->stages[i].shader)
862          radv_shader_unref(device, pipeline->stages[i].shader);
863    }
864 
865    if (pipeline->prolog)
866       radv_shader_unref(device, pipeline->prolog);
867    if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
868       radv_shader_unref(device, pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
869 }
870 
871 VKAPI_ATTR VkResult VKAPI_CALL
872 radv_CreateRayTracingPipelinesKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
873                                   VkPipelineCache pipelineCache, uint32_t count,
874                                   const VkRayTracingPipelineCreateInfoKHR *pCreateInfos,
875                                   const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
876 {
877    VkResult result = VK_SUCCESS;
878 
879    unsigned i = 0;
880    for (; i < count; i++) {
881       VkResult r;
882       r = radv_rt_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
883       if (r != VK_SUCCESS) {
884          result = r;
885          pPipelines[i] = VK_NULL_HANDLE;
886 
887          const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(&pCreateInfos[i]);
888          if (create_flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT_KHR)
889             break;
890       }
891    }
892 
893    for (; i < count; ++i)
894       pPipelines[i] = VK_NULL_HANDLE;
895 
896    if (result != VK_SUCCESS)
897       return result;
898 
899    /* Work around Portal RTX not handling VK_OPERATION_NOT_DEFERRED_KHR correctly. */
900    if (deferredOperation != VK_NULL_HANDLE)
901       return VK_OPERATION_DEFERRED_KHR;
902 
903    return result;
904 }
905 
906 VKAPI_ATTR VkResult VKAPI_CALL
907 radv_GetRayTracingShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup, uint32_t groupCount,
908                                         size_t dataSize, void *pData)
909 {
910    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
911    struct radv_ray_tracing_group *groups = radv_pipeline_to_ray_tracing(pipeline)->groups;
912    char *data = pData;
913 
914    STATIC_ASSERT(sizeof(struct radv_pipeline_group_handle) <= RADV_RT_HANDLE_SIZE);
915 
916    memset(data, 0, groupCount * RADV_RT_HANDLE_SIZE);
917 
918    for (uint32_t i = 0; i < groupCount; ++i) {
919       memcpy(data + i * RADV_RT_HANDLE_SIZE, &groups[firstGroup + i].handle, sizeof(struct radv_pipeline_group_handle));
920    }
921 
922    return VK_SUCCESS;
923 }
924 
925 VKAPI_ATTR VkDeviceSize VKAPI_CALL
926 radv_GetRayTracingShaderGroupStackSizeKHR(VkDevice device, VkPipeline _pipeline, uint32_t group,
927                                           VkShaderGroupShaderKHR groupShader)
928 {
929    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
930    struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
931    struct radv_ray_tracing_group *rt_group = &rt_pipeline->groups[group];
932    switch (groupShader) {
933    case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
934    case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
935       return rt_pipeline->stages[rt_group->recursive_shader].stack_size;
936    case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
937       return rt_pipeline->stages[rt_group->any_hit_shader].stack_size;
938    case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
939       return rt_pipeline->stages[rt_group->intersection_shader].stack_size;
940    default:
941       return 0;
942    }
943 }
944 
945 VKAPI_ATTR VkResult VKAPI_CALL
946 radv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(VkDevice device, VkPipeline _pipeline, uint32_t firstGroup,
947                                                      uint32_t groupCount, size_t dataSize, void *pData)
948 {
949    RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
950    struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
951    struct radv_rt_capture_replay_handle *data = pData;
952 
953    memset(data, 0, groupCount * sizeof(struct radv_rt_capture_replay_handle));
954 
955    for (uint32_t i = 0; i < groupCount; ++i) {
956       uint32_t recursive_shader = rt_pipeline->groups[firstGroup + i].recursive_shader;
957       if (recursive_shader != VK_SHADER_UNUSED_KHR) {
958          struct radv_shader *shader = rt_pipeline->stages[recursive_shader].shader;
959          data[i].recursive_shader_alloc = radv_serialize_shader_arena_block(shader->alloc);
960       }
961       data[i].non_recursive_idx = rt_pipeline->groups[firstGroup + i].handle.any_hit_index;
962    }
963 
964    return VK_SUCCESS;
965 }
966