• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "meta/radv_meta.h"
12 #include "nir/nir.h"
13 #include "nir/nir_builder.h"
14 #include "nir/nir_serialize.h"
15 #include "nir/radv_nir.h"
16 #include "spirv/nir_spirv.h"
17 #include "util/disk_cache.h"
18 #include "util/mesa-sha1.h"
19 #include "util/os_time.h"
20 #include "util/u_atomic.h"
21 #include "radv_cs.h"
22 #include "radv_debug.h"
23 #include "radv_pipeline_binary.h"
24 #include "radv_pipeline_cache.h"
25 #include "radv_rmv.h"
26 #include "radv_shader.h"
27 #include "radv_shader_args.h"
28 #include "vk_nir_convert_ycbcr.h"
29 #include "vk_pipeline.h"
30 #include "vk_render_pass.h"
31 #include "vk_util.h"
32 
33 #include "util/u_debug.h"
34 #include "ac_binary.h"
35 #include "ac_nir.h"
36 #include "ac_shader_util.h"
37 #include "aco_interface.h"
38 #include "sid.h"
39 #include "vk_format.h"
40 
41 uint32_t
radv_get_compute_resource_limits(const struct radv_physical_device * pdev,const struct radv_shader_info * info)42 radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader_info *info)
43 {
44    unsigned threads_per_threadgroup;
45    unsigned threadgroups_per_cu = 1;
46    unsigned waves_per_threadgroup;
47    unsigned max_waves_per_sh = 0;
48 
49    /* Calculate best compute resource limits. */
50    threads_per_threadgroup = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2];
51    waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, info->wave_size);
52 
53    if (pdev->info.gfx_level >= GFX10 && waves_per_threadgroup == 1)
54       threadgroups_per_cu = 2;
55 
56    return ac_get_compute_resource_limits(&pdev->info, waves_per_threadgroup, max_waves_per_sh, threadgroups_per_cu);
57 }
58 
59 void
radv_get_compute_shader_metadata(const struct radv_device * device,const struct radv_shader * cs,struct radv_compute_pipeline_metadata * metadata)60 radv_get_compute_shader_metadata(const struct radv_device *device, const struct radv_shader *cs,
61                                  struct radv_compute_pipeline_metadata *metadata)
62 {
63    uint32_t upload_sgpr = 0, inline_sgpr = 0;
64 
65    memset(metadata, 0, sizeof(*metadata));
66 
67    metadata->wave32 = cs->info.wave_size == 32;
68 
69    metadata->grid_base_sgpr = radv_get_user_sgpr(cs, AC_UD_CS_GRID_SIZE);
70 
71    upload_sgpr = radv_get_user_sgpr(cs, AC_UD_PUSH_CONSTANTS);
72    inline_sgpr = radv_get_user_sgpr(cs, AC_UD_INLINE_PUSH_CONSTANTS);
73 
74    metadata->push_const_sgpr = upload_sgpr | (inline_sgpr << 16);
75    metadata->inline_push_const_mask = cs->info.inline_push_constant_mask;
76 
77    metadata->indirect_desc_sets_sgpr = radv_get_user_sgpr(cs, AC_UD_INDIRECT_DESCRIPTOR_SETS);
78 }
79 
80 void
radv_compute_pipeline_init(struct radv_compute_pipeline * pipeline,const struct radv_pipeline_layout * layout,struct radv_shader * shader)81 radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
82                            struct radv_shader *shader)
83 {
84    pipeline->base.need_indirect_descriptor_sets |= radv_shader_need_indirect_descriptor_sets(shader);
85 
86    pipeline->base.push_constant_size = layout->push_constant_size;
87    pipeline->base.dynamic_offset_count = layout->dynamic_offset_count;
88 }
89 
90 struct radv_shader *
radv_compile_cs(struct radv_device * device,struct vk_pipeline_cache * cache,struct radv_shader_stage * cs_stage,bool keep_executable_info,bool keep_statistic_info,bool is_internal,bool skip_shaders_cache,struct radv_shader_binary ** cs_binary)91 radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_shader_stage *cs_stage,
92                 bool keep_executable_info, bool keep_statistic_info, bool is_internal, bool skip_shaders_cache,
93                 struct radv_shader_binary **cs_binary)
94 {
95    struct radv_physical_device *pdev = radv_device_physical(device);
96    struct radv_instance *instance = radv_physical_device_instance(pdev);
97 
98    struct radv_shader *cs_shader;
99 
100    /* Compile SPIR-V shader to NIR. */
101    cs_stage->nir = radv_shader_spirv_to_nir(device, cs_stage, NULL, is_internal);
102 
103    radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled);
104 
105    /* Gather info again, information such as outputs_read can be out-of-date. */
106    nir_shader_gather_info(cs_stage->nir, nir_shader_get_entrypoint(cs_stage->nir));
107 
108    /* Run the shader info pass. */
109    radv_nir_shader_info_init(cs_stage->stage, MESA_SHADER_NONE, &cs_stage->info);
110    radv_nir_shader_info_pass(device, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL, RADV_PIPELINE_COMPUTE,
111                              false, &cs_stage->info);
112 
113    radv_declare_shader_args(device, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &cs_stage->args);
114 
115    cs_stage->info.user_sgprs_locs = cs_stage->args.user_sgprs_locs;
116    cs_stage->info.inline_push_constant_mask = cs_stage->args.ac.inline_push_const_mask;
117 
118    /* Postprocess NIR. */
119    radv_postprocess_nir(device, NULL, cs_stage);
120 
121    bool dump_shader = radv_can_dump_shader(device, cs_stage->nir);
122    bool dump_nir = dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
123 
124    if (dump_shader) {
125       simple_mtx_lock(&instance->shader_dump_mtx);
126 
127       if (dump_nir) {
128          nir_print_shader(cs_stage->nir, stderr);
129       }
130    }
131 
132    char *nir_string = NULL;
133    if (keep_executable_info || dump_shader)
134       nir_string = radv_dump_nir_shaders(instance, &cs_stage->nir, 1);
135 
136    /* Compile NIR shader to AMD assembly. */
137    *cs_binary =
138       radv_shader_nir_to_asm(device, cs_stage, &cs_stage->nir, 1, NULL, keep_executable_info, keep_statistic_info);
139 
140    cs_shader = radv_shader_create(device, cache, *cs_binary, skip_shaders_cache || dump_shader);
141 
142    cs_shader->nir_string = nir_string;
143 
144    radv_shader_dump_debug_info(device, dump_shader, *cs_binary, cs_shader, &cs_stage->nir, 1, &cs_stage->info);
145 
146    if (dump_shader)
147       simple_mtx_unlock(&instance->shader_dump_mtx);
148 
149    if (keep_executable_info && cs_stage->spirv.size) {
150       cs_shader->spirv = malloc(cs_stage->spirv.size);
151       memcpy(cs_shader->spirv, cs_stage->spirv.data, cs_stage->spirv.size);
152       cs_shader->spirv_size = cs_stage->spirv.size;
153    }
154 
155    return cs_shader;
156 }
157 
158 void
radv_compute_pipeline_hash(const struct radv_device * device,const VkComputePipelineCreateInfo * pCreateInfo,unsigned char * hash)159 radv_compute_pipeline_hash(const struct radv_device *device, const VkComputePipelineCreateInfo *pCreateInfo,
160                            unsigned char *hash)
161 {
162    VkPipelineCreateFlags2 create_flags = vk_compute_pipeline_create_flags(pCreateInfo);
163    VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
164    const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->stage;
165    struct mesa_sha1 ctx;
166 
167    struct radv_shader_stage_key stage_key =
168       radv_pipeline_get_shader_key(device, sinfo, create_flags, pCreateInfo->pNext);
169 
170    _mesa_sha1_init(&ctx);
171    radv_pipeline_hash(device, pipeline_layout, &ctx);
172    radv_pipeline_hash_shader_stage(create_flags, sinfo, &stage_key, &ctx);
173    _mesa_sha1_final(&ctx, hash);
174 }
175 
176 static VkResult
radv_compute_pipeline_compile(const VkComputePipelineCreateInfo * pCreateInfo,struct radv_compute_pipeline * pipeline,struct radv_pipeline_layout * pipeline_layout,struct radv_device * device,struct vk_pipeline_cache * cache,const VkPipelineShaderStageCreateInfo * pStage,const VkPipelineCreationFeedbackCreateInfo * creation_feedback)177 radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, struct radv_compute_pipeline *pipeline,
178                               struct radv_pipeline_layout *pipeline_layout, struct radv_device *device,
179                               struct vk_pipeline_cache *cache, const VkPipelineShaderStageCreateInfo *pStage,
180                               const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
181 {
182    struct radv_shader_binary *cs_binary = NULL;
183    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
184    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
185    const bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base);
186    struct radv_shader_stage cs_stage = {0};
187    VkPipelineCreationFeedback pipeline_feedback = {
188       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT,
189    };
190    VkResult result = VK_SUCCESS;
191 
192    int64_t pipeline_start = os_time_get_nano();
193 
194    radv_compute_pipeline_hash(device, pCreateInfo, pipeline->base.sha1);
195 
196    pipeline->base.pipeline_hash = *(uint64_t *)pipeline->base.sha1;
197 
198    bool found_in_application_cache = true;
199    if (!skip_shaders_cache &&
200        radv_compute_pipeline_cache_search(device, cache, pipeline, &found_in_application_cache)) {
201       if (found_in_application_cache)
202          pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;
203       result = VK_SUCCESS;
204       goto done;
205    }
206 
207    if (pipeline->base.create_flags & VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
208       return VK_PIPELINE_COMPILE_REQUIRED;
209 
210    int64_t stage_start = os_time_get_nano();
211 
212    const struct radv_shader_stage_key stage_key =
213       radv_pipeline_get_shader_key(device, &pCreateInfo->stage, pipeline->base.create_flags, pCreateInfo->pNext);
214 
215    radv_pipeline_stage_init(pipeline->base.create_flags, pStage, pipeline_layout, &stage_key, &cs_stage);
216 
217    pipeline->base.shaders[MESA_SHADER_COMPUTE] =
218       radv_compile_cs(device, cache, &cs_stage, keep_executable_info, keep_statistic_info, pipeline->base.is_internal,
219                       skip_shaders_cache, &cs_binary);
220 
221    cs_stage.feedback.duration += os_time_get_nano() - stage_start;
222 
223    if (!skip_shaders_cache) {
224       radv_pipeline_cache_insert(device, cache, &pipeline->base);
225    }
226 
227    free(cs_binary);
228    if (radv_can_dump_shader_stats(device, cs_stage.nir)) {
229       radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE,
230                              stderr);
231    }
232    ralloc_free(cs_stage.nir);
233 
234 done:
235    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
236 
237    if (creation_feedback) {
238       *creation_feedback->pPipelineCreationFeedback = pipeline_feedback;
239 
240       if (creation_feedback->pipelineStageCreationFeedbackCount) {
241          assert(creation_feedback->pipelineStageCreationFeedbackCount == 1);
242          creation_feedback->pPipelineStageCreationFeedbacks[0] = cs_stage.feedback;
243       }
244    }
245 
246    return result;
247 }
248 
249 static VkResult
radv_compute_pipeline_import_binary(struct radv_device * device,struct radv_compute_pipeline * pipeline,const VkPipelineBinaryInfoKHR * binary_info)250 radv_compute_pipeline_import_binary(struct radv_device *device, struct radv_compute_pipeline *pipeline,
251                                     const VkPipelineBinaryInfoKHR *binary_info)
252 {
253    VK_FROM_HANDLE(radv_pipeline_binary, pipeline_binary, binary_info->pPipelineBinaries[0]);
254    struct radv_shader *shader;
255    struct blob_reader blob;
256 
257    assert(binary_info->binaryCount == 1);
258 
259    blob_reader_init(&blob, pipeline_binary->data, pipeline_binary->size);
260 
261    shader = radv_shader_deserialize(device, pipeline_binary->key, sizeof(pipeline_binary->key), &blob);
262    if (!shader)
263       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
264 
265    pipeline->base.shaders[MESA_SHADER_COMPUTE] = shader;
266 
267    pipeline->base.pipeline_hash = *(uint64_t *)pipeline_binary->key;
268 
269    return VK_SUCCESS;
270 }
271 
272 VkResult
radv_compute_pipeline_create(VkDevice _device,VkPipelineCache _cache,const VkComputePipelineCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipeline)273 radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo,
274                              const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
275 {
276    VK_FROM_HANDLE(radv_device, device, _device);
277    VK_FROM_HANDLE(vk_pipeline_cache, cache, _cache);
278    VK_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout);
279    struct radv_compute_pipeline *pipeline;
280    VkResult result;
281 
282    pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
283    if (pipeline == NULL) {
284       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
285    }
286 
287    radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_COMPUTE);
288    pipeline->base.create_flags = vk_compute_pipeline_create_flags(pCreateInfo);
289    pipeline->base.is_internal = _cache == device->meta_state.cache;
290 
291    const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
292       vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
293 
294    const VkPipelineBinaryInfoKHR *binary_info = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_BINARY_INFO_KHR);
295 
296    if (binary_info && binary_info->binaryCount > 0) {
297       result = radv_compute_pipeline_import_binary(device, pipeline, binary_info);
298    } else {
299       result = radv_compute_pipeline_compile(pCreateInfo, pipeline, pipeline_layout, device, cache, &pCreateInfo->stage,
300                                              creation_feedback);
301    }
302 
303    if (result != VK_SUCCESS) {
304       radv_pipeline_destroy(device, &pipeline->base, pAllocator);
305       return result;
306    }
307 
308    radv_compute_pipeline_init(pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
309 
310    *pPipeline = radv_pipeline_to_handle(&pipeline->base);
311    radv_rmv_log_compute_pipeline_create(device, &pipeline->base, pipeline->base.is_internal);
312    return VK_SUCCESS;
313 }
314 
315 static VkResult
radv_create_compute_pipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)316 radv_create_compute_pipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
317                               const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
318                               VkPipeline *pPipelines)
319 {
320    VkResult result = VK_SUCCESS;
321 
322    unsigned i = 0;
323    for (; i < count; i++) {
324       VkResult r;
325       r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i]);
326       if (r != VK_SUCCESS) {
327          result = r;
328          pPipelines[i] = VK_NULL_HANDLE;
329 
330          VkPipelineCreateFlagBits2 create_flags = vk_compute_pipeline_create_flags(&pCreateInfos[i]);
331          if (create_flags & VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT)
332             break;
333       }
334    }
335 
336    for (; i < count; ++i)
337       pPipelines[i] = VK_NULL_HANDLE;
338 
339    return result;
340 }
341 
342 void
radv_destroy_compute_pipeline(struct radv_device * device,struct radv_compute_pipeline * pipeline)343 radv_destroy_compute_pipeline(struct radv_device *device, struct radv_compute_pipeline *pipeline)
344 {
345    if (pipeline->base.shaders[MESA_SHADER_COMPUTE])
346       radv_shader_unref(device, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
347 }
348 
349 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateComputePipelines(VkDevice _device,VkPipelineCache pipelineCache,uint32_t count,const VkComputePipelineCreateInfo * pCreateInfos,const VkAllocationCallbacks * pAllocator,VkPipeline * pPipelines)350 radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
351                             const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator,
352                             VkPipeline *pPipelines)
353 {
354    return radv_create_compute_pipelines(_device, pipelineCache, count, pCreateInfos, pAllocator, pPipelines);
355 }
356